From 694ff37d3c1d832074df68cc4787b48a699480a3 Mon Sep 17 00:00:00 2001 From: zhayongliang Date: Thu, 24 Mar 2022 10:17:40 +0800 Subject: [PATCH 1/2] add xlm script --- PyTorch/built-in/nlp/XLM_ID0740_for_PyTorch/test/env.sh | 1 - .../built-in/nlp/XLM_ID0740_for_PyTorch/tokenize_en_zh.sh | 5 +---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/PyTorch/built-in/nlp/XLM_ID0740_for_PyTorch/test/env.sh b/PyTorch/built-in/nlp/XLM_ID0740_for_PyTorch/test/env.sh index bc171a7524..560102bd3d 100644 --- a/PyTorch/built-in/nlp/XLM_ID0740_for_PyTorch/test/env.sh +++ b/PyTorch/built-in/nlp/XLM_ID0740_for_PyTorch/test/env.sh @@ -42,7 +42,6 @@ export COMBINED_ENABLE=1 export HCCL_WHITELIST_DISABLE=1 export SCALAR_TO_HOST_MEM=1 -export TRI_COMBINED_ENABLE=1 #设置Device侧日志等级为error ${install_path}/driver/tools/msnpureport -d 0 -g error diff --git a/PyTorch/built-in/nlp/XLM_ID0740_for_PyTorch/tokenize_en_zh.sh b/PyTorch/built-in/nlp/XLM_ID0740_for_PyTorch/tokenize_en_zh.sh index 2fadc5b72c..2a654b937c 100644 --- a/PyTorch/built-in/nlp/XLM_ID0740_for_PyTorch/tokenize_en_zh.sh +++ b/PyTorch/built-in/nlp/XLM_ID0740_for_PyTorch/tokenize_en_zh.sh @@ -14,14 +14,11 @@ cat data/wiki/txt/zh.valid | ./tools/tokenize.sh zh > data/wiki/txt/token_zh.val shuf -r -n 10000000 data/wiki/txt/token_en.train >> $OUTPATH/bpe.train.en shuf -r -n 10000000 data/wiki/txt/token_zh.train >> $OUTPATH/bpe.train.zh -$FASTBPE learnbpe 50000 $OUTPATH/bpe.train.en > $OUTPATH/codes +$FASTBPE learnbpe 50000 $OUTPATH/bpe.train.en $OUTPATH/bpe.train.zh > $OUTPATH/codes $FASTBPE applybpe $OUTPATH/train.en data/wiki/txt/token_en.train $OUTPATH/codes $FASTBPE applybpe $OUTPATH/test.en data/wiki/txt/token_en.test $OUTPATH/codes $FASTBPE applybpe $OUTPATH/valid.en data/wiki/txt/token_en.valid $OUTPATH/codes -rm -rf $OUTPATH/codes - -$FASTBPE learnbpe 50000 $OUTPATH/bpe.train.zh > $OUTPATH/codes $FASTBPE applybpe $OUTPATH/train.zh data/wiki/txt/token_zh.train $OUTPATH/codes $FASTBPE applybpe $OUTPATH/test.zh data/wiki/txt/token_zh.test $OUTPATH/codes $FASTBPE applybpe $OUTPATH/valid.zh data/wiki/txt/token_zh.valid $OUTPATH/codes -- Gitee From cdc3a092f207c5e2df301430e97277f5ac706d9d Mon Sep 17 00:00:00 2001 From: zhayongliang Date: Fri, 25 Mar 2022 10:44:31 +0800 Subject: [PATCH 2/2] add xlm script --- .../nlp/XLM_ID0740_for_PyTorch/tokenize_en_zh.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/PyTorch/built-in/nlp/XLM_ID0740_for_PyTorch/tokenize_en_zh.sh b/PyTorch/built-in/nlp/XLM_ID0740_for_PyTorch/tokenize_en_zh.sh index 2a654b937c..9bbf29c23a 100644 --- a/PyTorch/built-in/nlp/XLM_ID0740_for_PyTorch/tokenize_en_zh.sh +++ b/PyTorch/built-in/nlp/XLM_ID0740_for_PyTorch/tokenize_en_zh.sh @@ -25,10 +25,10 @@ $FASTBPE applybpe $OUTPATH/valid.zh data/wiki/txt/token_zh.valid $OUTPATH/codes $FASTBPE getvocab $OUTPATH/train.en $OUTPATH/train.zh > $OUTPATH/vocab -python3.7 preprocess.py $OUTPATH/vocab $OUTPATH/train.en.pth -python3.7 preprocess.py $OUTPATH/vocab $OUTPATH/test.en.pth -python3.7 preprocess.py $OUTPATH/vocab $OUTPATH/valid.en.pth +python3.7 preprocess.py $OUTPATH/vocab $OUTPATH/train.en +python3.7 preprocess.py $OUTPATH/vocab $OUTPATH/test.en +python3.7 preprocess.py $OUTPATH/vocab $OUTPATH/valid.en -python3.7 preprocess.py $OUTPATH/vocab $OUTPATH/train.zh.pth -python3.7 preprocess.py $OUTPATH/vocab $OUTPATH/test.zh.pth -python3.7 preprocess.py $OUTPATH/vocab $OUTPATH/valid.zh.pth +python3.7 preprocess.py $OUTPATH/vocab $OUTPATH/train.zh +python3.7 preprocess.py $OUTPATH/vocab $OUTPATH/test.zh +python3.7 preprocess.py $OUTPATH/vocab $OUTPATH/valid.zh -- Gitee