From addbde3ffc894f00367744d6a0e9bce774cd0e7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E5=A7=9C=E5=A5=94?= Date: Thu, 9 Jun 2022 16:13:31 +0800 Subject: [PATCH 01/14] init ci-pipeline --- .gitignore | 2 + AcessScan/README.md | 157 ++++++++++++++++ AcessScan/access_upline.py | 374 +++++++++++++++++++++++++++++++++++++ AcessScan/link_list.txt | 1 + AcessScan/run_upline.sh | 41 ++++ AcessScan/startRun.sh | 311 ++++++++++++++++++++++++++++++ README.en.md | 36 ---- README.md | 37 +--- 8 files changed, 887 insertions(+), 72 deletions(-) create mode 100644 .gitignore create mode 100644 AcessScan/README.md create mode 100644 AcessScan/access_upline.py create mode 100644 AcessScan/link_list.txt create mode 100644 AcessScan/run_upline.sh create mode 100644 AcessScan/startRun.sh delete mode 100644 README.en.md diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000..13bbe5b911 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.idea/ + diff --git a/AcessScan/README.md b/AcessScan/README.md new file mode 100644 index 0000000000..c7216b9b2a --- /dev/null +++ b/AcessScan/README.md @@ -0,0 +1,157 @@ +- [AcessScan门禁扫描工具使用说明](#AcessScan门禁扫描工具使用说明.md) + +- 门禁扫描checklist: + + 1、license扫描:.py/.cpp文件中需要添加license,若未添加,结果返回失败; + + 2、垃圾文件扫描:若后缀为.so/.log/.h5/.event/.log/.pbtxt/.zip/.tar/.tar.gz/.swp/.ipynp/.pyc及文件名称中含有.ckpt,则视为垃圾文件,结果返回失败; + + 3、首层目录必要文件扫描:首层目录下必须存在LINCENSE,README.md,modelzoo_level.txt 三个文件,否则,结果返回失败;不可以存在00-access目录,否则,结果返回失 + + 败;(注:README.md可以命名为其他名称,如README.*/Readme.*/ReadMe.*/readme.*,*代表任意字符) + + 4、大文件扫描:若文件大小超过2M,则视为大文件,结果返回失败; + + 5、内部链接扫描:文件中不可存在蓝驱无法访问的地址,如http://3ms.huawei.com,否则结果返回失败;(README.md不在扫描范围内) + + 6、敏感信息扫描:如工号,文件中的目录内容包含:wx+6/7/8/9位数字或00+6/7/8位数字的组合(扫描范围:所有文件); 如IP:文件中不可存在IP,如: + + http://10.137.54.150/data,否则,结果返回失败(README.md不在扫描范围内); + + 7、网络功能、性能、精度扫描:网络首层目录下必须配置modelzoo_level.txt文件,且文件内容包含三个关键字段:FuncStatus(功能是否OK,值可填写OK/NOK); + + PerfStatus(性能是否OK或达到极致PERFECT,值可填OK/NOK/PERFECT);PrecisionStatus(精度是否OK,值可填写OK/NOK);若网络功能、性能、精度均通过,内容格式如 + + 下所示: + + +``` + FuncStatus:OK + PerfStatus:OK + PrecisionStatus:OK +``` + 注:“:”两侧无需空格,英文格式; + + 校验规则: + + a、FuncStatus:OK,PerfStatus:OK,PrecisionStatus:OK,表示网络功能、性能、精度均通过,网络代码必须放置在Official领域目录下; + + b、FuncStatus:OK,PerfStatus:PERFENCT,PrecisionStatus:OK,表示网络功能、精度均通过、性能达到极致,网络代码必须放置在Benchmark领域目录下; + + c、FuncStatus:OK,PerfStatus:NOK,PrecisionStatus:OK,表示网络功能通过、性能不通过、精度通过,网络代码必须放置在Research领域目录下; + + d、FuncStatus:OK,PerfStatus:OK,PrecisionStatus:NOK,表示网络功能通过、性能通过、精度不通过,网络代码必须放置在Research领域目录下; + + e、FuncStatus:OK,PerfStatus:NOK,PrecisionStatus:NOK,表示网络功能通过、性能不通过、精度不通过,网络代码必须放置在Research领域目录下; + + f、FuncStatus:NOK,表示网络功能不通过,网络代码不允许放置主仓内; + + +- 代码结构: + + +``` + ├── run_upline.sh //开始扫描执行脚本 + ├── access_upline.py //实现门禁扫描规则的代码脚本 + ├── link_list.txt //进行内部链接扫描时,内部链接关键字的配置脚本 +``` + + +- 重要参数: + + - run_upline.sh重要参数如下: + + --id_dir //pr_filelist.txt 及modelzoo代码存放文件存放的路径 + + - access_upline.py重要参数如下: + + --pr_filelist_dir //需要上传仓上的所有文件名称及其路径(build-in开始的路径,例如:built-in/MindSpore/Benchmark/cv/detection/Mask_R_CNN_for_MindSpore/eval.py) + + --linklisttxt //配置文件link_list.txt所在路径 + + --FileSizeLimit //配置大文件的大小,默认为2 + +- 操作步骤:(以AlexNet_for_TensorFlow为例) + + 1、获取代码:run_upline.sh、access_upline.py、link_list.txt脚本,下载脚本放置同一目录下,例如:/home 目录; + + +``` + ├── /home + ├──├──run_upline.sh + ├──├──access_upline.py + ├──├──link_list.txt +``` + + + 2、数据准备: + + a、将需提交pr的代码放置在id_dir(可自定义),例如:/home: + + + ├── /home + ├──├──run_upline.sh + ├──├──access_upline.py + ├──├──link_list.txt + ├──├──modelzoo/build-in/... .../AlexNet_for_TensorFlow + + + + b、pr_filelist.txt配置文件准备,pr_filelist.txt内容应该为AlexNet_for_TensorFlow目录下所有文件的路径如下所示: + + built-in/TensorFlow/Official/cv/image_classification/AlexNet_for_TensorFlow/train.py + built-in/TensorFlow/Official/cv/image_classification/AlexNet_for_TensorFlow/README.md + built-in/TensorFlow/Official/cv/image_classification/AlexNet_for_TensorFlow/scripts/train_alexnet_1p.sh + built-in/TensorFlow/Official/cv/image_classification/AlexNet_for_TensorFlow/alexnet/alexnet.py + ...依次同理 + + 注意:路径是从build-in开始,且只需配置需要上传仓的文件,确保文件真实存在对应的路径下; + + c、将pr_filelist.txt文件放置在modelzoo/目录同级,如下: + + + ├── /home + ├──├──run_upline.sh + ├──├──access_upline.py + ├──├──link_list.txt + ├──├──modelzoo/build-in/... .../AlexNet_for_TensorFlow + ├──├──pr_filelist.txt + + + 3、开始扫描 + + a、配置run_upline.sh脚本,将id_dir配置为实际路径,即/home id_dir='/home' + + b、执行如下指令: bash run_upline.sh + + 4、扫描结果分析 + + a、若check success,则表示扫描通过,如下所示: + + +``` + =================Start to Check License ================= + =================Start to Check Size of File ================= + =================Start to Check funk file ================= + =================Start to Check file of First Directory ================= + =================Start to Check Internal Link ================= + =================Start to Check Sensitive Information ================= + =================Start to Check modelzoo level ================= + check success +``` + + + b、若结果返回 check fail,则表示失败,如下所示,失败原因请查看wiki门禁校验规则。 + + +``` + =================Start to Check License ================= + =================Start to Check Size of File ================= + =================Start to Check funk file ================= + =================Start to Check file of First Directory ================= + =================Start to Check Internal Link ================= + =================Start to Check Sensitive Information ================= + =================Start to Check modelzoo level ================= + PerfStatus is not OK or PERFECT or PrecisionStatus is not OK ,You should put the code under the Research directory! + check fail +``` diff --git a/AcessScan/access_upline.py b/AcessScan/access_upline.py new file mode 100644 index 0000000000..b191ce03c0 --- /dev/null +++ b/AcessScan/access_upline.py @@ -0,0 +1,374 @@ +import sys +import os +import os.path +import filecmp +import argparse +import shutil +import gzip +import math +from pathlib import Path +import chardet +import re + + +def init_args(): + parser = argparse.ArgumentParser() + parser.add_argument('--model_dir', type=str, default="./AlexNet_for_TensorFlow", + help='model dirrectory of the project') + parser.add_argument('--pr_filelist_dir', type=str, default="./pr_filelist0.txt", + help='model dirrectory of the pr_filelist') + parser.add_argument('--linklisttxt', type=str, default='./link_list.txt', + help='model dirrectory of the link_list') + parser.add_argument('--FileSizeLimit', type=int, default=2, + help='model size of FileSizeLimit') + return parser.parse_args() + + +def model_str_name(model_dir): + ''' + 功能:将路径转换为字符串 + ''' + model_dir_str = str(model_dir) + return model_dir_str + + +def file_size_check(path_pr_list, FileSizeLimit, fram_str, modelzoo_dir, dot=2, ): + ''' + :param model_dir : 网络目录 + :param FileSizeLimit : 文件限制大小 + 实现功能:扫描文件大小,小于2MB + ''' + filesize_check = 0 + # path = fram_str + dict1 = {} + + with open(path_pr_list, 'r') as fooc: + # 读取pr_filelist.txt的内容 + for model_dir in fooc: + ''' + 判断并处理三种类型的文件: LICENSE,py文件,其他文件 + ''' + model_dir = model_dir.strip('\n') + # 获取模型框架路径字符串 + model_dir_str = model_str_name(model_dir) + # 拼装路径 + model_dir2 = fram_str + modelzoo_dir + '/' + model_dir_str + model_dir1 = Path(model_dir2) + if model_dir1.exists(): + # 获取model_dir目录下所有文件 + pathTmp = str(model_dir1) + # print(pathTmp) + filesize = os.path.getsize(pathTmp) # 如果是文件,获取文件大小 + # 转换单位为兆 + filesize1 = str(round(filesize / math.pow(1024, 2), dot)) + # print('{} 文件大小为:{}'.format(filename, filesize)) + dict1[model_dir_str] = filesize1 # 将文件大小添加到字典 + else: + print('{},The file is not exist!'.format(model_dir)) + filesize_check = 0 + for key, value in dict1.items(): + if float(value) >= FileSizeLimit: + print('{},size of file is {}M and greater than {}M,please check it!'.format(key, value, FileSizeLimit)) + filesize_check = 1 + else: + continue + print('filesize_check=%d' % filesize_check) + + +def file_scan(path_pr_list, fram_str, modelzoo_dir): + with open(path_pr_list, 'r') as fooa: + # 读取pr_filelist.txt的内容 + license_check = 0 + for model_dir in fooa: + ''' + 判断并处理三种类型的文件: LICENSE,py文件,其他文件 + ''' + # 去除换行符 + model_dir = model_dir.strip('\n') + # 获取模型框架路径字符串 + model_dir_str = model_str_name(model_dir) + # # 处理LICENSE文件 + # # 判断LICENSE文件中是否存在关键字LICENSE/license + # # 判断.py/.cpp文件中是否存在关键字LICENSE/license + a = model_dir_str[-3:] + b = model_dir_str[-7:] + c = model_dir_str[-4:] + z = model_dir_str[-11:] + if (b == 'LICENSE') or (a == '.py') or (c == '.cpp'): + # 判断LICENSE文件中是否存在关键字LICENSE/license + if z == '__init__.py': + continue + else: + a, b, c, d, e, f = 'LICENSE', 'license', 'License', 'Licence', 'licence', 'LICENCE' + model_dir1 = fram_str + modelzoo_dir + '/' + model_dir_str + model_dir = Path(model_dir1) + if model_dir.exists(): + with open(str(model_dir), 'r') as foob: + content = foob.read() + if (a in content) or (b in content) or (c in content) or (d in content) or ( + e in content) or (f in content): + continue + else: + # model_name = os.path.basename(model_dir) + license_check = 1 + print('{},The keyword license no exists in the file,please check it!'.format( + model_dir_str)) + foob.close() + else: + print('{},The file is not exist!'.format(model_dir_str)) + license_check = 0 + print('license_check=%d' % license_check) + if license_check == 1: + print('License check failed, Please follow the guide to add License:') + print('https://gitee.com/ascend/modelzoo/blob/master/contrib/CONTRIBUTING.md') + + +def get_model_fram(pr_filelist0_str): + ''' + 功能:获取网络框架名称路径 + :param pr_filelist0_dir: + :return: + ''' + if '_TensorFlow' in pr_filelist0_str: + tf_str = '_TensorFlow' + a = pr_filelist0_str.index(tf_str) + b = a + 11 + model_fram = pr_filelist0_str[:b] + elif '_PyTorch' in pr_filelist0_str: + pt_str = '_PyTorch' + a = pr_filelist0_str.index(pt_str) + b = a + 8 + model_fram = pr_filelist0_str[:b] + elif '_MindSpore' in pr_filelist0_str: + ms_str = '_MindSpore' + a = pr_filelist0_str.index(ms_str) + b = a + 10 + model_fram = pr_filelist0_str[:b] + else: + model_fram = '' + return model_fram + + +def check_firstlevel_file(path_pr_list, fram_str, modelzoo_dir): + ''' + :param pr_filelist0_str: 网络路径 + :return: + 功能:检查首层目录是否存在README.md LICENSE文件 + ''' + with open(path_pr_list, 'r') as fooa: + for filepath_inprlist in fooa: + filepath_inprlist = filepath_inprlist.strip('\n') + pr_filelist0_str = filepath_inprlist + firstlevel_check = 0 + firstlevel_check1 = 0 + firstlevel_check3 = 0 + firstlevel_check4 = 0 + model_fram = get_model_fram(pr_filelist0_str) + if model_fram == '': # 网络名称不规范处理 + firstlevel_check4 = 5 + else: # 网络名称符合规范处理 + # 线上实际网络代码路径 + fram_path1 = fram_str + modelzoo_dir + '/' + get_model_fram(pr_filelist0_str) + fram_path = fram_path1 + # 判断文件是否存在 + filepath_inprlist = Path(fram_path) + if filepath_inprlist.exists(): + # 获取首层目录下所有文件名称 + # if os.path.isdir(fram_path): + # b, c ='README.md', 'LICENSE' + a, b, c, d = 'README', 'readme', 'LICENSE', 'Readme' + filelist = os.listdir(fram_path) + with open('first_filename.txt', 'w') as file: + file.write(str(filelist)) + file.close() + with open('first_filename.txt', 'r') as file1: + content1 = file1.read() + if (a in content1) or (b in content1) or (d in content1): + # if (b in filelist) and (c in filelist): + firstlevel_check = 0 + if (a not in content1) and (d not in content1) and (b not in content1): + firstlevel_check1 = 2 + # if b not in filelist: + # firstlevel_check2 = 3 + if c not in filelist: + firstlevel_check3 = 4 + file1.close() + else: + print('{},The file is not exist!'.format(filepath_inprlist)) + firstlevel_check = 0 + if firstlevel_check1 == 2: + print('{},{} is not exist,please check and add it!'.format(get_model_fram(pr_filelist0_str), a)) + firstlevel_check = 1 + # if firstlevel_check2 == 3: + # print('{},{} is not exist,please check and add it!'.format(get_model_fram(pr_filelist0_str), b)) + # firstlevel_check = 1 + if firstlevel_check3 == 4: + print('{},{} is not exist,please check and add it!'.format(get_model_fram(pr_filelist0_str), c)) + firstlevel_check = 1 + if firstlevel_check4 == 5: + # print('{},The network name of file is not standard,please check name of modelzoo!'.format(pr_filelist0_str)) + firstlevel_check = 0 + print('firstlevel_check=%d' % firstlevel_check) + + +def funk_file(path_pr_list, fram_str, modelzoo_dir): + ''' + 功能:检测当前路径下所有垃圾文件 + 参数:pr_filelist0_str:字符串化后的路径 + ''' + with open(path_pr_list, 'r') as fooa: + funkfile_check = 0 + for filepath_inprlist in fooa: + filepath_inprlist = filepath_inprlist.strip('\n') + pr_filelist0_str1 = fram_str + modelzoo_dir + '/' + filepath_inprlist + pr_filelist0_str = Path(pr_filelist0_str1) + if pr_filelist0_str.exists(): + # pr_filelist0_str = filepath_inprlist + funk_file_typr = ['.log', '.pbtxt', '.pb', '.h5', '.so', '.zip', '.tar', '.event', '.tar.gz', '.swp'] + # 获取文件名 + file_name = os.path.basename(str(pr_filelist0_str)) + # 获取文件后缀名 + file_suffix = os.path.splitext(file_name)[1] + if file_suffix in funk_file_typr: + funkfile_check = 1 + print('{}, The file is Junk file, please check it !'.format(filepath_inprlist)) + if '.ckpt' in str(pr_filelist0_str): + funkfile_check = 1 + print('{}, The file is Junk file, please check it !'.format(filepath_inprlist)) + else: + funkfile_check = 0 + print('{},The file is not exist!'.format(filepath_inprlist)) + print('funkfile_check=%d' % funkfile_check) + + +def check_link(path_pr_list, fram_str, modelzoo_dir, onelink): + ''' + 功能:检测文件内部是否包含内部链接 + fram_file:文件所在路径 + alink: 一条字符串化的链接 + ''' + with open(path_pr_list, 'r') as fooa: + internal_link_check = 0 + for filepath_inprlist in fooa: + filepath_inprlist = filepath_inprlist.strip('\n') + pr_filelist0_str1 = fram_str + modelzoo_dir + '/' + filepath_inprlist + # 将路径名称字符串化 + file_name = model_str_name(pr_filelist0_str1) + a = 'README' + b = 'readme' + c = 'Readme' + d = 'ReadMe' + if (a not in file_name) and (b not in file_name) and (c not in file_name) and (d not in file_name): + pr_filelist0_str = Path(pr_filelist0_str1) + if pr_filelist0_str.exists(): + with open(str(pr_filelist0_str), 'r', errors='ignore') as foo: + for words in foo: + if onelink in words: + link = onelink[0:] + internal_link_check = 1 + print('{},This is an internal links that includes {},please check it!'.format( + filepath_inprlist, link)) + else: + continue + foo.close() + else: + internal_link_check = 0 + print('{},The file is not exist!'.format(filepath_inprlist)) + print('internal_link_check=%d' % internal_link_check) + + +def check_Sensitive_content(path_pr_list, fram_str, modelzoo_dir): + with open(path_pr_list, 'r') as fooa: + sensitive_check = 0 + for fram_file_dir in fooa: + # 去除换行操作 + fram_file_dir = fram_file_dir.strip('\n') + # pr中文件绝对路径 + pr_filelist0_str1 = fram_str + modelzoo_dir + '/' + fram_file_dir + # 判断文件是否存在 + pr_filelist0_str = Path(pr_filelist0_str1) + if pr_filelist0_str.exists(): + # 如果文件存在,打开文件 + with open(str(pr_filelist0_str), 'r', errors='ignore') as foo: + for words in foo: + if ('0.00' not in words) and ('0.' not in words): + if re.findall(r'(00[\d]{5}|00[\d]{6}|00[\d]{7}|00[\d]{8}|00[\d]{9}|00[\d]{10})', words) or \ + re.findall(r'([ ]00[\d]{5}|00[\d]{6}|00[\d]{7}|00[\d]{8}|00[\d]{9}|00[\d]{10})', + words) or \ + re.findall(r'([ ]00[\d]{5}|00[\d]{6}|00[\d]{7}|00[\d]{8}|00[\d]{9}|00[\d]{10}[ ])', + words) or \ + re.findall(r'(.[ ]00[\d]{5}|00[\d]{6}|00[\d]{7}|00[\d]{8}|00[\d]{9}|00[\d]{10})', + words) or \ + re.findall(r'(.[ ]00[\d]{5}|00[\d]{6}|00[\d]{7}|00[\d]{8}|00[\d]{9}|00[\d]{10}.)', + words) or \ + re.findall(r'(.[ ]00[\d]{5}|00[\d]{6}|00[\d]{7}|00[\d]{8}|00[\d]{9}|00[\d]{10}[ ])', + words) or \ + re.findall( + r'(.[ ]00[\d]{5}|00[\d]{6}|00[\d]{7}|00[\d]{8}|00[\d]{9}|00[\d]{10}[ ].)', + words) or \ + re.findall(r'(wx\d{6}|wx\d{7}|wx\d{8}|wx\d{9}|wx\d{10}|wx\d{11})', words) or \ + re.findall(r'([ ]wx\d{6}|wx\d{7}|wx\d{8}|wx\d{9}|wx\d{10}|wx\d{11})', words) or \ + re.findall(r'([ ]wx\d{6}|wx\d{7}|wx\d{8}|wx\d{9}|wx\d{10}|wx\d{11}[ ])', words) or \ + re.findall(r'(.[ ]wx\d{6}|wx\d{7}|wx\d{8}|wx\d{9}|wx\d{10}|wx\d{11})', words) or \ + re.findall(r'(.[ ]wwx\d{6}|wx\d{7}|wx\d{8}|wx\d{9}|wx\d{10}|wx\d{11}.)', words) or \ + re.findall(r'(.[ ]wx\d{6}|wx\d{7}|wx\d{8}|wx\d{9}|wx\d{10}|wx\d{11}[ ])', words) or \ + re.findall(r'(.[ ]wx\d{6}|wx\d{7}|wx\d{8}|wx\d{9}|wx\d{10}|wx\d{11}[ ].)', words): + print( + '{}, There may be a job number in the file, please check the line that is: {}'.format( + fram_file_dir, words)) + sensitive_check = 1 + foo.close() + with open(str(pr_filelist0_str), 'r', errors='ignore') as fooc: + for words in fooc: + if re.findall( + r'http://\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b', + words) or \ + re.findall( + r'https://\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b', + words): + print( + '{}, There may be an ip address in the file, please check the line that is:{}'.format( + fram_file_dir, words)) + sensitive_check = 1 + fooc.close() + else: + sensitive_check = 0 + print('{},The file is not exist!'.format(sensitive_check)) + print('sensitive_check=%d' % sensitive_check) + + +def main(): + args = init_args() + path_pr_list = args.pr_filelist_dir + alink = args.linklisttxt + # print(path_pr_list) + tf_str = 'pr_filelist.txt' + a = path_pr_list.index(tf_str) + fram_str = path_pr_list[:a] + modelzoo_dir = 'modelzoo' + FileSizeLimit = args.FileSizeLimit + print('=================Start to Check License =================') + file_scan(path_pr_list, fram_str, modelzoo_dir) + ''' + :param model_dir : 网络目录 + :param FileSizeLimit : 文件限制大小 + 实现功能:扫描文件大小,小于2MB + ''' + print('=================Start to Check Size of File =================') + file_size_check(path_pr_list, FileSizeLimit, fram_str, modelzoo_dir, dot=2) + print('=================Start to Check funk file =================') + funk_file(path_pr_list, fram_str, modelzoo_dir) + # 层级目录检查 + print('=================Start to Check file of First Directory =================') + # TODO 检查正确的模型根目录 + # check_firstlevel_file(path_pr_list, fram_str, modelzoo_dir) + print('=================Start to Check Internal Link =================') + with open(alink, 'r') as food: + for onelink in food: + check_link(path_pr_list, fram_str, modelzoo_dir, onelink) + print('=================Start to Check Sensitive Information =================') + check_Sensitive_content(path_pr_list, fram_str, modelzoo_dir) + + +if __name__ == '__main__': + main() diff --git a/AcessScan/link_list.txt b/AcessScan/link_list.txt new file mode 100644 index 0000000000..b3c936f912 --- /dev/null +++ b/AcessScan/link_list.txt @@ -0,0 +1 @@ +huawei.com \ No newline at end of file diff --git a/AcessScan/run_upline.sh b/AcessScan/run_upline.sh new file mode 100644 index 0000000000..e33c33bba5 --- /dev/null +++ b/AcessScan/run_upline.sh @@ -0,0 +1,41 @@ +#!/bin/bash +id_dir='/home/jenkins/share-data/gitee/ascend/modelzoo/code/compile/20_bak' +file_log='log1/py_train.log' +dir_log='log1' +if [ -d $dir_log ]; +then + if [ -f $file_log ]; + then + rm -f $file_log + fi +else + mkdir $dir_log +fi +#=========功能列表============= +#1、py/cpp文件中license检查 +#2、垃圾文件检查,后缀为so/log/h5/event +#3、首层目录必要文件检查:LINCENSE,README.md,requirements.txt +#4、文件大小检查,不超过2M +#5、内部链接扫描 +#6、敏感信息扫描,如wx/00开头的工号 +#7、网络功能、性能、精度扫描; +#py功能实现 +python3 access_upline.py --pr_filelist_dir=$id_dir/pr_filelist.txt >$file_log 2>&1 +#结果呈现 +license_check=`grep -ri "license_check=1" ${file_log} | wc -l` +filesize_check=`grep -ri "filesize_check=1" ${file_log} | wc -l` +firstlevel_check=`grep -ri "firstlevel_check=1" ${file_log} | wc -l` +funkfile_check=`grep -ri "funkfile_check=1" ${file_log} | wc -l` +internal_link_check=`grep -ri "internal_link_check=1" ${file_log} | wc -l` +sensitive_check=`grep -ri "sensitive_check=1" ${file_log} | wc -l` +modelzoo_level_check=`grep -ri "modelzoo_level_check=1" ${file_log} | wc -l` +#echo "========== $sensitive_check" +cat $file_log | grep -v "check=1" | grep -v "check=0" +if [[ $license_check -ge 1 || $filesize_check -ge 1 || $firstlevel_check -ge 1 || $funkfile_check -ge 1 || $internal_link_check -ge 1 || $sensitive_check -ge 1 || modelzoo_level_check -ge 1 ]]; +then + echo "check fail" + exit 1 +else + echo "check success" +fi + diff --git a/AcessScan/startRun.sh b/AcessScan/startRun.sh new file mode 100644 index 0000000000..4e1ffaebd2 --- /dev/null +++ b/AcessScan/startRun.sh @@ -0,0 +1,311 @@ +#!/bin/bash + +echo "####################################################################" +echo "# Start Modelzoo Network Test.... " +echo "####################################################################" + +top_dir=`pwd` + +hostname="worker-121-36-69-71" +config_dir=/root/lava_workspace/$hostname +log_dir=/root/lava_workspace/$hostname/log +test_dir=/root/lava_workspace/$hostname/git +modelzoo_dir=$1/modelzoo +ascend310_ip="183.129.213.69" +ascend910_ip="218.2.129.25" + +if [ -f $top_dir/result.xml ] +then + #echo "clear $top_dir/result.xml" + rm -rf $top_dir/result.xml +fi + +if [ -f $top_dir/result.txt ] +then + #echo "clear $top_dir/result.txt" + rm -rf $top_dir/result.txt +fi + +if [ -f $top_dir/result.bak ] +then + #echo "clear $top_dir/result.bak" + rm -rf $top_dir/result.bak +fi + +if [ -d $top_dir/log ] +then + #echo "clear $top_dir/log/*" + rm -rf $top_dir/log/* +fi + +echo "=================Modified files in this PR: =================" +cat $1/pr_filelist.txt + + + +#如果PR只涉及到.MD文件的修改,则无需执行用例,直接返回OK +if [[ `grep -ciE ".MD|.txt|.doc|.docx|LICENSE" "$1/pr_filelist.txt"` -ne '0' && `grep -cE ".py|.sh|.cpp" "$1/pr_filelist.txt"` -eq '0' ]] ;then + echo "Only .MD|.txt|.doc|.docx|LICENSE in pr_filelist, No need to run testcases!" + exit 0 +fi +#=========功能列表============= +#1、py/cpp文件中license检查 +#2、垃圾文件检查,后缀为so/log/h5/event +#3、首层目录必要文件检查:LINCENSE,README.md,requirements.txt +#4、文件大小检查,不超过2M +#5、内部链接扫描 +file_log='log1/py_train.log' +dir_log='log1' +if [ -d $dir_log ]; +then + if [ -f $file_log ]; + then + rm -f $file_log + fi +else + mkdir $dir_log +fi +python3 access_upline.py --pr_filelist_dir=$1/pr_filelist.txt >$file_log 2>&1 +license_check=`grep -ri "license_check=1" ${file_log} | wc -l` +filesize_check=`grep -ri "filesize_check=1" ${file_log} | wc -l` +firstlevel_check=`grep -ri "firstlevel_check=1" ${file_log} | wc -l` +funkfile_check=`grep -ri "funkfile_check=1" ${file_log} | wc -l` +internal_link_check=`grep -ri "internal_link_check=1" ${file_log} | wc -l` +sensitive_check=`grep -ri "sensitive_check=1" ${file_log} | wc -l` +cat $file_log | grep -v "check=1" | grep -v "check=0" +if [[ $license_check -ge 1 || $filesize_check -ge 1 || $firstlevel_check -ge 1 || $funkfile_check -ge 1 || $internal_link_check -ge 1 || sensitive_check -ge 1 ]]; +then + echo "check fail" + exit 1 +else + echo "check success" +fi +#exit $status + + +echo "=================Start to Check License =================" +#license检查 +lincense_check=0 +while read line +do + a=`echo $line |awk -F "_for_" '{print $1}' | awk -F "/" '{print $NF}'` + b=`echo $line |awk -F "_for_" '{print $2}' | awk -F "/" '{print $1}'` + result=`echo $a`_for_`echo $b` + lise_dir=$(echo ${line%$result*}/$result/LICENSE) + directory=$(echo ${line%$result*}/$result/) + if [ -n "$b" ] && [ -d $1/modelzoo/$directory ]; + then + if [ -f $1/modelzoo/$lise_dir ]; + then + true + else + echo "$result license is not exist!" + let lincense_check=1 + fi + else + true + #echo "$result name -ERROR" + fi +done < $1/pr_filelist.txt + + +#py/cpp文件检查 +while read line +do + function checkfile() + { + result=$(echo $1 | grep -E "\.py|\.cpp" | grep -v "__init__.py") + if [ -n "$result" ]; + then + Hw_result=`cat $1 | grep -i "License"` + if [ -n "$Hw_result" ]; + then + true + else + echo "$1 license check fail!" + let lincense_check=1 + fi + else + #echo "$1 no need check" + true + fi + } + function getAllFiles() + { + for fileName in `ls $1`; + do + dir_or_file=$1"/"$fileName + if [ -d $dir_or_file ] + then + getAllFiles $dir_or_file + else + checkfile $dir_or_file + fi + done + } + if [ -f $1/modelzoo/$line ]; + then + #echo $line + checkfile $1/modelzoo/$line + else + getAllFiles $1/modelzoo/$line + fi + +done < $1/pr_filelist.txt + +if [ $lincense_check -eq '1' ] ;then + echo "License check failed, Please follow the guide to add License:" + echo "https://gitee.com/ascend/modelzoo/blob/master/contrib/CONTRIBUTING.md" + exit 1 +fi + +#如果新增的都是目录,则无需执行用例,直接返回OK +check_res=0 +while read line +do + if [[ ! $line =~ ".keep" ]]; + then + let check_res=1 + fi +done < $1/pr_filelist.txt + +if [ $check_res -eq '0' ] ;then + echo "Add directorys in contrib/Research, No need to run testcases!" + exit 0 +fi + +#代码安全检查模块 +while read line +do + if [ -d $1/modelzoo/$line ]; + then + rmresult=`grep -rn -w "rm " $1/modelzoo/$line/*.sh | wc -l` + cpresult=`grep -rn -w "cp " $1/modelzoo/$line/*.sh | wc -l` + toresult=`grep -rn -w "touch " $1/modelzoo/$line/*.sh | wc -l` + if [ $rmresult -gt 0 ] || [ $cpresult -gt 0 ] || [ $toresult -gt 0 ] ; + then + echo "Please do not use rm/cp/touch in .sh, tks!" + fi + elif [[ $1/modelzoo/$line =~ ".sh" ]]; + then + rmresult=`cat $1/modelzoo/$line | grep -w "rm " | wc -l` + cpresult=`cat $1/modelzoo/$line | grep -w "cp " | wc -l` + toresult=`cat $1/modelzoo/$line | grep -w "touch " | wc -l` + if [ $rmresult -gt 0 ] || [ $cpresult -gt 0 ] || [ $toresult -gt 0 ] ; + then + echo "Please do not use rm/cp/touch in .sh, tks!" + fi + fi +done < $1/pr_filelist.txt + +#文件大小检查模块,超过10M则报错 +filesize_check=0 +maxsize=$((1024*1024*2)) +while read line +do + if [ -d $1/modelzoo/$line ]; + then + #PR提交里面不存在目录,如果是空目录,则为.keep + echo "directory" + else + filesize=`ls -l $1/modelzoo/$line | awk '{ print $5 }'` + if [[ $filesize -gt $maxsize ]]; + then + echo "File size of $1/modelzoo/$line greater than 2M, Please remove it!" + let filesize_check=1 + fi + fi +done < $1/pr_filelist.txt + +if [ $filesize_check -eq '1' ] ;then + echo "File size check failed, exit!" + exit 1 +fi + +#如果PR不涉及contrib/TensorFlow/Research目录,则无需执行用例,直接返回OK +if [ `grep -c "contrib/TensorFlow/Research" "$1/pr_filelist.txt"` -eq '0' ] ;then + echo "This pr dosn't have changes in contrib/TensorFlow/Research, No need to run testcases!" + exit 0 +fi + +date_time=`date +%Y%m%d`"."`date +%H%M%S` +echo "====================================================================" +echo "$date_time : start run test case , please wait ..." +echo "====================================================================" + +python3 createCases.py $1/pr_filelist.txt $modelzoo_dir + +#如果case.txt中没有生成用例,则报错退出 +if [ `grep -c ".sh" "$top_dir/cases.txt"` -eq '0' ] ;then + echo "No testcases was found, Please check your PR!" + exit 1 +fi + +date_time=`date +%Y%m%d`"."`date +%H%M%S` +echo "====================================================================" +echo "$date_time : copy source code to Ascend310&Ascend910 , please wait ..." +echo "====================================================================" + +if [ `grep -c "_offline_inference" "$top_dir/cases.txt"` -ne '0' ] ;then + ./auto_scp.sh "$modelzoo_dir/contrib" "$ascend310_ip" "/home/HwHiAiUser/modelzoo" "Root@123" "22" >/dev/null 2>&1 +fi +if [[ `grep -c "_online_inference" "$top_dir/cases.txt"` -ne '0' || `grep -c "_train" "$top_dir/cases.txt"` -ne '0' ]] ;then + ./auto_scp.sh "$modelzoo_dir/contrib" "$ascend910_ip" "/home/HwHiAiUser/modelzoo" "Root@123" "7745" >/dev/null 2>&1 +fi + +date_time=`date +%Y%m%d`"."`date +%H%M%S` +echo "====================================================================" +echo "$date_time : cat testcase info" +echo "====================================================================" +cat cases.txt + +echo "====================================================================" +num=1 +cat cases.txt | while read line +do + date_time=`date +%Y%m%d`"."`date +%H%M%S` + echo "$date_time : start run test case num [ $num ] : [ $line ]" + echo "====================================================================" + array=(${line//,/ }) + case=${array[0]} + echo $case + + if [ -f "$test_dir/$case" ] + then + chmod +x $test_dir/$case + sleep 1 + $test_dir/$line + date_time=`date +%Y%m%d`"."`date +%H%M%S` + echo "$date_time : finished test case num [ $num ] : [ $line ]" + wc -l $top_dir/result.txt + let num=num+1 + if [ -s $top_dir/result.txt ] + then + cp -rf $top_dir/result.txt $top_dir/result.bak + else + echo "####################################################################" + echo "$date_time ERROR : Check Test Result FAIL" + echo "ERROR INFO : $top_dir/result.txt is empty , please check..." + echo "####################################################################" + fi + echo "====================================================================" + else + echo "####################################################################" + echo "$date_time ERROR : Run Testcase FAIL" + echo "ERROR INFO : Could not find testcase [ $test_dir/$case ]" + echo "####################################################################" + fi +done + +cp -r $top_dir/log $1/modelzoo_log + +date_time=`date +%Y%m%d`"."`date +%H%M%S` +echo "####################################################################" +echo "# Modelzoo Network Test Finished! " +echo "####################################################################" + +if [ `grep -c "fail" "$top_dir/result.txt"` -ne '0' ] ;then + exit 1 +else + exit 0 +fi \ No newline at end of file diff --git a/README.en.md b/README.en.md deleted file mode 100644 index 91170f8ed7..0000000000 --- a/README.en.md +++ /dev/null @@ -1,36 +0,0 @@ -# ModelZoo-PyTorch - -#### Description -{**When you're done, you can delete the content in this README and update the file with details for others getting started with your repository**} - -#### Software Architecture -Software architecture description - -#### Installation - -1. xxxx -2. xxxx -3. xxxx - -#### Instructions - -1. xxxx -2. xxxx -3. xxxx - -#### Contribution - -1. Fork the repository -2. Create Feat_xxx branch -3. Commit your code -4. Create Pull Request - - -#### Gitee Feature - -1. You can use Readme\_XXX.md to support different languages, such as Readme\_en.md, Readme\_zh.md -2. Gitee blog [blog.gitee.com](https://blog.gitee.com) -3. Explore open source project [https://gitee.com/explore](https://gitee.com/explore) -4. The most valuable open source project [GVP](https://gitee.com/gvp) -5. The manual of Gitee [https://gitee.com/help](https://gitee.com/help) -6. The most popular members [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/) diff --git a/README.md b/README.md index cf52c85e96..c008648980 100644 --- a/README.md +++ b/README.md @@ -1,39 +1,4 @@ # ModelZoo-PyTorch -#### 介绍 -{**以下是 Gitee 平台说明,您可以替换此简介** -Gitee 是 OSCHINA 推出的基于 Git 的代码托管平台(同时支持 SVN)。专为开发者提供稳定、高效、安全的云端软件开发协作平台 -无论是个人、团队、或是企业,都能够用 Gitee 实现代码托管、项目管理、协作开发。企业项目请看 [https://gitee.com/enterprises](https://gitee.com/enterprises)} +This branch is only used for ci-pipeline.. -#### 软件架构 -软件架构说明 - - -#### 安装教程 - -1. xxxx -2. xxxx -3. xxxx - -#### 使用说明 - -1. xxxx -2. xxxx -3. xxxx - -#### 参与贡献 - -1. Fork 本仓库 -2. 新建 Feat_xxx 分支 -3. 提交代码 -4. 新建 Pull Request - - -#### 特技 - -1. 使用 Readme\_XXX.md 来支持不同的语言,例如 Readme\_en.md, Readme\_zh.md -2. Gitee 官方博客 [blog.gitee.com](https://blog.gitee.com) -3. 你可以 [https://gitee.com/explore](https://gitee.com/explore) 这个地址来了解 Gitee 上的优秀开源项目 -4. [GVP](https://gitee.com/gvp) 全称是 Gitee 最有价值开源项目,是综合评定出的优秀开源项目 -5. Gitee 官方提供的使用手册 [https://gitee.com/help](https://gitee.com/help) -6. Gitee 封面人物是一档用来展示 Gitee 会员风采的栏目 [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/) -- Gitee From aa5cea3294a4999103e30dd68a94f22c74594d50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E5=A7=9C=E5=A5=94?= Date: Thu, 9 Jun 2022 16:45:48 +0800 Subject: [PATCH 02/14] remove --- AcessScan/link_list.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AcessScan/link_list.txt b/AcessScan/link_list.txt index b3c936f912..133929bfc9 100644 --- a/AcessScan/link_list.txt +++ b/AcessScan/link_list.txt @@ -1 +1 @@ -huawei.com \ No newline at end of file +xxxxxx.com \ No newline at end of file -- Gitee From ebbee71bff78128e458f9f2bda5f55fdc0765ad3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E5=A7=9C=E5=A5=94?= Date: Thu, 9 Jun 2022 16:47:54 +0800 Subject: [PATCH 03/14] update --- AcessScan/access_upline.py | 1 + 1 file changed, 1 insertion(+) diff --git a/AcessScan/access_upline.py b/AcessScan/access_upline.py index b191ce03c0..0314c80824 100644 --- a/AcessScan/access_upline.py +++ b/AcessScan/access_upline.py @@ -363,6 +363,7 @@ def main(): # TODO 检查正确的模型根目录 # check_firstlevel_file(path_pr_list, fram_str, modelzoo_dir) print('=================Start to Check Internal Link =================') + # TODO 设置正确的涉A的不允许出现在资料里的网站,维护到link_list.txt with open(alink, 'r') as food: for onelink in food: check_link(path_pr_list, fram_str, modelzoo_dir, onelink) -- Gitee From 4934938b66b87478fe58307e0268121b40f9f2b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E5=A7=9C=E5=A5=94?= Date: Thu, 9 Jun 2022 17:05:39 +0800 Subject: [PATCH 04/14] update --- AcessScan/.keep | 0 AcessScan/access_upline.py | 806 ++++++++++++++++----- AcessScan/core_binding_config.json | 6 + AcessScan/link_list.txt | 2 +- AcessScan/startRun.sh | 200 ++--- AcessScan/upline_access_black_http.json | 4 + LicenseTool/LicenseTool.py | 305 ++++++++ LicenseTool/README.md | 44 ++ LicenseTool/data/LICENSE_Pytorch/LICENSE | 29 + LicenseTool/data/LICENSE_TF/LICENSE | 284 ++++++++ LicenseTool/data/cache/.keep | 0 LicenseTool/data/cpp_license_py_all.txt | 33 + LicenseTool/data/cpp_license_tf_all.txt | 29 + LicenseTool/data/cpp_license_tf_huawei.txt | 16 + LicenseTool/data/py_license_py_all.txt | 33 + LicenseTool/data/py_license_tf_all.txt | 29 + LicenseTool/data/py_license_tf_huawei.txt | 16 + 17 files changed, 1581 insertions(+), 255 deletions(-) create mode 100644 AcessScan/.keep create mode 100644 AcessScan/core_binding_config.json create mode 100644 AcessScan/upline_access_black_http.json create mode 100644 LicenseTool/LicenseTool.py create mode 100644 LicenseTool/README.md create mode 100644 LicenseTool/data/LICENSE_Pytorch/LICENSE create mode 100644 LicenseTool/data/LICENSE_TF/LICENSE create mode 100644 LicenseTool/data/cache/.keep create mode 100644 LicenseTool/data/cpp_license_py_all.txt create mode 100644 LicenseTool/data/cpp_license_tf_all.txt create mode 100644 LicenseTool/data/cpp_license_tf_huawei.txt create mode 100644 LicenseTool/data/py_license_py_all.txt create mode 100644 LicenseTool/data/py_license_tf_all.txt create mode 100644 LicenseTool/data/py_license_tf_huawei.txt diff --git a/AcessScan/.keep b/AcessScan/.keep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/AcessScan/access_upline.py b/AcessScan/access_upline.py index 0314c80824..275e9a7494 100644 --- a/AcessScan/access_upline.py +++ b/AcessScan/access_upline.py @@ -9,8 +9,8 @@ import math from pathlib import Path import chardet import re - - +import time +import json def init_args(): parser = argparse.ArgumentParser() parser.add_argument('--model_dir', type=str, default="./AlexNet_for_TensorFlow", @@ -21,29 +21,27 @@ def init_args(): help='model dirrectory of the link_list') parser.add_argument('--FileSizeLimit', type=int, default=2, help='model size of FileSizeLimit') + parser.add_argument('--train_performance_keyword', type=str, default="./train_performance_keyword.txt", + help='keywords of the train_performance_keyword file') + parser.add_argument('--train_full_keyword', type=str, default="./train_full_keyword.txt", + help='keywords of the train_full_keyword file') return parser.parse_args() - - def model_str_name(model_dir): ''' 功能:将路径转换为字符串 ''' model_dir_str = str(model_dir) return model_dir_str - - -def file_size_check(path_pr_list, FileSizeLimit, fram_str, modelzoo_dir, dot=2, ): +def file_size_check(path_pr_list, FileSizeLimit, fram_str,modelzoo_dir,dot=2,): ''' :param model_dir : 网络目录 :param FileSizeLimit : 文件限制大小 实现功能:扫描文件大小,小于2MB ''' filesize_check = 0 - # path = fram_str dict1 = {} - with open(path_pr_list, 'r') as fooc: - # 读取pr_filelist.txt的内容 + # 读取pr_filelist.txt的内容 for model_dir in fooc: ''' 判断并处理三种类型的文件: LICENSE,py文件,其他文件 @@ -51,31 +49,33 @@ def file_size_check(path_pr_list, FileSizeLimit, fram_str, modelzoo_dir, dot=2, model_dir = model_dir.strip('\n') # 获取模型框架路径字符串 model_dir_str = model_str_name(model_dir) - # 拼装路径 + #拼装路径 model_dir2 = fram_str + modelzoo_dir + '/' + model_dir_str model_dir1 = Path(model_dir2) if model_dir1.exists(): # 获取model_dir目录下所有文件 pathTmp = str(model_dir1) - # print(pathTmp) filesize = os.path.getsize(pathTmp) # 如果是文件,获取文件大小 # 转换单位为兆 filesize1 = str(round(filesize / math.pow(1024, 2), dot)) - # print('{} 文件大小为:{}'.format(filename, filesize)) dict1[model_dir_str] = filesize1 # 将文件大小添加到字典 else: - print('{},The file is not exist!'.format(model_dir)) - filesize_check = 0 + pass for key, value in dict1.items(): if float(value) >= FileSizeLimit: - print('{},size of file is {}M and greater than {}M,please check it!'.format(key, value, FileSizeLimit)) + print('{},size of file is {}M and greater than {}M,please check and delete it!'.format(key,value, FileSizeLimit)) filesize_check = 1 else: continue + fooc.close() print('filesize_check=%d' % filesize_check) - - -def file_scan(path_pr_list, fram_str, modelzoo_dir): +def file_scan(path_pr_list,fram_str,modelzoo_dir): + ''' + 功能:判断.py/.cpp文件中是否存在关键字LICENSE/license,若不存在,则返回license_check状态为1,即失败; + path_pr_list:pr_filelist.txt文件完整路径,其内容包含需要扫描文件 + fram_str:pr_filelist.txt文件所在的当前目录 + modelzoo_dir:modelzoo,字符串,用于拼接网络代码所在的完整路径 + ''' with open(path_pr_list, 'r') as fooa: # 读取pr_filelist.txt的内容 license_check = 0 @@ -83,135 +83,229 @@ def file_scan(path_pr_list, fram_str, modelzoo_dir): ''' 判断并处理三种类型的文件: LICENSE,py文件,其他文件 ''' - # 去除换行符 + #去除换行符 model_dir = model_dir.strip('\n') # 获取模型框架路径字符串 model_dir_str = model_str_name(model_dir) - # # 处理LICENSE文件 - # # 判断LICENSE文件中是否存在关键字LICENSE/license # # 判断.py/.cpp文件中是否存在关键字LICENSE/license - a = model_dir_str[-3:] - b = model_dir_str[-7:] - c = model_dir_str[-4:] - z = model_dir_str[-11:] - if (b == 'LICENSE') or (a == '.py') or (c == '.cpp'): - # 判断LICENSE文件中是否存在关键字LICENSE/license - if z == '__init__.py': + py_file = model_dir_str[-3:] + cpp_file = model_dir_str[-4:] + init_file = model_dir_str[-11:] + if (py_file == '.py') or (cpp_file == '.cpp'): + #排除init文件 + if init_file == '__init__.py': continue + # 判断文件中是否存在关键字LICENSE/license else: - a, b, c, d, e, f = 'LICENSE', 'license', 'License', 'Licence', 'licence', 'LICENCE' + LICENSE,license,License,Licence,licence,LICENCE= 'LICENSE', 'license','License','Licence','licence','LICENCE' model_dir1 = fram_str + modelzoo_dir + '/' + model_dir_str model_dir = Path(model_dir1) if model_dir.exists(): - with open(str(model_dir), 'r') as foob: + with open(str(model_dir), 'r',encoding='gb18030',errors='ignore') as foob: content = foob.read() - if (a in content) or (b in content) or (c in content) or (d in content) or ( - e in content) or (f in content): + if (LICENSE in content) or (license in content) or (License in content) or (Licence in content) or (licence in content) or (LICENCE in content): continue else: - # model_name = os.path.basename(model_dir) license_check = 1 - print('{},The keyword license no exists in the file,please check it!'.format( - model_dir_str)) + print('{},The keyword license no exists in the file,please check and add it!'.format(model_dir_str)) foob.close() else: - print('{},The file is not exist!'.format(model_dir_str)) - license_check = 0 + pass print('license_check=%d' % license_check) if license_check == 1: print('License check failed, Please follow the guide to add License:') print('https://gitee.com/ascend/modelzoo/blob/master/contrib/CONTRIBUTING.md') - - +def spt_path(pr_filelist0_str): + model_name_list = pr_filelist0_str.split('/') + return model_name_list def get_model_fram(pr_filelist0_str): ''' 功能:获取网络框架名称路径 :param pr_filelist0_dir: :return: ''' - if '_TensorFlow' in pr_filelist0_str: - tf_str = '_TensorFlow' - a = pr_filelist0_str.index(tf_str) + if 'for_TensorFlow' in pr_filelist0_str: + if 'for_TensorFlow2.X' in pr_filelist0_str: + tf_str = 'for_TensorFlow2.X' + a = pr_filelist0_str.index(tf_str) + b = a + 17 + model_fram = pr_filelist0_str[:b] + else: + tf_str = 'for_TensorFlow' + a = pr_filelist0_str.index(tf_str) + b = a + 14 + model_fram = pr_filelist0_str[:b] + elif 'for_PyTorch' in pr_filelist0_str: + pt_str = 'for_PyTorch' + a = pr_filelist0_str.index(pt_str) b = a + 11 model_fram = pr_filelist0_str[:b] - elif '_PyTorch' in pr_filelist0_str: - pt_str = '_PyTorch' + elif 'for_MindSpore' in pr_filelist0_str: + ms_str = 'for_MindSpore' + a = pr_filelist0_str.index(ms_str) + b = a + 13 + model_fram = pr_filelist0_str[:b] + elif 'for_Tensorflow' in pr_filelist0_str: + if 'for_Tensorflow2.X' in pr_filelist0_str: + tf_str = 'for_Tensorflow2.X' + a = pr_filelist0_str.index(tf_str) + b = a + 17 + model_fram = pr_filelist0_str[:b] + else: + tf_str = 'for_Tensorflow' + a = pr_filelist0_str.index(tf_str) + b = a + 14 + model_fram = pr_filelist0_str[:b] + elif 'for_Pytorch' in pr_filelist0_str: + pt_str = 'for_Pytorch' a = pr_filelist0_str.index(pt_str) - b = a + 8 + b = a + 11 + model_fram = pr_filelist0_str[:b] + elif 'for_Mindspore' in pr_filelist0_str: + ms_str = 'for_Mindspore' + a = pr_filelist0_str.index(ms_str) + b = a + 13 model_fram = pr_filelist0_str[:b] - elif '_MindSpore' in pr_filelist0_str: - ms_str = '_MindSpore' + elif 'for_ACL' in pr_filelist0_str: + ms_str = 'for_ACL' a = pr_filelist0_str.index(ms_str) - b = a + 10 + b = a + 7 model_fram = pr_filelist0_str[:b] - else: + else : model_fram = '' - return model_fram - - -def check_firstlevel_file(path_pr_list, fram_str, modelzoo_dir): + return model_fram +def check_firstlevel_file(path_pr_list,fram_str,modelzoo_dir): ''' - :param pr_filelist0_str: 网络路径 - :return: - 功能:检查首层目录是否存在README.md LICENSE文件 + 功能1:检查首层目录是否存在必要LICENSE文件 + 功能2:检查首层目录是否存在必要README.md文件 + 功能3:检查首层目录是否存在必要requirements.txt文件 + 功能4:检查首层目录是否存在必要modelzoo_level.txt文件 + 功能5:垃圾目录00-access拒绝入仓 + 功能6:kernel_meta目录视为垃圾目录,拒绝入仓 + path_pr_list:pr_filelist.txt文件完整路径,其内容包含需要扫描文件 + fram_str:pr_filelist.txt文件所在的当前目录 + modelzoo_dir:modelzoo,字符串,用于拼接网络代码所在的完整路径 ''' + #不规范标识字段,0:pass,1:fail + firstlevel_check = 0 + #readme检查 + firstlevel_check1 = 0 + #00-access垃圾目录检查 + firstlevel_check2 = 0 + firstlevel_check6 = 0 + #LICENSE文件检查 + firstlevel_check3 = 0 + #modelzoo_level.txt检查 + firstlevel_check4 = 0 + #requirements.txt检查 + firstlevel_check5 = 0 + firstlevel_check8 = 0 + #kernel_metala垃圾目录 + firstlevel_check7 = 0 with open(path_pr_list, 'r') as fooa: for filepath_inprlist in fooa: filepath_inprlist = filepath_inprlist.strip('\n') pr_filelist0_str = filepath_inprlist - firstlevel_check = 0 - firstlevel_check1 = 0 - firstlevel_check3 = 0 - firstlevel_check4 = 0 model_fram = get_model_fram(pr_filelist0_str) - if model_fram == '': # 网络名称不规范处理 - firstlevel_check4 = 5 - else: # 网络名称符合规范处理 - # 线上实际网络代码路径 - fram_path1 = fram_str + modelzoo_dir + '/' + get_model_fram(pr_filelist0_str) - fram_path = fram_path1 - # 判断文件是否存在 - filepath_inprlist = Path(fram_path) - if filepath_inprlist.exists(): + fram_path1 = fram_str + modelzoo_dir + '/' + model_fram + fram_path = model_str_name(fram_path1) + dir_str = model_str_name(filepath_inprlist) + with open('first_filename4.txt', 'w') as file4: + file4.write(str(dir_str)) + file4.close() + with open('first_filename4.txt', 'r') as file5: + content4 = file5.read() + file5.close() + h = 'requirements.txt' + g = 'modelzoo_level.txt' + #如果网络名称不规范,排除推理及高校 + #if (model_fram == '' and 'built-in/ACL_' not in content4 and 'contrib' not in content4 ) : # 网络名称不规范处理 + if (model_fram == ''): # 网络名称不规范处理 + #获取文件名 + fram_path2 = fram_str + modelzoo_dir + '/' + pr_filelist0_str + file_name2 = os.path.basename(fram_path2) + #截取不规范网络名称路径 + b = filepath_inprlist.index(file_name2) + fram_unst_dname = filepath_inprlist[:b] + if fram_unst_dname != '': + fram_unst_dname_true = fram_str + modelzoo_dir + '/' + fram_unst_dname + filepath_inprlist2 = Path(fram_unst_dname_true) + if filepath_inprlist2.exists(): + #获取首层目录下所有文件 + file_name3 =os.listdir(fram_unst_dname_true) + with open('first_filename3.txt', 'w') as file: + file.write(str(file_name3)) + file.close() + with open('first_filename3.txt', 'r') as file2: + content2 = file2.read() + file2.close() + a, b, c, d, e = 'README', 'readme', 'LICENSE', 'Readme', 'ReadMe' + if (a in content2) or (d in content2) or (b in content2) or (e in content2): + if c in content2: + if g not in content2: + firstlevel_check4 = 4 + if h not in content2: + firstlevel_check5 = 1 + if '00-access' in content2: + firstlevel_check6 = 1 + if model_fram != '': + # 判断路径是否真实存在 + filepath_inprlist1 = Path(fram_path) + if filepath_inprlist1.exists(): # 获取首层目录下所有文件名称 - # if os.path.isdir(fram_path): - # b, c ='README.md', 'LICENSE' - a, b, c, d = 'README', 'readme', 'LICENSE', 'Readme' + a,b,c,d,e = 'README','readme','LICENSE','Readme','ReadMe' filelist = os.listdir(fram_path) - with open('first_filename.txt', 'w') as file: + with open('first_filename.txt','w') as file: file.write(str(filelist)) file.close() with open('first_filename.txt', 'r') as file1: content1 = file1.read() - if (a in content1) or (b in content1) or (d in content1): - # if (b in filelist) and (c in filelist): - firstlevel_check = 0 - if (a not in content1) and (d not in content1) and (b not in content1): - firstlevel_check1 = 2 - # if b not in filelist: - # firstlevel_check2 = 3 - if c not in filelist: - firstlevel_check3 = 4 file1.close() + if '00-access' in content1: + firstlevel_check2 = 3 + if (a not in content1) and (d not in content1) and (b not in content1) and (e not in content1): + firstlevel_check1 = 2 + break + if c not in content1: + firstlevel_check3 = 4 + if h not in content1: + firstlevel_check8 = 8 + break + else: - print('{},The file is not exist!'.format(filepath_inprlist)) - firstlevel_check = 0 + pass + #kernel_meta目录视为垃圾目录,拒绝入仓 + if '/kernel_meta/' in content4: + firstlevel_check7 = 1 if firstlevel_check1 == 2: print('{},{} is not exist,please check and add it!'.format(get_model_fram(pr_filelist0_str), a)) firstlevel_check = 1 - # if firstlevel_check2 == 3: - # print('{},{} is not exist,please check and add it!'.format(get_model_fram(pr_filelist0_str), b)) - # firstlevel_check = 1 + if firstlevel_check8 ==8: + print('{},{} is not exist,please check and add it!'.format(get_model_fram(pr_filelist0_str), h)) + firstlevel_check = 1 if firstlevel_check3 == 4: print('{},{} is not exist,please check and add it!'.format(get_model_fram(pr_filelist0_str), c)) + print('License check failed, Please follow the guide to add LICENSE:') + print('https://gitee.com/ascend/modelzoo/blob/master/contrib/CONTRIBUTING.md') + firstlevel_check = 1 + if firstlevel_check4 == 4: + print('{},The {} file is non-existent in the model code of the file,Please follow the guide to add {}:'.format(fram_unst_dname, g, g)) + print('https://gitee.com/ascend/modelzoo/blob/master/contrib/CONTRIBUTING.md') + firstlevel_check = 1 + if firstlevel_check5 == 1: + print('{},The {} file is non-existent in the model code of the file,Please check and add {}:'.format(fram_unst_dname, h, h)) + firstlevel_check = 1 + if firstlevel_check6 == 1: + firstlevel_check = 1 + print('{},00-access directory should not exist,please delete it!'.format(fram_unst_dname)) + if firstlevel_check7 == 1: + firstlevel_check = 1 + print('{}, kernel_meta is junk directory,please check and delete it!'.format(pr_filelist0_str)) + if firstlevel_check2 == 3: firstlevel_check = 1 - if firstlevel_check4 == 5: - # print('{},The network name of file is not standard,please check name of modelzoo!'.format(pr_filelist0_str)) - firstlevel_check = 0 + print('{},00-access directory should not exist,please delete it!'.format(get_model_fram(pr_filelist0_str))) print('firstlevel_check=%d' % firstlevel_check) - - -def funk_file(path_pr_list, fram_str, modelzoo_dir): +def junk_file(path_pr_list,fram_str,modelzoo_dir): ''' 功能:检测当前路径下所有垃圾文件 参数:pr_filelist0_str:字符串化后的路径 @@ -223,25 +317,63 @@ def funk_file(path_pr_list, fram_str, modelzoo_dir): pr_filelist0_str1 = fram_str + modelzoo_dir + '/' + filepath_inprlist pr_filelist0_str = Path(pr_filelist0_str1) if pr_filelist0_str.exists(): - # pr_filelist0_str = filepath_inprlist - funk_file_typr = ['.log', '.pbtxt', '.pb', '.h5', '.so', '.zip', '.tar', '.event', '.tar.gz', '.swp'] + funk_file_typr = ['.log', '.pbtxt', '.pb', '.h5', '.so', '.zip', '.tar', '.event','.tar.gz','.swp','.ipynb','.pyc','.novalocal','.bin','.pth','.onnx','.npy','.om','.pkl','.pt','.mat','.tfrecord'] + junl_file_typr1 = ['.jpg', '.png'] # 获取文件名 file_name = os.path.basename(str(pr_filelist0_str)) # 获取文件后缀名 file_suffix = os.path.splitext(file_name)[1] if file_suffix in funk_file_typr: funkfile_check = 1 - print('{}, The file is Junk file, please check it !'.format(filepath_inprlist)) + print('{}, The file is Junk file, please check and delete it !'.format(filepath_inprlist)) if '.ckpt' in str(pr_filelist0_str): funkfile_check = 1 - print('{}, The file is Junk file, please check it !'.format(filepath_inprlist)) + print('{}, The file is Junk file, please check and delete it !'.format(filepath_inprlist)) + if 'events.out.' in str(pr_filelist0_str): + funkfile_check = 1 + print('{}, The file is Junk file, please check and delete it !'.format(filepath_inprlist)) + if 'network_need_files.txt' in str(pr_filelist0_str): + funkfile_check = 1 + print('{}, The file is Junk file,please check and delete it!'.format(filepath_inprlist)) + #loss*.txt 视为垃圾文件 + loss_file_list = re.findall(r"\w*loss\w*.txt",file_name) + loss_png_list = re.findall(r"\w*loss\w*.png", file_name) + if loss_file_list != [] or loss_png_list != []: + funkfile_check = 1 + print('{}, The file is Junk file,please check and delete it!'.format(filepath_inprlist)) + datas_path = ['data', 'datas','datasets'] + for data in datas_path: + #目录中存在data目录 + if data in str(filepath_inprlist): + data_str = data + path_list = filepath_inprlist.split('/') + for datapath in path_list: + if data_str in datapath: + a = filepath_inprlist.index(datapath) + fram_data = filepath_inprlist[:a] + #data所在路径 + data_path = fram_str + modelzoo_dir + '/' + fram_data + datapath + #如果路径真是存在 + fram_data_str = Path(data_path) + if fram_data_str.exists(): + #获取文件名称的后缀 + file_suffix1 = os.path.splitext(pr_filelist0_str1)[-1] + if file_suffix1 in junl_file_typr1: + # .jpg/.png在modelzoo下文件路径 + funkfile_check = 1 + print('{}, The file is Junk file,please check it!'.format(filepath_inprlist)) + break + if 'ge_proto_' in str(pr_filelist0_str) and file_suffix == '.txt': + funkfile_check = 1 + print('{}, The file is Junk file,please check it!'.format(filepath_inprlist)) + if 'events.' in str(pr_filelist0_str) and file_suffix == '.novalocal': + funkfile_check = 1 + print('{}, The file is Junk file,please check it!'.format(filepath_inprlist)) else: - funkfile_check = 0 - print('{},The file is not exist!'.format(filepath_inprlist)) + pass + fooa.close() print('funkfile_check=%d' % funkfile_check) - - -def check_link(path_pr_list, fram_str, modelzoo_dir, onelink): +def check_link(path_pr_list, fram_str,modelzoo_dir,onelink): ''' 功能:检测文件内部是否包含内部链接 fram_file:文件所在路径 @@ -254,122 +386,454 @@ def check_link(path_pr_list, fram_str, modelzoo_dir, onelink): pr_filelist0_str1 = fram_str + modelzoo_dir + '/' + filepath_inprlist # 将路径名称字符串化 file_name = model_str_name(pr_filelist0_str1) - a = 'README' - b = 'readme' - c = 'Readme' - d = 'ReadMe' - if (a not in file_name) and (b not in file_name) and (c not in file_name) and (d not in file_name): - pr_filelist0_str = Path(pr_filelist0_str1) + if ('README' not in file_name) and ('readme' not in file_name) and ('Readme' not in file_name) and ('ReadMe' not in file_name): + pr_filelist0_str = Path(file_name) if pr_filelist0_str.exists(): - with open(str(pr_filelist0_str), 'r', errors='ignore') as foo: + with open(str(pr_filelist0_str), 'r',encoding='gb18030',errors='ignore') as foo: for words in foo: if onelink in words: link = onelink[0:] internal_link_check = 1 - print('{},This is an internal links that includes {},please check it!'.format( - filepath_inprlist, link)) + print('{},This is an internal links that includes {},please check this line that: {}'.format(filepath_inprlist,link,words)) else: continue foo.close() else: - internal_link_check = 0 - print('{},The file is not exist!'.format(filepath_inprlist)) + pass print('internal_link_check=%d' % internal_link_check) - - -def check_Sensitive_content(path_pr_list, fram_str, modelzoo_dir): +def check_Sensitive_content(path_pr_list,fram_str,modelzoo_dir): + with open(os.getcwd() + "/upline_access_black_http.json", 'r') as load_f: + load_dict = json.load(load_f) with open(path_pr_list, 'r') as fooa: sensitive_check = 0 for fram_file_dir in fooa: - # 去除换行操作 - fram_file_dir = fram_file_dir.strip('\n') - # pr中文件绝对路径 + #去除换行操作 + fram_file_dir= fram_file_dir.strip('\n') + #pr中文件绝对路径 pr_filelist0_str1 = fram_str + modelzoo_dir + '/' + fram_file_dir - # 判断文件是否存在 - pr_filelist0_str = Path(pr_filelist0_str1) + #判断文件是否存在 + file_name = model_str_name(pr_filelist0_str1) + pr_filelist0_str = Path(file_name) if pr_filelist0_str.exists(): - # 如果文件存在,打开文件 - with open(str(pr_filelist0_str), 'r', errors='ignore') as foo: + #如果文件存在,打开文件 + with open(str(pr_filelist0_str), 'r',encoding='gb18030',errors='ignore') as foo: for words in foo: + words = words.strip('\n') if ('0.00' not in words) and ('0.' not in words): - if re.findall(r'(00[\d]{5}|00[\d]{6}|00[\d]{7}|00[\d]{8}|00[\d]{9}|00[\d]{10})', words) or \ - re.findall(r'([ ]00[\d]{5}|00[\d]{6}|00[\d]{7}|00[\d]{8}|00[\d]{9}|00[\d]{10})', - words) or \ - re.findall(r'([ ]00[\d]{5}|00[\d]{6}|00[\d]{7}|00[\d]{8}|00[\d]{9}|00[\d]{10}[ ])', - words) or \ - re.findall(r'(.[ ]00[\d]{5}|00[\d]{6}|00[\d]{7}|00[\d]{8}|00[\d]{9}|00[\d]{10})', - words) or \ - re.findall(r'(.[ ]00[\d]{5}|00[\d]{6}|00[\d]{7}|00[\d]{8}|00[\d]{9}|00[\d]{10}.)', - words) or \ - re.findall(r'(.[ ]00[\d]{5}|00[\d]{6}|00[\d]{7}|00[\d]{8}|00[\d]{9}|00[\d]{10}[ ])', - words) or \ + # 工号识别 + if re.findall( + r'([/][A-Za-z]00[\d]{5}[/]|[/][A-Za-z]00[\d]{6}[/]|[/][A-Za-z]00[\d]{7}[/]|[/][A-Za-z]00[\d]{8}[/]|[/][A-Za-z]00[\d]{9}[/]|[/][A-Za-z]00[\d]{10}[/])', + words) or \ re.findall( - r'(.[ ]00[\d]{5}|00[\d]{6}|00[\d]{7}|00[\d]{8}|00[\d]{9}|00[\d]{10}[ ].)', + r'([/][A-Za-z]wx\d{6}[/]|[/][A-Za-z]wx\d{7}[/]|[/][A-Za-z]wx\d{8}[/]|[/][A-Za-z]wx\d{9}[/]|[/][A-Za-z]wx\d{10}[/]|[/][A-Za-z]wx\d{11}[/])', words) or \ - re.findall(r'(wx\d{6}|wx\d{7}|wx\d{8}|wx\d{9}|wx\d{10}|wx\d{11})', words) or \ - re.findall(r'([ ]wx\d{6}|wx\d{7}|wx\d{8}|wx\d{9}|wx\d{10}|wx\d{11})', words) or \ - re.findall(r'([ ]wx\d{6}|wx\d{7}|wx\d{8}|wx\d{9}|wx\d{10}|wx\d{11}[ ])', words) or \ - re.findall(r'(.[ ]wx\d{6}|wx\d{7}|wx\d{8}|wx\d{9}|wx\d{10}|wx\d{11})', words) or \ - re.findall(r'(.[ ]wwx\d{6}|wx\d{7}|wx\d{8}|wx\d{9}|wx\d{10}|wx\d{11}.)', words) or \ - re.findall(r'(.[ ]wx\d{6}|wx\d{7}|wx\d{8}|wx\d{9}|wx\d{10}|wx\d{11}[ ])', words) or \ - re.findall(r'(.[ ]wx\d{6}|wx\d{7}|wx\d{8}|wx\d{9}|wx\d{10}|wx\d{11}[ ].)', words): + re.findall( + r'([/]00[\d]{5}[/]|[/]00[\d]{6}[/]|[/]00[\d]{7}[/]|[/]00[\d]{8}[/]|[/]00[\d]{9}[/]|[/]00[\d]{10}[/])', + words): print( '{}, There may be a job number in the file, please check the line that is: {}'.format( fram_file_dir, words)) sensitive_check = 1 foo.close() - with open(str(pr_filelist0_str), 'r', errors='ignore') as fooc: - for words in fooc: - if re.findall( - r'http://\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b', - words) or \ - re.findall( - r'https://\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b', - words): - print( - '{}, There may be an ip address in the file, please check the line that is:{}'.format( - fram_file_dir, words)) - sensitive_check = 1 - fooc.close() - else: - sensitive_check = 0 - print('{},The file is not exist!'.format(sensitive_check)) - print('sensitive_check=%d' % sensitive_check) + # 获取文件名 + env_file_name = os.path.basename(str(pr_filelist0_str)) + if re.findall(r'train\w*.sh', env_file_name) or re.findall(r'infer\w*.sh', env_file_name): + with open(str(pr_filelist0_str), 'r', encoding='gb18030', errors='ignore') as fooc: + for words in fooc: + words = words.strip('\n') + # 不合规配置环境变量1、export install_path 2、export LD_LIBRARY_PATH 3、export PYTHONPATH 4、export PATH 5、export ASCEND_OPP_PATH + if re.findall(r'\w*export install_path\w*', words) or re.findall(r'\w*export LD_LIBRARY_PATH\w*', words) or re.findall(r'\w*export PATH\w*', words) \ + or re.findall(r'\w*export ASCEND_OPP_PATH\w*', words): + print('{}, There are non compliant configuration environment variables, please check the line that is: {}'.format( + fram_file_dir, words)) + sensitive_check = 1 + if re.findall(r'\w*export PYTHONPATH\w*', words) and re.findall(r'\w*install_path\w*', words): + print('{}, There are non compliant configuration environment variables, please check the line that is: {}'.format( + fram_file_dir, words)) + sensitive_check = 1 + fooc.close() + if ('README' not in file_name) and ('readme' not in file_name) and ('Readme' not in file_name) and ('ReadMe' not in file_name): + with open(str(pr_filelist0_str), 'r',encoding='gb18030',errors='ignore') as fooc: + for words in fooc: + # ip 识别 + if ('device_ip' in words) or ('server_id' in words): + continue + elif re.findall( + r'http://\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b', + words) or \ + re.findall( + r'https://\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b', + words): #or \ + # re.findall( + # r'(?$file_log 2>&1 +#docs文件检查 +echo "=================Start to Check Type of File =================" +while read line +do + dir2=`echo $line | sed "s/\// /g"` + for i in $dir2; + do + if [[ $i == 'test' ]];then + #echo "=========666"; + result=$(echo $line | grep -E "\.py|\.cpp" | grep -v "__init__.py") + if [ -n "$result" ]; + then + if [ ! -f "$1/modelzoo/$line" ];then + docs_nums=`grep -a $'\r' $1/modelzoo/$line | wc -l` + if [[ $docs_nums -eq 0 ]];then + continue + else + echo "$line ,This is a docs file,please check and delete it!" + let UNIX_check=1 + fi + fi + fi + fi + done +done < $1/pr_filelist.txt + license_check=`grep -ri "license_check=1" ${file_log} | wc -l` filesize_check=`grep -ri "filesize_check=1" ${file_log} | wc -l` firstlevel_check=`grep -ri "firstlevel_check=1" ${file_log} | wc -l` funkfile_check=`grep -ri "funkfile_check=1" ${file_log} | wc -l` internal_link_check=`grep -ri "internal_link_check=1" ${file_log} | wc -l` sensitive_check=`grep -ri "sensitive_check=1" ${file_log} | wc -l` -cat $file_log | grep -v "check=1" | grep -v "check=0" -if [[ $license_check -ge 1 || $filesize_check -ge 1 || $firstlevel_check -ge 1 || $funkfile_check -ge 1 || $internal_link_check -ge 1 || sensitive_check -ge 1 ]]; +modelzoo_level_check=`grep -ri "modelzoo_level_check=1" ${file_log} | wc -l` +file_word_check=`grep -ri "file_word_check=1" ${file_log} | wc -l` +core_binding_check=`grep -ri "core_binding_check=1" ${file_log} | wc -l` +cat $file_log | grep -a -v "check=1" | grep -a -v "check=0" +if [[ $license_check -ge 1 || $filesize_check -ge 1 || $firstlevel_check -ge 1 || $funkfile_check -ge 1 || $internal_link_check -ge 1 +|| $sensitive_check -ge 1 || $modelzoo_level_check -ge 1 || file_word_check -ge 1 || UNIX_check -ge 1 || core_binding_check -ge 1 ]]; then echo "check fail" exit 1 @@ -82,83 +111,81 @@ else fi #exit $status - -echo "=================Start to Check License =================" -#license检查 -lincense_check=0 -while read line -do - a=`echo $line |awk -F "_for_" '{print $1}' | awk -F "/" '{print $NF}'` - b=`echo $line |awk -F "_for_" '{print $2}' | awk -F "/" '{print $1}'` - result=`echo $a`_for_`echo $b` - lise_dir=$(echo ${line%$result*}/$result/LICENSE) - directory=$(echo ${line%$result*}/$result/) - if [ -n "$b" ] && [ -d $1/modelzoo/$directory ]; - then - if [ -f $1/modelzoo/$lise_dir ]; - then - true - else - echo "$result license is not exist!" - let lincense_check=1 - fi - else - true - #echo "$result name -ERROR" - fi -done < $1/pr_filelist.txt +#echo "=================Start to Check License =================" +##license检查 +#lincense_check=0 +#while read line +#do +# a=`echo $line |awk -F "_for_" '{print $1}' | awk -F "/" '{print $NF}'` +# b=`echo $line |awk -F "_for_" '{print $2}' | awk -F "/" '{print $1}'` +# result=`echo $a`_for_`echo $b` +# lise_dir=$(echo ${line%$result*}/$result/LICENSE) +# directory=$(echo ${line%$result*}/$result/) +# if [ -n "$b" ] && [ -d $1/modelzoo/$directory ]; +# then +# if [ -f $1/modelzoo/$lise_dir ]; +# then +# true +# else +# echo "$result license is not exist!" +# let lincense_check=1 +# fi +# else +# true +# #echo "$result name -ERROR" +# fi +#done < $1/pr_filelist.txt #py/cpp文件检查 -while read line -do - function checkfile() - { - result=$(echo $1 | grep -E "\.py|\.cpp" | grep -v "__init__.py") - if [ -n "$result" ]; - then - Hw_result=`cat $1 | grep -i "License"` - if [ -n "$Hw_result" ]; - then - true - else - echo "$1 license check fail!" - let lincense_check=1 - fi - else - #echo "$1 no need check" - true - fi - } - function getAllFiles() - { - for fileName in `ls $1`; - do - dir_or_file=$1"/"$fileName - if [ -d $dir_or_file ] - then - getAllFiles $dir_or_file - else - checkfile $dir_or_file - fi - done - } - if [ -f $1/modelzoo/$line ]; - then - #echo $line - checkfile $1/modelzoo/$line - else - getAllFiles $1/modelzoo/$line - fi - -done < $1/pr_filelist.txt - -if [ $lincense_check -eq '1' ] ;then - echo "License check failed, Please follow the guide to add License:" - echo "https://gitee.com/ascend/modelzoo/blob/master/contrib/CONTRIBUTING.md" - exit 1 -fi - +#while read line +#do +# function checkfile() +# { +# result=$(echo $1 | grep -E "\.py|\.cpp" | grep -v "__init__.py") +# if [ -n "$result" ]; +# then +# Hw_result=`cat $1 | grep -i "License"` +# if [ -n "$Hw_result" ]; +# then +# true +# else +# echo "$1 license check fail!" +# let lincense_check=1 +# fi +# else +# #echo "$1 no need check" +# true +# fi +# } +# function getAllFiles() +# { +# for fileName in `ls $1`; +# do +# dir_or_file=$1"/"$fileName +# if [ -d $dir_or_file ] +# then +# getAllFiles $dir_or_file +# else +# checkfile $dir_or_file +# fi +# done +# } +# if [ -f "$1/modelzoo/$line" ]; +# then +# #echo $line +# checkfile $1/modelzoo/$line +# else +# getAllFiles $1/modelzoo/$line +# fi +# +#done < $1/pr_filelist.txt +# +#if [ $lincense_check -eq '1' ] ;then +# echo "License check failed, Please follow the guide to add License:" +# echo "https://gitee.com/ascend/modelzoo/blob/master/contrib/CONTRIBUTING.md" +# exit 1 +#fi #如果新增的都是目录,则无需执行用例,直接返回OK check_res=0 while read line @@ -173,7 +200,6 @@ if [ $check_res -eq '0' ] ;then echo "Add directorys in contrib/Research, No need to run testcases!" exit 0 fi - #代码安全检查模块 while read line do @@ -198,6 +224,11 @@ do fi done < $1/pr_filelist.txt +endtime=`date +'%Y-%m-%d %H:%M:%S'` +start_seconds=$(date --date="$starttime" +%s); +end_seconds=$(date --date="$endtime" +%s); +echo "本次运行时间: "$((end_seconds-start_seconds))"s" +:< Date: Thu, 9 Jun 2022 17:11:39 +0800 Subject: [PATCH 05/14] update --- AcessScan/access_upline.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/AcessScan/access_upline.py b/AcessScan/access_upline.py index 275e9a7494..c99bb60c3f 100644 --- a/AcessScan/access_upline.py +++ b/AcessScan/access_upline.py @@ -830,7 +830,8 @@ def main(): print('=================Start to Check Sensitive Information =================') check_Sensitive_content(path_pr_list, fram_str, modelzoo_dir) print('=================Start to Check Modelzoo Level =================') - modelzoo_level_check(path_pr_list, fram_str, modelzoo_dir) + # TODO official已删,不需要再检查放哪了,modelzoo_level检查可以继续保留 + # modelzoo_level_check(path_pr_list, fram_str, modelzoo_dir) print('=================Start to Check File&Keywords of Test Directory =================') file_word_check(fram_str, modelzoo_dir, path_pr_list, train_full_keyword, train_performance_keyword) print('=================Start to Check core_binding&Device Id status =================') -- Gitee From 9f81b0fac0ae8d6a563fbf5cac3fd29c35b55fb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E5=A7=9C=E5=A5=94?= Date: Thu, 9 Jun 2022 17:37:28 +0800 Subject: [PATCH 06/14] update TODO --- AcessScan/access_upline.py | 362 ++++++++++++++++++++++--------------- 1 file changed, 221 insertions(+), 141 deletions(-) diff --git a/AcessScan/access_upline.py b/AcessScan/access_upline.py index c99bb60c3f..8e02bdf5dc 100644 --- a/AcessScan/access_upline.py +++ b/AcessScan/access_upline.py @@ -11,6 +11,8 @@ import chardet import re import time import json + + def init_args(): parser = argparse.ArgumentParser() parser.add_argument('--model_dir', type=str, default="./AlexNet_for_TensorFlow", @@ -26,13 +28,19 @@ def init_args(): parser.add_argument('--train_full_keyword', type=str, default="./train_full_keyword.txt", help='keywords of the train_full_keyword file') return parser.parse_args() + + +# TODO 过于简单函数整改 def model_str_name(model_dir): ''' 功能:将路径转换为字符串 ''' model_dir_str = str(model_dir) return model_dir_str -def file_size_check(path_pr_list, FileSizeLimit, fram_str,modelzoo_dir,dot=2,): + + +# gitee上已经设置限制到文件到1M,这边可以设置更小,可以针对文件类型设置下 +def file_size_check(path_pr_list, FileSizeLimit, fram_str, modelzoo_dir, dot=2, ): ''' :param model_dir : 网络目录 :param FileSizeLimit : 文件限制大小 @@ -41,7 +49,7 @@ def file_size_check(path_pr_list, FileSizeLimit, fram_str,modelzoo_dir,dot=2,): filesize_check = 0 dict1 = {} with open(path_pr_list, 'r') as fooc: - # 读取pr_filelist.txt的内容 + # 读取pr_filelist.txt的内容 for model_dir in fooc: ''' 判断并处理三种类型的文件: LICENSE,py文件,其他文件 @@ -49,7 +57,7 @@ def file_size_check(path_pr_list, FileSizeLimit, fram_str,modelzoo_dir,dot=2,): model_dir = model_dir.strip('\n') # 获取模型框架路径字符串 model_dir_str = model_str_name(model_dir) - #拼装路径 + # 拼装路径 model_dir2 = fram_str + modelzoo_dir + '/' + model_dir_str model_dir1 = Path(model_dir2) if model_dir1.exists(): @@ -63,13 +71,16 @@ def file_size_check(path_pr_list, FileSizeLimit, fram_str,modelzoo_dir,dot=2,): pass for key, value in dict1.items(): if float(value) >= FileSizeLimit: - print('{},size of file is {}M and greater than {}M,please check and delete it!'.format(key,value, FileSizeLimit)) + print('{},size of file is {}M and greater than {}M,please check and delete it!'.format(key, value, + FileSizeLimit)) filesize_check = 1 else: continue fooc.close() print('filesize_check=%d' % filesize_check) -def file_scan(path_pr_list,fram_str,modelzoo_dir): + + +def file_scan(path_pr_list, fram_str, modelzoo_dir): ''' 功能:判断.py/.cpp文件中是否存在关键字LICENSE/license,若不存在,则返回license_check状态为1,即失败; path_pr_list:pr_filelist.txt文件完整路径,其内容包含需要扫描文件 @@ -83,7 +94,7 @@ def file_scan(path_pr_list,fram_str,modelzoo_dir): ''' 判断并处理三种类型的文件: LICENSE,py文件,其他文件 ''' - #去除换行符 + # 去除换行符 model_dir = model_dir.strip('\n') # 获取模型框架路径字符串 model_dir_str = model_str_name(model_dir) @@ -91,23 +102,25 @@ def file_scan(path_pr_list,fram_str,modelzoo_dir): py_file = model_dir_str[-3:] cpp_file = model_dir_str[-4:] init_file = model_dir_str[-11:] - if (py_file == '.py') or (cpp_file == '.cpp'): - #排除init文件 + if (py_file == '.py') or (cpp_file == '.cpp'): + # 排除init文件 if init_file == '__init__.py': continue # 判断文件中是否存在关键字LICENSE/license else: - LICENSE,license,License,Licence,licence,LICENCE= 'LICENSE', 'license','License','Licence','licence','LICENCE' + LICENSE, license, License, Licence, licence, LICENCE = 'LICENSE', 'license', 'License', 'Licence', 'licence', 'LICENCE' model_dir1 = fram_str + modelzoo_dir + '/' + model_dir_str model_dir = Path(model_dir1) if model_dir.exists(): - with open(str(model_dir), 'r',encoding='gb18030',errors='ignore') as foob: + with open(str(model_dir), 'r', encoding='gb18030', errors='ignore') as foob: content = foob.read() - if (LICENSE in content) or (license in content) or (License in content) or (Licence in content) or (licence in content) or (LICENCE in content): + if (LICENSE in content) or (license in content) or (License in content) or ( + Licence in content) or (licence in content) or (LICENCE in content): continue else: license_check = 1 - print('{},The keyword license no exists in the file,please check and add it!'.format(model_dir_str)) + print('{},The keyword license no exists in the file,please check and add it!'.format( + model_dir_str)) foob.close() else: pass @@ -115,9 +128,13 @@ def file_scan(path_pr_list,fram_str,modelzoo_dir): if license_check == 1: print('License check failed, Please follow the guide to add License:') print('https://gitee.com/ascend/modelzoo/blob/master/contrib/CONTRIBUTING.md') + +# TODO 过于简单函数整改 def spt_path(pr_filelist0_str): model_name_list = pr_filelist0_str.split('/') return model_name_list + +# TODO 简化解析函数 def get_model_fram(pr_filelist0_str): ''' 功能:获取网络框架名称路径 @@ -171,10 +188,12 @@ def get_model_fram(pr_filelist0_str): a = pr_filelist0_str.index(ms_str) b = a + 7 model_fram = pr_filelist0_str[:b] - else : + else: model_fram = '' - return model_fram -def check_firstlevel_file(path_pr_list,fram_str,modelzoo_dir): + return model_fram + +# TODO 拆分功能 +def check_firstlevel_file(path_pr_list, fram_str, modelzoo_dir): ''' 功能1:检查首层目录是否存在必要LICENSE文件 功能2:检查首层目录是否存在必要README.md文件 @@ -186,21 +205,21 @@ def check_firstlevel_file(path_pr_list,fram_str,modelzoo_dir): fram_str:pr_filelist.txt文件所在的当前目录 modelzoo_dir:modelzoo,字符串,用于拼接网络代码所在的完整路径 ''' - #不规范标识字段,0:pass,1:fail + # 不规范标识字段,0:pass,1:fail firstlevel_check = 0 - #readme检查 + # readme检查 firstlevel_check1 = 0 - #00-access垃圾目录检查 + # 00-access垃圾目录检查 firstlevel_check2 = 0 firstlevel_check6 = 0 - #LICENSE文件检查 + # LICENSE文件检查 firstlevel_check3 = 0 - #modelzoo_level.txt检查 + # modelzoo_level.txt检查 firstlevel_check4 = 0 - #requirements.txt检查 + # requirements.txt检查 firstlevel_check5 = 0 firstlevel_check8 = 0 - #kernel_metala垃圾目录 + # kernel_metala垃圾目录 firstlevel_check7 = 0 with open(path_pr_list, 'r') as fooa: for filepath_inprlist in fooa: @@ -218,21 +237,21 @@ def check_firstlevel_file(path_pr_list,fram_str,modelzoo_dir): file5.close() h = 'requirements.txt' g = 'modelzoo_level.txt' - #如果网络名称不规范,排除推理及高校 - #if (model_fram == '' and 'built-in/ACL_' not in content4 and 'contrib' not in content4 ) : # 网络名称不规范处理 + # 如果网络名称不规范,排除推理及高校 + # if (model_fram == '' and 'built-in/ACL_' not in content4 and 'contrib' not in content4 ) : # 网络名称不规范处理 if (model_fram == ''): # 网络名称不规范处理 - #获取文件名 + # 获取文件名 fram_path2 = fram_str + modelzoo_dir + '/' + pr_filelist0_str file_name2 = os.path.basename(fram_path2) - #截取不规范网络名称路径 + # 截取不规范网络名称路径 b = filepath_inprlist.index(file_name2) fram_unst_dname = filepath_inprlist[:b] if fram_unst_dname != '': fram_unst_dname_true = fram_str + modelzoo_dir + '/' + fram_unst_dname filepath_inprlist2 = Path(fram_unst_dname_true) if filepath_inprlist2.exists(): - #获取首层目录下所有文件 - file_name3 =os.listdir(fram_unst_dname_true) + # 获取首层目录下所有文件 + file_name3 = os.listdir(fram_unst_dname_true) with open('first_filename3.txt', 'w') as file: file.write(str(file_name3)) file.close() @@ -240,7 +259,7 @@ def check_firstlevel_file(path_pr_list,fram_str,modelzoo_dir): content2 = file2.read() file2.close() a, b, c, d, e = 'README', 'readme', 'LICENSE', 'Readme', 'ReadMe' - if (a in content2) or (d in content2) or (b in content2) or (e in content2): + if (a in content2) or (d in content2) or (b in content2) or (e in content2): if c in content2: if g not in content2: firstlevel_check4 = 4 @@ -253,9 +272,9 @@ def check_firstlevel_file(path_pr_list,fram_str,modelzoo_dir): filepath_inprlist1 = Path(fram_path) if filepath_inprlist1.exists(): # 获取首层目录下所有文件名称 - a,b,c,d,e = 'README','readme','LICENSE','Readme','ReadMe' + a, b, c, d, e = 'README', 'readme', 'LICENSE', 'Readme', 'ReadMe' filelist = os.listdir(fram_path) - with open('first_filename.txt','w') as file: + with open('first_filename.txt', 'w') as file: file.write(str(filelist)) file.close() with open('first_filename.txt', 'r') as file1: @@ -274,13 +293,13 @@ def check_firstlevel_file(path_pr_list,fram_str,modelzoo_dir): else: pass - #kernel_meta目录视为垃圾目录,拒绝入仓 + # kernel_meta目录视为垃圾目录,拒绝入仓 if '/kernel_meta/' in content4: firstlevel_check7 = 1 if firstlevel_check1 == 2: print('{},{} is not exist,please check and add it!'.format(get_model_fram(pr_filelist0_str), a)) firstlevel_check = 1 - if firstlevel_check8 ==8: + if firstlevel_check8 == 8: print('{},{} is not exist,please check and add it!'.format(get_model_fram(pr_filelist0_str), h)) firstlevel_check = 1 if firstlevel_check3 == 4: @@ -289,11 +308,14 @@ def check_firstlevel_file(path_pr_list,fram_str,modelzoo_dir): print('https://gitee.com/ascend/modelzoo/blob/master/contrib/CONTRIBUTING.md') firstlevel_check = 1 if firstlevel_check4 == 4: - print('{},The {} file is non-existent in the model code of the file,Please follow the guide to add {}:'.format(fram_unst_dname, g, g)) + print( + '{},The {} file is non-existent in the model code of the file,Please follow the guide to add {}:'.format( + fram_unst_dname, g, g)) print('https://gitee.com/ascend/modelzoo/blob/master/contrib/CONTRIBUTING.md') firstlevel_check = 1 if firstlevel_check5 == 1: - print('{},The {} file is non-existent in the model code of the file,Please check and add {}:'.format(fram_unst_dname, h, h)) + print('{},The {} file is non-existent in the model code of the file,Please check and add {}:'.format( + fram_unst_dname, h, h)) firstlevel_check = 1 if firstlevel_check6 == 1: firstlevel_check = 1 @@ -305,7 +327,9 @@ def check_firstlevel_file(path_pr_list,fram_str,modelzoo_dir): firstlevel_check = 1 print('{},00-access directory should not exist,please delete it!'.format(get_model_fram(pr_filelist0_str))) print('firstlevel_check=%d' % firstlevel_check) -def junk_file(path_pr_list,fram_str,modelzoo_dir): + +# TODO 不需要存在,只需要维护.gitignore即可 +def junk_file(path_pr_list, fram_str, modelzoo_dir): ''' 功能:检测当前路径下所有垃圾文件 参数:pr_filelist0_str:字符串化后的路径 @@ -317,7 +341,9 @@ def junk_file(path_pr_list,fram_str,modelzoo_dir): pr_filelist0_str1 = fram_str + modelzoo_dir + '/' + filepath_inprlist pr_filelist0_str = Path(pr_filelist0_str1) if pr_filelist0_str.exists(): - funk_file_typr = ['.log', '.pbtxt', '.pb', '.h5', '.so', '.zip', '.tar', '.event','.tar.gz','.swp','.ipynb','.pyc','.novalocal','.bin','.pth','.onnx','.npy','.om','.pkl','.pt','.mat','.tfrecord'] + funk_file_typr = ['.log', '.pbtxt', '.pb', '.h5', '.so', '.zip', '.tar', '.event', '.tar.gz', '.swp', + '.ipynb', '.pyc', '.novalocal', '.bin', '.pth', '.onnx', '.npy', '.om', '.pkl', '.pt', + '.mat', '.tfrecord'] junl_file_typr1 = ['.jpg', '.png'] # 获取文件名 file_name = os.path.basename(str(pr_filelist0_str)) @@ -335,15 +361,15 @@ def junk_file(path_pr_list,fram_str,modelzoo_dir): if 'network_need_files.txt' in str(pr_filelist0_str): funkfile_check = 1 print('{}, The file is Junk file,please check and delete it!'.format(filepath_inprlist)) - #loss*.txt 视为垃圾文件 - loss_file_list = re.findall(r"\w*loss\w*.txt",file_name) + # loss*.txt 视为垃圾文件 + loss_file_list = re.findall(r"\w*loss\w*.txt", file_name) loss_png_list = re.findall(r"\w*loss\w*.png", file_name) if loss_file_list != [] or loss_png_list != []: funkfile_check = 1 print('{}, The file is Junk file,please check and delete it!'.format(filepath_inprlist)) - datas_path = ['data', 'datas','datasets'] + datas_path = ['data', 'datas', 'datasets'] for data in datas_path: - #目录中存在data目录 + # 目录中存在data目录 if data in str(filepath_inprlist): data_str = data path_list = filepath_inprlist.split('/') @@ -351,12 +377,12 @@ def junk_file(path_pr_list,fram_str,modelzoo_dir): if data_str in datapath: a = filepath_inprlist.index(datapath) fram_data = filepath_inprlist[:a] - #data所在路径 + # data所在路径 data_path = fram_str + modelzoo_dir + '/' + fram_data + datapath - #如果路径真是存在 + # 如果路径真是存在 fram_data_str = Path(data_path) if fram_data_str.exists(): - #获取文件名称的后缀 + # 获取文件名称的后缀 file_suffix1 = os.path.splitext(pr_filelist0_str1)[-1] if file_suffix1 in junl_file_typr1: # .jpg/.png在modelzoo下文件路径 @@ -373,7 +399,9 @@ def junk_file(path_pr_list,fram_str,modelzoo_dir): pass fooa.close() print('funkfile_check=%d' % funkfile_check) -def check_link(path_pr_list, fram_str,modelzoo_dir,onelink): + +# TODO 不需要存在,只需要维护.gitignore即可 +def check_link(path_pr_list, fram_str, modelzoo_dir, onelink): ''' 功能:检测文件内部是否包含内部链接 fram_file:文件所在路径 @@ -386,37 +414,42 @@ def check_link(path_pr_list, fram_str,modelzoo_dir,onelink): pr_filelist0_str1 = fram_str + modelzoo_dir + '/' + filepath_inprlist # 将路径名称字符串化 file_name = model_str_name(pr_filelist0_str1) - if ('README' not in file_name) and ('readme' not in file_name) and ('Readme' not in file_name) and ('ReadMe' not in file_name): + if ('README' not in file_name) and ('readme' not in file_name) and ('Readme' not in file_name) and ( + 'ReadMe' not in file_name): pr_filelist0_str = Path(file_name) if pr_filelist0_str.exists(): - with open(str(pr_filelist0_str), 'r',encoding='gb18030',errors='ignore') as foo: + with open(str(pr_filelist0_str), 'r', encoding='gb18030', errors='ignore') as foo: for words in foo: if onelink in words: link = onelink[0:] internal_link_check = 1 - print('{},This is an internal links that includes {},please check this line that: {}'.format(filepath_inprlist,link,words)) + print( + '{},This is an internal links that includes {},please check this line that: {}'.format( + filepath_inprlist, link, words)) else: continue foo.close() else: pass print('internal_link_check=%d' % internal_link_check) -def check_Sensitive_content(path_pr_list,fram_str,modelzoo_dir): + +# TODO 含义复制,需要重构 +def check_Sensitive_content(path_pr_list, fram_str, modelzoo_dir): with open(os.getcwd() + "/upline_access_black_http.json", 'r') as load_f: load_dict = json.load(load_f) with open(path_pr_list, 'r') as fooa: sensitive_check = 0 for fram_file_dir in fooa: - #去除换行操作 - fram_file_dir= fram_file_dir.strip('\n') - #pr中文件绝对路径 + # 去除换行操作 + fram_file_dir = fram_file_dir.strip('\n') + # pr中文件绝对路径 pr_filelist0_str1 = fram_str + modelzoo_dir + '/' + fram_file_dir - #判断文件是否存在 + # 判断文件是否存在 file_name = model_str_name(pr_filelist0_str1) pr_filelist0_str = Path(file_name) if pr_filelist0_str.exists(): - #如果文件存在,打开文件 - with open(str(pr_filelist0_str), 'r',encoding='gb18030',errors='ignore') as foo: + # 如果文件存在,打开文件 + with open(str(pr_filelist0_str), 'r', encoding='gb18030', errors='ignore') as foo: for words in foo: words = words.strip('\n') if ('0.00' not in words) and ('0.' not in words): @@ -442,18 +475,23 @@ def check_Sensitive_content(path_pr_list,fram_str,modelzoo_dir): for words in fooc: words = words.strip('\n') # 不合规配置环境变量1、export install_path 2、export LD_LIBRARY_PATH 3、export PYTHONPATH 4、export PATH 5、export ASCEND_OPP_PATH - if re.findall(r'\w*export install_path\w*', words) or re.findall(r'\w*export LD_LIBRARY_PATH\w*', words) or re.findall(r'\w*export PATH\w*', words) \ - or re.findall(r'\w*export ASCEND_OPP_PATH\w*', words): - print('{}, There are non compliant configuration environment variables, please check the line that is: {}'.format( + if re.findall(r'\w*export install_path\w*', words) or re.findall( + r'\w*export LD_LIBRARY_PATH\w*', words) or re.findall(r'\w*export PATH\w*', words) \ + or re.findall(r'\w*export ASCEND_OPP_PATH\w*', words): + print( + '{}, There are non compliant configuration environment variables, please check the line that is: {}'.format( fram_file_dir, words)) sensitive_check = 1 - if re.findall(r'\w*export PYTHONPATH\w*', words) and re.findall(r'\w*install_path\w*', words): - print('{}, There are non compliant configuration environment variables, please check the line that is: {}'.format( + if re.findall(r'\w*export PYTHONPATH\w*', words) and re.findall(r'\w*install_path\w*', + words): + print( + '{}, There are non compliant configuration environment variables, please check the line that is: {}'.format( fram_file_dir, words)) sensitive_check = 1 fooc.close() - if ('README' not in file_name) and ('readme' not in file_name) and ('Readme' not in file_name) and ('ReadMe' not in file_name): - with open(str(pr_filelist0_str), 'r',encoding='gb18030',errors='ignore') as fooc: + if ('README' not in file_name) and ('readme' not in file_name) and ('Readme' not in file_name) and ( + 'ReadMe' not in file_name): + with open(str(pr_filelist0_str), 'r', encoding='gb18030', errors='ignore') as fooc: for words in fooc: # ip 识别 if ('device_ip' in words) or ('server_id' in words): @@ -463,21 +501,22 @@ def check_Sensitive_content(path_pr_list,fram_str,modelzoo_dir): words) or \ re.findall( r'https://\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b', - words): #or \ - # re.findall( - # r'(? Date: Thu, 9 Jun 2022 17:51:13 +0800 Subject: [PATCH 07/14] update TODO --- AcessScan/access_upline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AcessScan/access_upline.py b/AcessScan/access_upline.py index 8e02bdf5dc..986114c584 100644 --- a/AcessScan/access_upline.py +++ b/AcessScan/access_upline.py @@ -870,7 +870,7 @@ def scan_core_binding(path_pr_list, fram_str, modelzoo_dir): print('{},The file has binding cores, please check and modify it'.format(pr_filelist0_str)) print('core_binding_check=%d' % core_binding_check) -# 无效代码删除,对应每个检查部分需要做好注释工作,另外硬编码较多,需要看看是否合理 +# TODO 无效代码删除,对应每个检查部分需要做好注释工作,另外硬编码较多,需要看看是否合理 def main(): args = init_args() path_pr_list = args.pr_filelist_dir -- Gitee From 57ff0e82ab008354ebba71fa25c4091550bfb242 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E5=A7=9C=E5=A5=94?= Date: Fri, 10 Jun 2022 15:07:10 +0800 Subject: [PATCH 08/14] update --- AcessScan/access_upline.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/AcessScan/access_upline.py b/AcessScan/access_upline.py index 986114c584..6001d97c12 100644 --- a/AcessScan/access_upline.py +++ b/AcessScan/access_upline.py @@ -911,7 +911,8 @@ def main(): # TODO official已删,不需要再检查放哪了,modelzoo_level检查可以继续保留 # modelzoo_level_check(path_pr_list, fram_str, modelzoo_dir) print('=================Start to Check File&Keywords of Test Directory =================') - file_word_check(fram_str, modelzoo_dir, path_pr_list, train_full_keyword, train_performance_keyword) + # TODO 兼容train*full*.sh形式 + # file_word_check(fram_str, modelzoo_dir, path_pr_list, train_full_keyword, train_performance_keyword) print('=================Start to Check core_binding&Device Id status =================') scan_core_binding(path_pr_list, fram_str, modelzoo_dir) -- Gitee From e1c0a0b35538cf4897b0a66d297cc2260ab37b01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E5=A7=9C=E5=A5=94?= Date: Fri, 10 Jun 2022 16:09:27 +0800 Subject: [PATCH 09/14] update TODO --- AcessScan/access_upline.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/AcessScan/access_upline.py b/AcessScan/access_upline.py index 6001d97c12..fa436bba53 100644 --- a/AcessScan/access_upline.py +++ b/AcessScan/access_upline.py @@ -400,7 +400,7 @@ def junk_file(path_pr_list, fram_str, modelzoo_dir): fooa.close() print('funkfile_check=%d' % funkfile_check) -# TODO 不需要存在,只需要维护.gitignore即可 +# TODO 软链检查会报错,详见PR718 def check_link(path_pr_list, fram_str, modelzoo_dir, onelink): ''' 功能:检测文件内部是否包含内部链接 @@ -901,10 +901,11 @@ def main(): print('=================Start to Check file of First Directory =================') check_firstlevel_file(path_pr_list, fram_str, modelzoo_dir) print('=================Start to Check Internal Link =================') - with open(alink, 'r') as food: - for onelink in food: - onelink = onelink.strip('\n') - check_link(path_pr_list, fram_str, modelzoo_dir, onelink) + # TODO 软链检查会报错,详见PR718 + # with open(alink, 'r') as food: + # for onelink in food: + # onelink = onelink.strip('\n') + # check_link(path_pr_list, fram_str, modelzoo_dir, onelink) print('=================Start to Check Sensitive Information =================') check_Sensitive_content(path_pr_list, fram_str, modelzoo_dir) print('=================Start to Check Modelzoo Level =================') -- Gitee From 58e85b2bed3f3c83650ba5657b140d772fbe63a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E5=A7=9C=E5=A5=94?= Date: Fri, 10 Jun 2022 16:38:53 +0800 Subject: [PATCH 10/14] update TODO --- AcessScan/access_upline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AcessScan/access_upline.py b/AcessScan/access_upline.py index fa436bba53..171f7d39f9 100644 --- a/AcessScan/access_upline.py +++ b/AcessScan/access_upline.py @@ -447,7 +447,7 @@ def check_Sensitive_content(path_pr_list, fram_str, modelzoo_dir): # 判断文件是否存在 file_name = model_str_name(pr_filelist0_str1) pr_filelist0_str = Path(file_name) - if pr_filelist0_str.exists(): + if pr_filelist0_str.exists() and os.path.isfile(pr_filelist0_str): # 如果文件存在,打开文件 with open(str(pr_filelist0_str), 'r', encoding='gb18030', errors='ignore') as foo: for words in foo: -- Gitee From e04a283d4a30e4c2141b4c4ad81000552c2ee7a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E5=A7=9C=E5=A5=94?= Date: Wed, 15 Jun 2022 10:15:20 +0800 Subject: [PATCH 11/14] update TODO --- AcessScan/access_upline.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/AcessScan/access_upline.py b/AcessScan/access_upline.py index 171f7d39f9..ca7d10fdef 100644 --- a/AcessScan/access_upline.py +++ b/AcessScan/access_upline.py @@ -915,7 +915,8 @@ def main(): # TODO 兼容train*full*.sh形式 # file_word_check(fram_str, modelzoo_dir, path_pr_list, train_full_keyword, train_performance_keyword) print('=================Start to Check core_binding&Device Id status =================') - scan_core_binding(path_pr_list, fram_str, modelzoo_dir) + # TODO 没有必要进行绑核检查 + # scan_core_binding(path_pr_list, fram_str, modelzoo_dir) if __name__ == '__main__': -- Gitee From bcfbd845a3143a444198674c61f19398f83f49ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E5=A7=9C=E5=A5=94?= Date: Thu, 16 Jun 2022 20:25:59 +0800 Subject: [PATCH 12/14] update --- AcessScan/access_upline.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/AcessScan/access_upline.py b/AcessScan/access_upline.py index ca7d10fdef..e7480e4552 100644 --- a/AcessScan/access_upline.py +++ b/AcessScan/access_upline.py @@ -907,7 +907,8 @@ def main(): # onelink = onelink.strip('\n') # check_link(path_pr_list, fram_str, modelzoo_dir, onelink) print('=================Start to Check Sensitive Information =================') - check_Sensitive_content(path_pr_list, fram_str, modelzoo_dir) + # TODO 黑名单不合理,需要check + # check_Sensitive_content(path_pr_list, fram_str, modelzoo_dir) print('=================Start to Check Modelzoo Level =================') # TODO official已删,不需要再检查放哪了,modelzoo_level检查可以继续保留 # modelzoo_level_check(path_pr_list, fram_str, modelzoo_dir) -- Gitee From 6e757ee7fdbe2cb77643b0b646573a1a31e90af2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E5=A7=9C=E5=A5=94?= Date: Tue, 19 Jul 2022 03:10:15 +0000 Subject: [PATCH 13/14] update AcessScan/access_upline.py. --- AcessScan/access_upline.py | 1163 ++++++++---------------------------- 1 file changed, 259 insertions(+), 904 deletions(-) diff --git a/AcessScan/access_upline.py b/AcessScan/access_upline.py index e7480e4552..23ee2b8ca9 100644 --- a/AcessScan/access_upline.py +++ b/AcessScan/access_upline.py @@ -1,924 +1,279 @@ -import sys -import os -import os.path -import filecmp -import argparse -import shutil -import gzip -import math -from pathlib import Path -import chardet import re -import time +import os +import sys import json +import math +import glob +import argparse +class AccessCodeCheck(object): + """ModelZoo 门禁代码检查""" + def __init__(self): + # self.modelzoo_dir = "ModelZoo-PyTorch" + self.modelzoo_dir = "modelzoo" + self.args = self.init_args() + self.prListFile = self.args.pr_filelist_dir + self.fram_str = self.prListFile[:self.prListFile.index('pr_filelist.txt')] + self.pr_filelist = self.get_result_dict() + self.succResultList = [] + self.failResultList = [] + self.errorResultList = [] + # self.fileNameList = self.get_file_name_list() + # self.fullPathList = self.get_full_path_list() -def init_args(): - parser = argparse.ArgumentParser() - parser.add_argument('--model_dir', type=str, default="./AlexNet_for_TensorFlow", - help='model dirrectory of the project') - parser.add_argument('--pr_filelist_dir', type=str, default="./pr_filelist0.txt", - help='model dirrectory of the pr_filelist') - parser.add_argument('--linklisttxt', type=str, default='./link_list.txt', - help='model dirrectory of the link_list') - parser.add_argument('--FileSizeLimit', type=int, default=2, - help='model size of FileSizeLimit') - parser.add_argument('--train_performance_keyword', type=str, default="./train_performance_keyword.txt", - help='keywords of the train_performance_keyword file') - parser.add_argument('--train_full_keyword', type=str, default="./train_full_keyword.txt", - help='keywords of the train_full_keyword file') - return parser.parse_args() - - -# TODO 过于简单函数整改 -def model_str_name(model_dir): - ''' - 功能:将路径转换为字符串 - ''' - model_dir_str = str(model_dir) - return model_dir_str - - -# gitee上已经设置限制到文件到1M,这边可以设置更小,可以针对文件类型设置下 -def file_size_check(path_pr_list, FileSizeLimit, fram_str, modelzoo_dir, dot=2, ): - ''' - :param model_dir : 网络目录 - :param FileSizeLimit : 文件限制大小 - 实现功能:扫描文件大小,小于2MB - ''' - filesize_check = 0 - dict1 = {} - with open(path_pr_list, 'r') as fooc: - # 读取pr_filelist.txt的内容 - for model_dir in fooc: - ''' - 判断并处理三种类型的文件: LICENSE,py文件,其他文件 - ''' - model_dir = model_dir.strip('\n') - # 获取模型框架路径字符串 - model_dir_str = model_str_name(model_dir) - # 拼装路径 - model_dir2 = fram_str + modelzoo_dir + '/' + model_dir_str - model_dir1 = Path(model_dir2) - if model_dir1.exists(): - # 获取model_dir目录下所有文件 - pathTmp = str(model_dir1) - filesize = os.path.getsize(pathTmp) # 如果是文件,获取文件大小 - # 转换单位为兆 - filesize1 = str(round(filesize / math.pow(1024, 2), dot)) - dict1[model_dir_str] = filesize1 # 将文件大小添加到字典 - else: - pass - for key, value in dict1.items(): - if float(value) >= FileSizeLimit: - print('{},size of file is {}M and greater than {}M,please check and delete it!'.format(key, value, - FileSizeLimit)) - filesize_check = 1 - else: - continue - fooc.close() - print('filesize_check=%d' % filesize_check) - + def init_args(self): + """功能:读取通用参数""" + parser = argparse.ArgumentParser() + parser.add_argument('--pr_filelist_dir', type=str, default="./pr_filelist0.txt", + help='model dirrectory of the pr_filelist') + parser.add_argument('--linklisttxt', type=str, default="./link_list.txt", + help='model dirrectory of the link_list') + return parser.parse_args() -def file_scan(path_pr_list, fram_str, modelzoo_dir): - ''' - 功能:判断.py/.cpp文件中是否存在关键字LICENSE/license,若不存在,则返回license_check状态为1,即失败; - path_pr_list:pr_filelist.txt文件完整路径,其内容包含需要扫描文件 - fram_str:pr_filelist.txt文件所在的当前目录 - modelzoo_dir:modelzoo,字符串,用于拼接网络代码所在的完整路径 - ''' - with open(path_pr_list, 'r') as fooa: - # 读取pr_filelist.txt的内容 - license_check = 0 - for model_dir in fooa: - ''' - 判断并处理三种类型的文件: LICENSE,py文件,其他文件 - ''' - # 去除换行符 - model_dir = model_dir.strip('\n') - # 获取模型框架路径字符串 - model_dir_str = model_str_name(model_dir) - # # 判断.py/.cpp文件中是否存在关键字LICENSE/license - py_file = model_dir_str[-3:] - cpp_file = model_dir_str[-4:] - init_file = model_dir_str[-11:] - if (py_file == '.py') or (cpp_file == '.cpp'): - # 排除init文件 - if init_file == '__init__.py': - continue - # 判断文件中是否存在关键字LICENSE/license - else: - LICENSE, license, License, Licence, licence, LICENCE = 'LICENSE', 'license', 'License', 'Licence', 'licence', 'LICENCE' - model_dir1 = fram_str + modelzoo_dir + '/' + model_dir_str - model_dir = Path(model_dir1) - if model_dir.exists(): - with open(str(model_dir), 'r', encoding='gb18030', errors='ignore') as foob: - content = foob.read() - if (LICENSE in content) or (license in content) or (License in content) or ( - Licence in content) or (licence in content) or (LICENCE in content): - continue - else: - license_check = 1 - print('{},The keyword license no exists in the file,please check and add it!'.format( - model_dir_str)) - foob.close() - else: - pass - print('license_check=%d' % license_check) - if license_check == 1: - print('License check failed, Please follow the guide to add License:') - print('https://gitee.com/ascend/modelzoo/blob/master/contrib/CONTRIBUTING.md') + def get_result_dict(self): + """ + 功能:生成检查文件列表 + """ + with open(self.prListFile, 'r') as f: + content = f.read() + content = content.split('\n') + prfilelist = [] + for filename in content: + fullname = os.path.join(self.fram_str, self.modelzoo_dir, filename) + if os.path.exists(fullname): + prfilelist.append(filename.strip('\n')) + prfilelist = self.check_rawcode(prfilelist) + return prfilelist -# TODO 过于简单函数整改 -def spt_path(pr_filelist0_str): - model_name_list = pr_filelist0_str.split('/') - return model_name_list + def check_rawcode(self,filelist): + """ + 功能:判断文件是否属于开源源码,开源源码不做检查 + """ + no_raw_filelist = [] + raw_filelist = [] + for f in filelist: + path_list = f.split("/")[:-1] + for i, p in enumerate(path_list): + file_path = os.path.join(self.fram_str, self.modelzoo_dir, *path_list[:i]) + path_list_files = os.listdir(file_path) + if ".gitrawcode" in path_list_files: + raw_filelist.append(f) + break + for f in filelist: + if f not in raw_filelist: + no_raw_filelist.append(f) + return no_raw_filelist -# TODO 简化解析函数 -def get_model_fram(pr_filelist0_str): - ''' - 功能:获取网络框架名称路径 - :param pr_filelist0_dir: - :return: - ''' - if 'for_TensorFlow' in pr_filelist0_str: - if 'for_TensorFlow2.X' in pr_filelist0_str: - tf_str = 'for_TensorFlow2.X' - a = pr_filelist0_str.index(tf_str) - b = a + 17 - model_fram = pr_filelist0_str[:b] - else: - tf_str = 'for_TensorFlow' - a = pr_filelist0_str.index(tf_str) - b = a + 14 - model_fram = pr_filelist0_str[:b] - elif 'for_PyTorch' in pr_filelist0_str: - pt_str = 'for_PyTorch' - a = pr_filelist0_str.index(pt_str) - b = a + 11 - model_fram = pr_filelist0_str[:b] - elif 'for_MindSpore' in pr_filelist0_str: - ms_str = 'for_MindSpore' - a = pr_filelist0_str.index(ms_str) - b = a + 13 - model_fram = pr_filelist0_str[:b] - elif 'for_Tensorflow' in pr_filelist0_str: - if 'for_Tensorflow2.X' in pr_filelist0_str: - tf_str = 'for_Tensorflow2.X' - a = pr_filelist0_str.index(tf_str) - b = a + 17 - model_fram = pr_filelist0_str[:b] - else: - tf_str = 'for_Tensorflow' - a = pr_filelist0_str.index(tf_str) - b = a + 14 - model_fram = pr_filelist0_str[:b] - elif 'for_Pytorch' in pr_filelist0_str: - pt_str = 'for_Pytorch' - a = pr_filelist0_str.index(pt_str) - b = a + 11 - model_fram = pr_filelist0_str[:b] - elif 'for_Mindspore' in pr_filelist0_str: - ms_str = 'for_Mindspore' - a = pr_filelist0_str.index(ms_str) - b = a + 13 - model_fram = pr_filelist0_str[:b] - elif 'for_ACL' in pr_filelist0_str: - ms_str = 'for_ACL' - a = pr_filelist0_str.index(ms_str) - b = a + 7 - model_fram = pr_filelist0_str[:b] - else: - model_fram = '' - return model_fram + def file_size_check(self, prfile): + """ + 功能:扫描文件大小,小于1M + 备注:暂时按照小于1M处理,后续细化,不同文件类型不同大小限制。 + """ + prfilename = os.path.join(self.fram_str, self.modelzoo_dir, prfile) + prfilesize = os.path.getsize(prfilename) / math.pow(1024, 2) + if prfilesize < 1: + self.succResultList.append("{}: filesize less than 1M, check succ!".format(prfile)) + else: + self.failResultList.append("{}: filesize less than 1M, check fail!".format(prfile)) -# TODO 拆分功能 -def check_firstlevel_file(path_pr_list, fram_str, modelzoo_dir): - ''' - 功能1:检查首层目录是否存在必要LICENSE文件 - 功能2:检查首层目录是否存在必要README.md文件 - 功能3:检查首层目录是否存在必要requirements.txt文件 - 功能4:检查首层目录是否存在必要modelzoo_level.txt文件 - 功能5:垃圾目录00-access拒绝入仓 - 功能6:kernel_meta目录视为垃圾目录,拒绝入仓 - path_pr_list:pr_filelist.txt文件完整路径,其内容包含需要扫描文件 - fram_str:pr_filelist.txt文件所在的当前目录 - modelzoo_dir:modelzoo,字符串,用于拼接网络代码所在的完整路径 - ''' - # 不规范标识字段,0:pass,1:fail - firstlevel_check = 0 - # readme检查 - firstlevel_check1 = 0 - # 00-access垃圾目录检查 - firstlevel_check2 = 0 - firstlevel_check6 = 0 - # LICENSE文件检查 - firstlevel_check3 = 0 - # modelzoo_level.txt检查 - firstlevel_check4 = 0 - # requirements.txt检查 - firstlevel_check5 = 0 - firstlevel_check8 = 0 - # kernel_metala垃圾目录 - firstlevel_check7 = 0 - with open(path_pr_list, 'r') as fooa: - for filepath_inprlist in fooa: - filepath_inprlist = filepath_inprlist.strip('\n') - pr_filelist0_str = filepath_inprlist - model_fram = get_model_fram(pr_filelist0_str) - fram_path1 = fram_str + modelzoo_dir + '/' + model_fram - fram_path = model_str_name(fram_path1) - dir_str = model_str_name(filepath_inprlist) - with open('first_filename4.txt', 'w') as file4: - file4.write(str(dir_str)) - file4.close() - with open('first_filename4.txt', 'r') as file5: - content4 = file5.read() - file5.close() - h = 'requirements.txt' - g = 'modelzoo_level.txt' - # 如果网络名称不规范,排除推理及高校 - # if (model_fram == '' and 'built-in/ACL_' not in content4 and 'contrib' not in content4 ) : # 网络名称不规范处理 - if (model_fram == ''): # 网络名称不规范处理 - # 获取文件名 - fram_path2 = fram_str + modelzoo_dir + '/' + pr_filelist0_str - file_name2 = os.path.basename(fram_path2) - # 截取不规范网络名称路径 - b = filepath_inprlist.index(file_name2) - fram_unst_dname = filepath_inprlist[:b] - if fram_unst_dname != '': - fram_unst_dname_true = fram_str + modelzoo_dir + '/' + fram_unst_dname - filepath_inprlist2 = Path(fram_unst_dname_true) - if filepath_inprlist2.exists(): - # 获取首层目录下所有文件 - file_name3 = os.listdir(fram_unst_dname_true) - with open('first_filename3.txt', 'w') as file: - file.write(str(file_name3)) - file.close() - with open('first_filename3.txt', 'r') as file2: - content2 = file2.read() - file2.close() - a, b, c, d, e = 'README', 'readme', 'LICENSE', 'Readme', 'ReadMe' - if (a in content2) or (d in content2) or (b in content2) or (e in content2): - if c in content2: - if g not in content2: - firstlevel_check4 = 4 - if h not in content2: - firstlevel_check5 = 1 - if '00-access' in content2: - firstlevel_check6 = 1 - if model_fram != '': - # 判断路径是否真实存在 - filepath_inprlist1 = Path(fram_path) - if filepath_inprlist1.exists(): - # 获取首层目录下所有文件名称 - a, b, c, d, e = 'README', 'readme', 'LICENSE', 'Readme', 'ReadMe' - filelist = os.listdir(fram_path) - with open('first_filename.txt', 'w') as file: - file.write(str(filelist)) - file.close() - with open('first_filename.txt', 'r') as file1: - content1 = file1.read() - file1.close() - if '00-access' in content1: - firstlevel_check2 = 3 - if (a not in content1) and (d not in content1) and (b not in content1) and (e not in content1): - firstlevel_check1 = 2 - break - if c not in content1: - firstlevel_check3 = 4 - if h not in content1: - firstlevel_check8 = 8 - break + def license_check(self, prfile): + """ + 功能:判断.py/.cpp文件是否存在关键字LICENSE/license + """ + prfilename = os.path.join(self.fram_str, self.modelzoo_dir, prfile) + if (prfilename[-3:] == ".py") or (prfilename[-4:] == ".cpp"): + if prfilename[-11:] == "__init__.py": + self.succResultList.append("{}: is __init__.py, no need to check license!".format(prfile)) + else: + with open(prfilename, 'r') as f: + content = f.read() + content = content.lower() + if ("license" in content) or ("licence" in content): + self.succResultList.append("{}: contain LICENCE/licence, check succ!".format(prfile)) + else: + self.failResultList.append("{}: not contain LICENCE/licence, check fail!".format(prfile)) + else: + self.succResultList.append("{}: is not *.py/*.cpp, no need to check license!".format(prfile)) - else: - pass - # kernel_meta目录视为垃圾目录,拒绝入仓 - if '/kernel_meta/' in content4: - firstlevel_check7 = 1 - if firstlevel_check1 == 2: - print('{},{} is not exist,please check and add it!'.format(get_model_fram(pr_filelist0_str), a)) - firstlevel_check = 1 - if firstlevel_check8 == 8: - print('{},{} is not exist,please check and add it!'.format(get_model_fram(pr_filelist0_str), h)) - firstlevel_check = 1 - if firstlevel_check3 == 4: - print('{},{} is not exist,please check and add it!'.format(get_model_fram(pr_filelist0_str), c)) - print('License check failed, Please follow the guide to add LICENSE:') - print('https://gitee.com/ascend/modelzoo/blob/master/contrib/CONTRIBUTING.md') - firstlevel_check = 1 - if firstlevel_check4 == 4: - print( - '{},The {} file is non-existent in the model code of the file,Please follow the guide to add {}:'.format( - fram_unst_dname, g, g)) - print('https://gitee.com/ascend/modelzoo/blob/master/contrib/CONTRIBUTING.md') - firstlevel_check = 1 - if firstlevel_check5 == 1: - print('{},The {} file is non-existent in the model code of the file,Please check and add {}:'.format( - fram_unst_dname, h, h)) - firstlevel_check = 1 - if firstlevel_check6 == 1: - firstlevel_check = 1 - print('{},00-access directory should not exist,please delete it!'.format(fram_unst_dname)) - if firstlevel_check7 == 1: - firstlevel_check = 1 - print('{}, kernel_meta is junk directory,please check and delete it!'.format(pr_filelist0_str)) - if firstlevel_check2 == 3: - firstlevel_check = 1 - print('{},00-access directory should not exist,please delete it!'.format(get_model_fram(pr_filelist0_str))) - print('firstlevel_check=%d' % firstlevel_check) + def get_model_root(self, pr_file_str): + """ + 功能:获取网络框架根目录 + """ + modelRoot = "" + if "/" not in pr_file_str: + return modelRoot + path_list = pr_file_str.split('/') + path_list_len = len(path_list) + model_parent_file = '.modelparant' + while path_list_len > 1: + tmp_path = os.path.join(modelRoot, *path_list[:path_list_len]) + path_list_len -= 1 + if os.path.isfile(os.path.join(self.fram_str, self.modelzoo_dir, tmp_path)): + continue + if model_parent_file in os.listdir(os.path.join(self.fram_str, self.modelzoo_dir, tmp_path)): + return os.path.join(modelRoot, *path_list[:path_list_len]) + return "" -# TODO 不需要存在,只需要维护.gitignore即可 -def junk_file(path_pr_list, fram_str, modelzoo_dir): - ''' - 功能:检测当前路径下所有垃圾文件 - 参数:pr_filelist0_str:字符串化后的路径 - ''' - with open(path_pr_list, 'r') as fooa: - funkfile_check = 0 - for filepath_inprlist in fooa: - filepath_inprlist = filepath_inprlist.strip('\n') - pr_filelist0_str1 = fram_str + modelzoo_dir + '/' + filepath_inprlist - pr_filelist0_str = Path(pr_filelist0_str1) - if pr_filelist0_str.exists(): - funk_file_typr = ['.log', '.pbtxt', '.pb', '.h5', '.so', '.zip', '.tar', '.event', '.tar.gz', '.swp', - '.ipynb', '.pyc', '.novalocal', '.bin', '.pth', '.onnx', '.npy', '.om', '.pkl', '.pt', - '.mat', '.tfrecord'] - junl_file_typr1 = ['.jpg', '.png'] - # 获取文件名 - file_name = os.path.basename(str(pr_filelist0_str)) - # 获取文件后缀名 - file_suffix = os.path.splitext(file_name)[1] - if file_suffix in funk_file_typr: - funkfile_check = 1 - print('{}, The file is Junk file, please check and delete it !'.format(filepath_inprlist)) - if '.ckpt' in str(pr_filelist0_str): - funkfile_check = 1 - print('{}, The file is Junk file, please check and delete it !'.format(filepath_inprlist)) - if 'events.out.' in str(pr_filelist0_str): - funkfile_check = 1 - print('{}, The file is Junk file, please check and delete it !'.format(filepath_inprlist)) - if 'network_need_files.txt' in str(pr_filelist0_str): - funkfile_check = 1 - print('{}, The file is Junk file,please check and delete it!'.format(filepath_inprlist)) - # loss*.txt 视为垃圾文件 - loss_file_list = re.findall(r"\w*loss\w*.txt", file_name) - loss_png_list = re.findall(r"\w*loss\w*.png", file_name) - if loss_file_list != [] or loss_png_list != []: - funkfile_check = 1 - print('{}, The file is Junk file,please check and delete it!'.format(filepath_inprlist)) - datas_path = ['data', 'datas', 'datasets'] - for data in datas_path: - # 目录中存在data目录 - if data in str(filepath_inprlist): - data_str = data - path_list = filepath_inprlist.split('/') - for datapath in path_list: - if data_str in datapath: - a = filepath_inprlist.index(datapath) - fram_data = filepath_inprlist[:a] - # data所在路径 - data_path = fram_str + modelzoo_dir + '/' + fram_data + datapath - # 如果路径真是存在 - fram_data_str = Path(data_path) - if fram_data_str.exists(): - # 获取文件名称的后缀 - file_suffix1 = os.path.splitext(pr_filelist0_str1)[-1] - if file_suffix1 in junl_file_typr1: - # .jpg/.png在modelzoo下文件路径 - funkfile_check = 1 - print('{}, The file is Junk file,please check it!'.format(filepath_inprlist)) - break - if 'ge_proto_' in str(pr_filelist0_str) and file_suffix == '.txt': - funkfile_check = 1 - print('{}, The file is Junk file,please check it!'.format(filepath_inprlist)) - if 'events.' in str(pr_filelist0_str) and file_suffix == '.novalocal': - funkfile_check = 1 - print('{}, The file is Junk file,please check it!'.format(filepath_inprlist)) - else: - pass - fooa.close() - print('funkfile_check=%d' % funkfile_check) + def firstlevel_file_check(self, prfile): + """ + 功能:判断网络框架根目录下是否包含必要的文件 + LICENCE,requirements.txt,modelzoo_level.txt, readme.md + """ + model_root = self.get_model_root(prfile) + check_list = ["requirements.txt", "modelzoo_level.txt", "readme.md"] + if model_root == "": + self.succResultList.append("{}: at root path,no need to check, check succ!".format(prfile)) + if model_root != "": + print("model_root: ",model_root) + model_root_filelist_tmp = os.listdir(os.path.join(self.fram_str, self.modelzoo_dir, model_root)) + model_root_filelist = [] + for model_root_file in model_root_filelist_tmp: + model_root_filelist.append(model_root_file.lower()) + for check_file in check_list: + if check_file not in model_root_filelist: + self.failResultList.append("{}: at model root path, does not contain {}, check fail!".format(model_root, check_file)) + else: + self.succResultList.append("{}: at model root path, contain {}, check succ!".format(model_root, check_file)) + if ("license" in model_root_filelist) or ("licence" in model_root_filelist): + self.succResultList.append("{}: at model root path, contain {}, check succ!".format(model_root, check_file)) + else: + self.failResultList.append("{}: at model root path, does not contain {}, check fail!".format(model_root, check_file)) -# TODO 软链检查会报错,详见PR718 -def check_link(path_pr_list, fram_str, modelzoo_dir, onelink): - ''' - 功能:检测文件内部是否包含内部链接 - fram_file:文件所在路径 - alink: 一条字符串化的链接 - ''' - with open(path_pr_list, 'r') as fooa: - internal_link_check = 0 - for filepath_inprlist in fooa: - filepath_inprlist = filepath_inprlist.strip('\n') - pr_filelist0_str1 = fram_str + modelzoo_dir + '/' + filepath_inprlist - # 将路径名称字符串化 - file_name = model_str_name(pr_filelist0_str1) - if ('README' not in file_name) and ('readme' not in file_name) and ('Readme' not in file_name) and ( - 'ReadMe' not in file_name): - pr_filelist0_str = Path(file_name) - if pr_filelist0_str.exists(): - with open(str(pr_filelist0_str), 'r', encoding='gb18030', errors='ignore') as foo: - for words in foo: - if onelink in words: - link = onelink[0:] - internal_link_check = 1 - print( - '{},This is an internal links that includes {},please check this line that: {}'.format( - filepath_inprlist, link, words)) - else: - continue - foo.close() - else: - pass - print('internal_link_check=%d' % internal_link_check) + def link_check(self, prfile): + """ + 功能:检测文件内部是否包含内部链接 + """ + prfilename = os.path.join(self.fram_str, self.modelzoo_dir, prfile) + with open(self.args.linklisttxt, 'r') as f: + content = f.read() + linklist = content.split('\n') + if ('readme' not in prfilename.lower()): + with open(prfilename, 'r') as f: + content = f.read() + for link in linklist: + if link in content: + self.failResultList.append("{}: contain link[{}], check fail!".format(prfile, link)) + else: + self.succResultList.append("{}: not contain link[{}], check succ!".format(prfile, link)) + else: + self.succResultList.append("{}: is readme file, no need to check link, check succ!".format(prfile)) -# TODO 含义复制,需要重构 -def check_Sensitive_content(path_pr_list, fram_str, modelzoo_dir): - with open(os.getcwd() + "/upline_access_black_http.json", 'r') as load_f: - load_dict = json.load(load_f) - with open(path_pr_list, 'r') as fooa: - sensitive_check = 0 - for fram_file_dir in fooa: - # 去除换行操作 - fram_file_dir = fram_file_dir.strip('\n') - # pr中文件绝对路径 - pr_filelist0_str1 = fram_str + modelzoo_dir + '/' + fram_file_dir - # 判断文件是否存在 - file_name = model_str_name(pr_filelist0_str1) - pr_filelist0_str = Path(file_name) - if pr_filelist0_str.exists() and os.path.isfile(pr_filelist0_str): - # 如果文件存在,打开文件 - with open(str(pr_filelist0_str), 'r', encoding='gb18030', errors='ignore') as foo: - for words in foo: - words = words.strip('\n') - if ('0.00' not in words) and ('0.' not in words): - # 工号识别 - if re.findall( - r'([/][A-Za-z]00[\d]{5}[/]|[/][A-Za-z]00[\d]{6}[/]|[/][A-Za-z]00[\d]{7}[/]|[/][A-Za-z]00[\d]{8}[/]|[/][A-Za-z]00[\d]{9}[/]|[/][A-Za-z]00[\d]{10}[/])', - words) or \ - re.findall( - r'([/][A-Za-z]wx\d{6}[/]|[/][A-Za-z]wx\d{7}[/]|[/][A-Za-z]wx\d{8}[/]|[/][A-Za-z]wx\d{9}[/]|[/][A-Za-z]wx\d{10}[/]|[/][A-Za-z]wx\d{11}[/])', - words) or \ - re.findall( - r'([/]00[\d]{5}[/]|[/]00[\d]{6}[/]|[/]00[\d]{7}[/]|[/]00[\d]{8}[/]|[/]00[\d]{9}[/]|[/]00[\d]{10}[/])', - words): - print( - '{}, There may be a job number in the file, please check the line that is: {}'.format( - fram_file_dir, words)) - sensitive_check = 1 - foo.close() - # 获取文件名 - env_file_name = os.path.basename(str(pr_filelist0_str)) - if re.findall(r'train\w*.sh', env_file_name) or re.findall(r'infer\w*.sh', env_file_name): - with open(str(pr_filelist0_str), 'r', encoding='gb18030', errors='ignore') as fooc: - for words in fooc: - words = words.strip('\n') - # 不合规配置环境变量1、export install_path 2、export LD_LIBRARY_PATH 3、export PYTHONPATH 4、export PATH 5、export ASCEND_OPP_PATH - if re.findall(r'\w*export install_path\w*', words) or re.findall( - r'\w*export LD_LIBRARY_PATH\w*', words) or re.findall(r'\w*export PATH\w*', words) \ - or re.findall(r'\w*export ASCEND_OPP_PATH\w*', words): - print( - '{}, There are non compliant configuration environment variables, please check the line that is: {}'.format( - fram_file_dir, words)) - sensitive_check = 1 - if re.findall(r'\w*export PYTHONPATH\w*', words) and re.findall(r'\w*install_path\w*', - words): - print( - '{}, There are non compliant configuration environment variables, please check the line that is: {}'.format( - fram_file_dir, words)) - sensitive_check = 1 - fooc.close() - if ('README' not in file_name) and ('readme' not in file_name) and ('Readme' not in file_name) and ( - 'ReadMe' not in file_name): - with open(str(pr_filelist0_str), 'r', encoding='gb18030', errors='ignore') as fooc: - for words in fooc: - # ip 识别 - if ('device_ip' in words) or ('server_id' in words): - continue - elif re.findall( - r'http://\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b', - words) or \ - re.findall( - r'https://\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b', - words): # or \ - # re.findall( - # r'(? Date: Tue, 19 Jul 2022 03:11:13 +0000 Subject: [PATCH 14/14] update AcessScan/startRun.sh. --- AcessScan/startRun.sh | 200 ++++++++++++++++++------------------------ 1 file changed, 83 insertions(+), 117 deletions(-) diff --git a/AcessScan/startRun.sh b/AcessScan/startRun.sh index 98a4c0e420..4e1ffaebd2 100644 --- a/AcessScan/startRun.sh +++ b/AcessScan/startRun.sh @@ -3,6 +3,7 @@ echo "####################################################################" echo "# Start Modelzoo Network Test.... " echo "####################################################################" + top_dir=`pwd` hostname="worker-121-36-69-71" @@ -41,6 +42,7 @@ echo "=================Modified files in this PR: =================" cat $1/pr_filelist.txt + #如果PR只涉及到.MD文件的修改,则无需执行用例,直接返回OK if [[ `grep -ciE ".MD|.txt|.doc|.docx|LICENSE" "$1/pr_filelist.txt"` -ne '0' && `grep -cE ".py|.sh|.cpp" "$1/pr_filelist.txt"` -eq '0' ]] ;then echo "Only .MD|.txt|.doc|.docx|LICENSE in pr_filelist, No need to run testcases!" @@ -52,7 +54,6 @@ fi #3、首层目录必要文件检查:LINCENSE,README.md,requirements.txt #4、文件大小检查,不超过2M #5、内部链接扫描 -starttime=`date +'%Y-%m-%d %H:%M:%S'` file_log='log1/py_train.log' dir_log='log1' if [ -d $dir_log ]; @@ -65,44 +66,14 @@ else mkdir $dir_log fi python3 access_upline.py --pr_filelist_dir=$1/pr_filelist.txt >$file_log 2>&1 -#docs文件检查 -echo "=================Start to Check Type of File =================" -while read line -do - dir2=`echo $line | sed "s/\// /g"` - for i in $dir2; - do - if [[ $i == 'test' ]];then - #echo "=========666"; - result=$(echo $line | grep -E "\.py|\.cpp" | grep -v "__init__.py") - if [ -n "$result" ]; - then - if [ ! -f "$1/modelzoo/$line" ];then - docs_nums=`grep -a $'\r' $1/modelzoo/$line | wc -l` - if [[ $docs_nums -eq 0 ]];then - continue - else - echo "$line ,This is a docs file,please check and delete it!" - let UNIX_check=1 - fi - fi - fi - fi - done -done < $1/pr_filelist.txt - license_check=`grep -ri "license_check=1" ${file_log} | wc -l` filesize_check=`grep -ri "filesize_check=1" ${file_log} | wc -l` firstlevel_check=`grep -ri "firstlevel_check=1" ${file_log} | wc -l` funkfile_check=`grep -ri "funkfile_check=1" ${file_log} | wc -l` internal_link_check=`grep -ri "internal_link_check=1" ${file_log} | wc -l` sensitive_check=`grep -ri "sensitive_check=1" ${file_log} | wc -l` -modelzoo_level_check=`grep -ri "modelzoo_level_check=1" ${file_log} | wc -l` -file_word_check=`grep -ri "file_word_check=1" ${file_log} | wc -l` -core_binding_check=`grep -ri "core_binding_check=1" ${file_log} | wc -l` -cat $file_log | grep -a -v "check=1" | grep -a -v "check=0" -if [[ $license_check -ge 1 || $filesize_check -ge 1 || $firstlevel_check -ge 1 || $funkfile_check -ge 1 || $internal_link_check -ge 1 -|| $sensitive_check -ge 1 || $modelzoo_level_check -ge 1 || file_word_check -ge 1 || UNIX_check -ge 1 || core_binding_check -ge 1 ]]; +cat $file_log | grep -v "check=1" | grep -v "check=0" +if [[ $license_check -ge 1 || $filesize_check -ge 1 || $firstlevel_check -ge 1 || $funkfile_check -ge 1 || $internal_link_check -ge 1 || sensitive_check -ge 1 ]]; then echo "check fail" exit 1 @@ -111,81 +82,83 @@ else fi #exit $status -#echo "=================Start to Check License =================" -##license检查 -#lincense_check=0 -#while read line -#do -# a=`echo $line |awk -F "_for_" '{print $1}' | awk -F "/" '{print $NF}'` -# b=`echo $line |awk -F "_for_" '{print $2}' | awk -F "/" '{print $1}'` -# result=`echo $a`_for_`echo $b` -# lise_dir=$(echo ${line%$result*}/$result/LICENSE) -# directory=$(echo ${line%$result*}/$result/) -# if [ -n "$b" ] && [ -d $1/modelzoo/$directory ]; -# then -# if [ -f $1/modelzoo/$lise_dir ]; -# then -# true -# else -# echo "$result license is not exist!" -# let lincense_check=1 -# fi -# else -# true -# #echo "$result name -ERROR" -# fi -#done < $1/pr_filelist.txt + +echo "=================Start to Check License =================" +#license检查 +lincense_check=0 +while read line +do + a=`echo $line |awk -F "_for_" '{print $1}' | awk -F "/" '{print $NF}'` + b=`echo $line |awk -F "_for_" '{print $2}' | awk -F "/" '{print $1}'` + result=`echo $a`_for_`echo $b` + lise_dir=$(echo ${line%$result*}/$result/LICENSE) + directory=$(echo ${line%$result*}/$result/) + if [ -n "$b" ] && [ -d $1/modelzoo/$directory ]; + then + if [ -f $1/modelzoo/$lise_dir ]; + then + true + else + echo "$result license is not exist!" + let lincense_check=1 + fi + else + true + #echo "$result name -ERROR" + fi +done < $1/pr_filelist.txt #py/cpp文件检查 -#while read line -#do -# function checkfile() -# { -# result=$(echo $1 | grep -E "\.py|\.cpp" | grep -v "__init__.py") -# if [ -n "$result" ]; -# then -# Hw_result=`cat $1 | grep -i "License"` -# if [ -n "$Hw_result" ]; -# then -# true -# else -# echo "$1 license check fail!" -# let lincense_check=1 -# fi -# else -# #echo "$1 no need check" -# true -# fi -# } -# function getAllFiles() -# { -# for fileName in `ls $1`; -# do -# dir_or_file=$1"/"$fileName -# if [ -d $dir_or_file ] -# then -# getAllFiles $dir_or_file -# else -# checkfile $dir_or_file -# fi -# done -# } -# if [ -f "$1/modelzoo/$line" ]; -# then -# #echo $line -# checkfile $1/modelzoo/$line -# else -# getAllFiles $1/modelzoo/$line -# fi -# -#done < $1/pr_filelist.txt -# -#if [ $lincense_check -eq '1' ] ;then -# echo "License check failed, Please follow the guide to add License:" -# echo "https://gitee.com/ascend/modelzoo/blob/master/contrib/CONTRIBUTING.md" -# exit 1 -#fi +while read line +do + function checkfile() + { + result=$(echo $1 | grep -E "\.py|\.cpp" | grep -v "__init__.py") + if [ -n "$result" ]; + then + Hw_result=`cat $1 | grep -i "License"` + if [ -n "$Hw_result" ]; + then + true + else + echo "$1 license check fail!" + let lincense_check=1 + fi + else + #echo "$1 no need check" + true + fi + } + function getAllFiles() + { + for fileName in `ls $1`; + do + dir_or_file=$1"/"$fileName + if [ -d $dir_or_file ] + then + getAllFiles $dir_or_file + else + checkfile $dir_or_file + fi + done + } + if [ -f $1/modelzoo/$line ]; + then + #echo $line + checkfile $1/modelzoo/$line + else + getAllFiles $1/modelzoo/$line + fi + +done < $1/pr_filelist.txt + +if [ $lincense_check -eq '1' ] ;then + echo "License check failed, Please follow the guide to add License:" + echo "https://gitee.com/ascend/modelzoo/blob/master/contrib/CONTRIBUTING.md" + exit 1 +fi + #如果新增的都是目录,则无需执行用例,直接返回OK check_res=0 while read line @@ -200,6 +173,7 @@ if [ $check_res -eq '0' ] ;then echo "Add directorys in contrib/Research, No need to run testcases!" exit 0 fi + #代码安全检查模块 while read line do @@ -224,11 +198,6 @@ do fi done < $1/pr_filelist.txt -endtime=`date +'%Y-%m-%d %H:%M:%S'` -start_seconds=$(date --date="$starttime" +%s); -end_seconds=$(date --date="$endtime" +%s); -echo "本次运行时间: "$((end_seconds-start_seconds))"s" -:<