Everfighting
10/17/2019 - 2:36 AM

get_imports

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import re
import os
import glob


# Clone every repository; if a repository already exists locally, update it instead.
def clone_all_project():
    """Clone or update every known project repository in the current directory.

    For each name in the built-in project list: when a directory of that name
    already exists under the current working directory, ``git pull`` is run
    inside it; otherwise the repository is cloned from the
    ``git@git.alphainsight.ai:ai/<name>.git`` remote.

    The outcome of every git invocation is printed.  A failing git command
    (non-zero exit status, or git not being runnable at all) is reported and
    the loop continues with the next project.
    """
    # Local import so this function is self-contained; the file's top-level
    # import block does not bring in subprocess.
    import subprocess

    project_names = ["dingqi_pdf_chart_name_server", "table_box_cos_python", "new_get_table_v3_kpmg_official",
                     "inside_minus", "wubiaoge", "front_end_demo", "add_lines", "cross_page_server", "OCR_liushui",
                     "new_get_table_v3", "new_zhibiaojixuan_pd", "kpmg_chart_name_server", "inside_minus_dev",
                     "brat_script", "ner_relation_training", "docx_mark", "kpmg_sjb_chart_name_server",
                     "pdf_get_text", "caiwu_extract", "ner_only_server",
                     "ner_relation_server", "qingxi_ruku", "python-swagger", "ocr_person_company",
                     "pdftoxml", "AnyQ_yizhixing_server", "create_solr_data", "contrast_third",
                     "third_verify", "get_non_financial_from_text", "contract_extraction", "wordtopdf",
                     "mulu_info", "xiyu_cuobiezi", "abs_pdf_chart_name_server", "ctc_ocr_train",
                     "zhaogu_pdf_chart_name_server", "zhaiquan_pdf_chart_name_server", "zhaiquan_chart_name_server",
                     "kpmg_pdf_chart_name_server", "huatai_chart_name_server", "CuoBieZi", "result_analyse",
                     "batch_upload", "main_org_server", "table_box_cos", "anyq", "PdfLabelName", "production_extract",
                     "new_zhibiaojisuan_ai", "TablenameLable", "chart_title_server", "qiebiao_prepare_data",
                     "Indicator_calculation", "testresult_analysis", "new_get_table_kpmg", "new_get_table_docx_huatai",
                     "caiwufenxi_bch", "classify_server", "financial_analysis", "docx_table_extract",
                     "lac_entity_extract", "caiwu_jiance", "get_text_from_pdf", "financiers_extract", "page_turn_server"]

    def run_git(arguments, cwd):
        """Run ``git <arguments>`` in *cwd*; return True only on exit status 0."""
        try:
            # Argument list + cwd instead of a shell string: no quoting or
            # backslash-escaping problems with unusual paths.
            return subprocess.run(["git"] + arguments, cwd=cwd).returncode == 0
        except OSError as exc:
            # e.g. git is not installed; keep going like the best-effort original.
            print("could not run git: {}".format(exc))
            return False

    path = os.getcwd()
    for project in project_names:
        project_path = os.path.join(path, project)
        if os.path.isdir(project_path):
            print(project_path)
            if run_git(["pull"], project_path):
                print("repull {} finished.".format(project))
            else:
                print("repull {} failed.".format(project))
        else:
            project_url = "git@git.alphainsight.ai:ai/{}.git".format(project)
            if run_git(["clone", project_url], path):
                print("clone {} finished.".format(project_url))
            else:
                print("clone {} failed.".format(project_url))

# Check whether the packages imported directly by the repositories include any flagged package.
def check_packages():
    """Print every imported package that appears on the red or yellow list.

    The import names are collected with ``get_imports()``.  The full list and
    its length are printed first; then each import whose top-level package
    name matches a red-list entry is reported as ``danger package`` and each
    one matching a yellow-list entry as ``warning package``.

    Matching is case-insensitive.
    """
    red_names = ["poppler", "PypeR", "pytesseract", "ssh-import-id"]
    yellow_names = ["bzr", "chardet", "kenlm", "LinkChecker", "mercurial", "paramiko",
                    "pycurl", "pygame", "pygobject", "pymssql", "python-apt", "python-Levenshtein",
                    "uWSGI", "animal-sniffer-annotation", "HdrHistogram", "jersey-apache-client4",
                    "jersey-client", "jersey-core", "jersey-server", "jersey-servlet", "jsqlparser",
                    "jsr311-api", "logback-classic", "logback-core", "mysql-connector-java", "stax-api",
                    "xpp3_min"]
    # The candidate is lowercased before the lookup, so the reference names
    # must be lowercased too; otherwise mixed-case entries such as "PypeR" or
    # "uWSGI" can never match.
    red_lookup = {name.lower() for name in red_names}
    yellow_lookup = {name.lower() for name in yellow_names}
    # NOTE(review): the lists look like distribution names, which are not
    # always identical to the importable module name - confirm the lists
    # cover the names that actually appear in import statements.

    package_list = get_imports()
    print("package list: ", package_list)
    print("package count: ", len(package_list))
    for pkg in package_list:
        # A dotted import such as "paramiko.client" belongs to the package
        # named by its first component.
        top_level = pkg.lower().split(".")[0]
        if top_level in red_lookup:
            print("danger package: {}".format(pkg))
        if top_level in yellow_lookup:
            print("warning package: {}".format(pkg))

def get_imports():
    """Collect the module names imported at top level by ``*/*.py`` files.

    Every Python file exactly one directory below the current working
    directory is read as UTF-8 and scanned line by line.  Only statements that
    start in column 0 are considered:

    * ``from <module> import ...`` contributes ``<module>``;
    * ``import <a>, <b> as <c>`` contributes whatever ``unpack`` extracts from
      the text after ``import``.

    Returns:
        A list of the distinct names found, in no particular order.
    """
    # Raw strings avoid the invalid "\s" escape; "\s+" (not "\s*") keeps
    # identifiers that merely start with "import"/"from" (e.g. "important")
    # from being mistaken for import statements.  "\S+" captures exactly the
    # module token, so a later name containing "import" cannot extend the
    # capture.
    from_pattern = re.compile(r"^from\s+(\S+)\s+import\b")
    import_pattern = re.compile(r"^import\s+(.+)")

    found = set()
    for filename in glob.glob("*/*.py"):
        with open(filename, encoding="utf-8") as f:
            for file_line in f:
                # Drop a trailing comment so it does not end up in a name.
                statement = file_line.split("#", 1)[0]
                match = from_pattern.match(statement) or import_pattern.match(statement)
                if match:
                    found.update(unpack(match.group(1).rstrip()))
    return list(found)

def unpack(package_name):
    """Split the text following ``import`` into lowercased module names.

    Args:
        package_name: The part of an import statement that names the
            module(s), e.g. ``"os"``, ``"os, sys"`` or ``"numpy as np"``.

    Returns:
        A list with one lowercased module name per comma-separated entry,
        with any ``as <alias>`` suffix removed.  Empty entries are skipped,
        so an empty string yields an empty list.
    """
    names = []
    for entry in package_name.split(","):
        # The module name is the first whitespace-delimited token; anything
        # after it is an "as alias" clause.  Tokenising (rather than testing
        # for the substring "as") keeps names such as "base64" or "asyncio"
        # intact.
        tokens = entry.split()
        if tokens:
            names.append(tokens[0].lower())
    return names

# Script entry point: only the package check runs by default.
if __name__ == '__main__':
    # Cloning/updating the repositories is disabled here; uncomment the call
    # below to run it before the check.
    # clone_all_project()
    check_packages()