smilevchy
8/15/2017 - 9:09 AM

mergeDuplicateStringResTool.py

#!/usr/bin/python
# coding=utf-8

"""
    处理文本资源文件中的重复资源

    使用方法:
    python mergeDuplicateStringResTool.py [scanned_dir]

    参数解释:
    scanned_dir : 一般是项目根目录

    @author zhanghaifan
"""


import os
import sys
import re


# Literal markers of the Android string-resource XML skeleton.
string_res_file_start_tag = "<resources>"
string_res_file_end_tag = "</resources>"
# Template for one merged <string> entry: filled with (resource id, value).
string_res_item = "\t<string name=\"%s\">%s</string>\n"

# group(1): whole element, group(2): name attribute, group(3): text value.
res_item_pattern = re.compile(u"(<string name=\"(.*?)\">(.*?)</string>)")

# 1-based line number of </resources> in the file currently being scanned;
# written by gen_string_meta_data_list() and read by handle_lines().
end_tag_line_num = -1


def scan_dir(path):
    """Recursively walk *path*, handing every non-ignored strings.xml file
    to scan_file(). Hidden entries and "build" directories are skipped."""
    for entry in os.listdir(path):
        if check_ignore(entry):
            continue

        entry_path = os.path.join(path, entry)

        if os.path.isdir(entry_path):
            scan_dir(entry_path)
        elif not check_ignore_file(entry):
            scan_file(entry_path)


def check_ignore(filename):
    """Return True for directory entries to skip entirely:
    hidden entries (leading dot) and "build" output directories."""
    return filename.startswith(".") or filename == "build"


def check_ignore_file(filename):
    """Return True unless *filename* is a string resource file
    ("strings.xml" or any "strings.*.xml" variant).

    Fix: the original fell through and implicitly returned None for
    non-ignored files; return False explicitly, consistent with
    check_ignore().
    """
    ext = os.path.splitext(filename)[1]
    # First dot-separated token of the base name, so "strings.v2.xml"
    # still qualifies as a strings file.
    name = os.path.basename(filename).split(".")[0]

    if ext != ".xml" or name != "strings":
        return True

    return False


class StringMetaData:
    """One <string> entry found while scanning a resource file.

    Attributes (set in __init__):
        file_path -- path of the strings.xml the entry came from
        line_num  -- 1-based line number of the entry inside that file
        value     -- text content of the <string> element
        res_id    -- value of the element's name attribute (None for the
                     synthetic group records built during merging)

    Fix: removed the class-level attributes (including the shared mutable
    default ``value = []``) — they were always shadowed by __init__ and a
    mutable class attribute invites aliasing bugs.  Added __ne__, since
    Python 2 does not derive ``!=`` from __eq__.
    """

    def __init__(self, file_path, line_num, value, res_id):
        self.file_path = file_path
        self.line_num = line_num
        self.value = value
        self.res_id = res_id

    def __eq__(self, other):
        # Two entries are duplicates when their text content matches,
        # regardless of resource id or location.
        return self.value == other.value

    def __ne__(self, other):
        return not self.__eq__(other)


def scan_file(file_path):
    """Merge duplicate <string> entries inside one strings.xml file.

    Reads the file, finds values that occur more than once, comments the
    duplicated lines out, and appends one shared <string> entry per
    duplicated value, then writes the result back in place.

    Fix: use print() call form — valid under both Python 2 and 3, unlike
    the original print statement; dropped commented-out debug filter.
    """
    print("************************ scanning file %s" % file_path)

    with open(file_path) as f:
        lines = f.readlines()

    string_meta_data_list = gen_string_meta_data_list(lines, file_path)
    common_meta_data_list = gen_common_meta_data_list(string_meta_data_list, file_path)
    handle_lines(common_meta_data_list, lines)
    extract_common_res_id(common_meta_data_list, lines, file_path)


def gen_string_meta_data_list(lines, file_path):
    """Parse every <string> element from *lines* into StringMetaData records.

    Side effect: stores the 1-based line number of the closing
    </resources> tag in the module-level ``end_tag_line_num``.
    """
    global end_tag_line_num

    result = []

    for num, raw_line in enumerate(lines):
        stripped = raw_line.strip()

        if stripped == string_res_file_end_tag:
            end_tag_line_num = num + 1

        # Skip blank lines and XML comments before attempting a match.
        if check_ignore_line(stripped):
            continue

        match = res_item_pattern.search(stripped)

        if match is not None:
            result.append(
                StringMetaData(file_path, num + 1, match.group(3), match.group(2)))

    return result


def gen_common_meta_data_list(string_meta_data_list, file_path):
    """Group duplicated string entries by their text value.

    Returns a list of synthetic StringMetaData records ("poison" entries,
    res_id None), one per duplicated value; each carries a ``data_list``
    attribute holding every original occurrence of that value.  Every
    occurrence of a duplicated value is also flagged ``to_be_removed``.

    NOTE: list.index()/.count() compare via StringMetaData.__eq__, i.e. by
    ``value`` only, so entries with identical text but different resource
    ids are merged.  The .count() call makes this O(n^2) over the entries.
    """
    common_meta_data_list = []

    for meta_data in string_meta_data_list:
        # Position of an already-registered group with the same value,
        # or -1 when this value has no group yet.
        try:
            index_in_common = common_meta_data_list.index(meta_data)
        except ValueError:
            index_in_common = -1

        meta_data.to_be_removed = False

        if index_in_common > -1:
            # Later occurrence of a known duplicate: attach to the existing
            # group and mark it for removal.
            common_meta_data_list[index_in_common].data_list.append(meta_data)
            meta_data.to_be_removed = True
        elif string_meta_data_list.count(meta_data) > 1:
            # First occurrence of a value that appears more than once:
            # create the group record (res_id None — a shared id is assigned
            # later) and mark this occurrence for removal as well.
            poison_data = StringMetaData(file_path, meta_data.line_num, meta_data.value, None)
            poison_data.data_list = []
            poison_data.data_list.append(meta_data)
            common_meta_data_list.append(poison_data)
            meta_data.to_be_removed = True

    return common_meta_data_list


def handle_lines(common_meta_data_list, lines):
    """Comment out every duplicated <string> line and delete the closing
    </resources> line (a new one is appended after the merged entries).

    *lines* is modified in place.  Relies on the module-level
    ``end_tag_line_num`` recorded while the file was scanned.

    Fix: ``line_num`` is 1-based (stored as ``num + 1``), but the original
    indexed ``lines[i]`` with it directly, commenting out the line *after*
    each duplicate (the blank-line guard masked the symptom).  Index with
    ``line_num - 1`` instead.
    """
    removed = []

    for common in common_meta_data_list:
        for duplicate in common.data_list:
            removed.append(duplicate.line_num)

    removed.sort()

    for line_num in removed[::-1]:
        idx = line_num - 1  # convert 1-based line number to list index

        # Defensive: never wrap an empty line in a comment.
        if lines[idx].strip() == "":
            continue

        lines[idx] = "\t<!--" + lines[idx].strip() + "-->\n"

    del lines[end_tag_line_num - 1]


def extract_common_res_id(common_meta_data_list, lines, file_path):
    """Append one shared <string> entry per duplicated value, restore the
    closing </resources> tag, and write the buffer back to *file_path*."""
    for index, common in enumerate(common_meta_data_list):
        new_res_id = "common_res_id_%s" % index
        lines.append(string_res_item % (new_res_id, common.value))

        # Hook for rewriting code references to the merged id
        # (currently a no-op placeholder).
        for _ in common.data_list:
            replace_res_id()

    lines.append(string_res_file_end_tag)

    with open(file_path, "w") as f:
        f.writelines(lines)


# TODO not implemented yet
def replace_res_id():
    """Placeholder for rewriting references to a merged resource id.

    Fix: the original body was ``if False: print ""`` — dead code using the
    Python-2-only print statement.  An explicit no-op keeps the hook
    callable under both Python 2 and 3.
    """
    pass


def check_ignore_line(line):
    """Return True for (stripped) lines that carry no resource data:
    empty lines and XML comment lines."""
    if not line:
        return True

    return line.startswith("<!--")


if __name__ == '__main__':
    # Fix: print() call form is valid under both Python 2 and 3,
    # unlike the original print statements.
    if len(sys.argv) < 2:
        print("No argument inputted, please check.")
    else:
        # argv[1] is the directory to scan, normally the project root.
        workspace_dir = sys.argv[1]
        scan_dir(workspace_dir)
        print("************************ done")