#!/usr/bin/python
# coding=utf-8
"""
处理文本资源文件中的重复资源
使用方法:
python mergeDuplicateStringResTool.py [scanned_dir]
参数解释:
scanned_dir : 一般是项目根目录
@author zhanghaifan
"""
import os
import sys
import re
# Tags that delimit the body of an Android strings.xml resource file.
string_res_file_start_tag = "<resources>"
string_res_file_end_tag = "</resources>"
# Template used when appending a merged common <string> item (tab-indented).
string_res_item = "\t<string name=\"%s\">%s</string>\n"
# Captures: group(1) whole item, group(2) resource name, group(3) value.
res_item_pattern = re.compile(u"(<string name=\"(.*?)\">(.*?)</string>)")
# 1-based line number of </resources> in the file currently being scanned;
# written by gen_string_meta_data_list, read by handle_lines.
end_tag_line_num = -1
def scan_dir(path):
    """Recursively walk *path* and scan every non-ignored strings.xml file."""
    for entry in os.listdir(path):
        if check_ignore(entry):
            continue
        entry_path = os.path.join(path, entry)
        if os.path.isdir(entry_path):
            scan_dir(entry_path)
        elif not check_ignore_file(entry):
            scan_file(entry_path)
def check_ignore(filename):
    """Return True for hidden entries (dot-prefixed) and the "build" dir."""
    return filename.startswith(".") or filename == "build"
def check_ignore_file(filename):
    """Return True unless *filename* is a strings.xml resource file.

    Fix: the original fell off the end and implicitly returned None for
    strings.xml files; now every path returns an explicit bool, matching
    the style of check_ignore.
    """
    ext = os.path.splitext(filename)[1]
    # First dot-separated component, so "strings.xml" -> "strings".
    name = os.path.basename(filename).split(".")[0]
    if ext != ".xml" or name != "strings":
        return True
    return False
class StringMetaData:
    """One <string> resource occurrence: where it lives and what it says.

    Fixes: removed the shared mutable class attribute ``value = []`` (and the
    other redundant class-level defaults always shadowed by __init__), and
    added ``__ne__`` — Python 2 does not derive it from ``__eq__``, so ``!=``
    silently fell back to identity comparison.
    """

    def __init__(self, file_path, line_num, value, res_id):
        self.file_path = file_path
        self.line_num = line_num  # 1-based line number in the source file
        self.value = value        # the string's text content
        self.res_id = res_id      # the name="..." attribute (None for groups)

    def __eq__(self, other):
        # Equality is by text content only; this drives duplicate detection
        # via list.index()/list.count() in gen_common_meta_data_list.
        return self.value == other.value

    def __ne__(self, other):
        return not self.__eq__(other)
def scan_file(file_path):
    """Merge duplicate <string> values inside a single strings.xml file.

    Pipeline: read all lines, collect per-string metadata, group duplicated
    values, comment out the duplicated lines, then append one shared
    resource per group and rewrite the file in place.

    Fixes: removed commented-out debug code; parenthesized the print so the
    single-argument form parses under both Python 2 and Python 3.
    """
    print("************************ scanning file %s" % file_path)
    with open(file_path) as f:
        lines = f.readlines()
    string_meta_data_list = gen_string_meta_data_list(lines, file_path)
    common_meta_data_list = gen_common_meta_data_list(string_meta_data_list, file_path)
    handle_lines(common_meta_data_list, lines)
    extract_common_res_id(common_meta_data_list, lines, file_path)
def gen_string_meta_data_list(lines, file_path):
    """Build a StringMetaData entry for every <string> item in *lines*.

    Side effect: records the 1-based line number of the closing
    </resources> tag in the module-global ``end_tag_line_num``.
    Blank lines, comment lines and non-matching lines are skipped.
    """
    global end_tag_line_num
    result = []
    for line_no, raw_line in enumerate(lines, 1):
        stripped = raw_line.strip()
        if stripped == string_res_file_end_tag:
            end_tag_line_num = line_no
        if check_ignore_line(stripped):
            continue
        match = res_item_pattern.search(stripped)
        if match is not None:
            # group(2) = resource name, group(3) = string value
            result.append(StringMetaData(file_path, line_no, match.group(3), match.group(2)))
    return result
def gen_common_meta_data_list(string_meta_data_list, file_path):
    """Group duplicated string values.

    Returns one synthetic "poison" StringMetaData per distinct duplicated
    value (res_id=None marks it as synthetic); each carries a ``data_list``
    holding every occurrence — including the first — so all of them can be
    commented out later. Also tags each scanned item with a
    ``to_be_removed`` flag.
    """
    common_meta_data_list = []
    for meta_data in string_meta_data_list:
        # EAFP lookup; equality is by value (StringMetaData.__eq__), so this
        # finds an existing group with the same text, if any.
        try:
            index_in_common = common_meta_data_list.index(meta_data)
        except ValueError:
            index_in_common = -1
        meta_data.to_be_removed = False
        if index_in_common > -1:
            # Value already has a group: record this later occurrence.
            common_meta_data_list[index_in_common].data_list.append(meta_data)
            meta_data.to_be_removed = True
        elif string_meta_data_list.count(meta_data) > 1:
            # First occurrence of a value that appears again further down:
            # open a new group seeded with this occurrence.
            poison_data = StringMetaData(file_path, meta_data.line_num, meta_data.value, None)
            poison_data.data_list = []
            poison_data.data_list.append(meta_data)
            common_meta_data_list.append(poison_data)
            meta_data.to_be_removed = True
    return common_meta_data_list
def handle_lines(common_meta_data_list, lines, end_tag_line=None):
    """Comment out every duplicated <string> line and drop the old closing
    </resources> tag (extract_common_res_id re-appends it afterwards).

    Fix: ``line_num`` is 1-based (it is stored as ``num + 1`` and the end-tag
    deletion below already subtracts 1), but the original indexed ``lines``
    with it directly, commenting out the line AFTER each duplicate. Indexing
    is now ``line_num - 1`` throughout.

    :param common_meta_data_list: groups produced by gen_common_meta_data_list
    :param lines: the file's lines, mutated in place
    :param end_tag_line: 1-based line number of </resources>; defaults to the
        module-global recorded by gen_string_meta_data_list.
    """
    if end_tag_line is None:
        end_tag_line = end_tag_line_num
    # 1-based line numbers of every occurrence in every duplicate group.
    removed = sorted(item.line_num
                     for group in common_meta_data_list
                     for item in group.data_list)
    # Walk bottom-up so earlier indexes stay valid while we rewrite.
    for num in reversed(removed):
        stripped = lines[num - 1].strip()
        if stripped == "":
            continue
        lines[num - 1] = "\t<!--" + stripped + "-->\n"
    del lines[end_tag_line - 1]
def extract_common_res_id(common_meta_data_list, lines, file_path):
    """Append one shared <string> per duplicate group, restore the closing
    </resources> tag, and rewrite *file_path* with the updated lines."""
    for index, group in enumerate(common_meta_data_list):
        shared_id = "common_res_id_%s" % index
        lines.append(string_res_item % (shared_id, group.value))
        for _ in group.data_list:
            # Placeholder hook — replace_res_id is an unimplemented stub.
            replace_res_id()
    lines.append(string_res_file_end_tag)
    with open(file_path, "w") as f:
        f.writelines(lines)
# TODO(zhanghaifan): not implemented yet — should rewrite references to the
# merged common_res_id_* names at their original usage sites.
def replace_res_id():
    """Unimplemented stub; intentionally a no-op.

    Fix: replaced the unreachable ``if False: print ""`` dead code (which
    also fails to parse under Python 3) with ``pass``. Behavior unchanged.
    """
    pass
def check_ignore_line(line):
    """Return True for blank lines and XML comment lines."""
    if not line:
        return True
    return line.startswith("<!--")
if __name__ == '__main__':
    # Entry point: expects the project root directory as the sole argument.
    # Fix: parenthesized single-argument prints, valid in Python 2 and 3.
    if len(sys.argv) < 2:
        print("No argument inputted, please check.")
    else:
        workspace_dir = sys.argv[1]
        scan_dir(workspace_dir)
        print("************************ done")