ettorerizza
3/6/2017 - 9:14 AM

This script takes as input an OpenRefine JSON file and returns the same file in which each "transform" and each "mass edit" will be documented in a column.

This script takes an OpenRefine operation-history JSON file as input and writes out the same list of operations, in which each "transform" and each "mass edit" operation is documented in a new column.

#!/usr/bin/python3
import json

# Load the OpenRefine operation history (a JSON list of operation objects).
# The encoding is pinned to UTF-8 so that decoding does not depend on the
# platform's default locale encoding.
with open("test.json", "r", encoding="utf-8") as infile:
    data = json.load(infile)

def transform_to_addcolumn(data, index=None):
    """Build a "core/column-addition" operation documenting a text transform.

    The returned dict is a shallow copy of ``data`` turned into a column
    addition whose Jython expression returns the text
    ``<expression> on cell <columnName>``, with every ``grel:`` occurrence
    stripped from the original expression.

    Args:
        data: The operation dict to document. Must contain the keys
            ``"expression"`` and ``"columnName"``.
        index: Number used to name the new column (``"transform<index>"``),
            to set ``columnInsertIndex`` (``index - 1``) and to suffix the
            description (``index + 1``). When omitted, the module-level
            variable ``count`` is used, which is how the original
            single-argument call works.

    Returns:
        A new dict; ``data`` itself is not modified.

    Raises:
        KeyError: If ``data`` lacks ``"expression"`` or ``"columnName"``.
        NameError: If ``index`` is omitted and no module-level ``count``
            exists.
    """
    # An explicit index removes the hidden dependency on the driver loop's
    # variable; the fallback keeps one-argument callers working.
    position = count if index is None else index

    column = data["columnName"]
    documented_expr = data["expression"].replace("grel:", "")

    data_trans = dict(data)
    data_trans["op"] = "core/column-addition"
    # NOTE(review): the expression text is embedded unescaped inside a
    # triple-quoted Jython literal; an expression containing three double
    # quotes would presumably break it - confirm whether that can occur.
    data_trans["expression"] = (
        'jython:return """%s on cell %s""" ' % (documented_expr, column)
    )
    data_trans["onError"] = "store-error"
    data_trans["description"] = (
        "store operations in a column" + str(position + 1)
    )
    data_trans["columnInsertIndex"] = position - 1
    data_trans["newColumnName"] = "transform" + str(position)
    data_trans["baseColumnName"] = column

    # Drop the keys that belong to the source operation only. pop() with a
    # default tolerates input that has no "repeat"/"repeatCount" entries.
    for obsolete_key in ("columnName", "repeat", "repeatCount"):
        data_trans.pop(obsolete_key, None)
    return data_trans

def massedit_to_addcolumn(data, index=None):
    """Build a "core/column-addition" operation documenting a mass edit.

    The returned dict is a shallow copy of ``data`` turned into a column
    addition whose Jython expression returns the text
    ``MASS EDIT <from> TO <to> ON COLUMN <columnName>``. Only the first
    ``"from"`` value of the first entry in ``data["edits"]`` is recorded.

    Args:
        data: The operation dict to document. Must contain ``"columnName"``
            and a non-empty ``"edits"`` list whose first entry has a
            non-empty ``"from"`` list and a ``"to"`` value.
        index: Number used to name the new column (``"transform<index>"``),
            to set ``columnInsertIndex`` (``index - 1``) and to suffix the
            description (``index + 1``). When omitted, the module-level
            variable ``count`` is used, which is how the original
            single-argument call works.

    Returns:
        A new dict; ``data`` itself is not modified.

    Raises:
        KeyError: If a required key is missing.
        IndexError: If ``"edits"`` or the first edit's ``"from"`` is empty.
        NameError: If ``index`` is omitted and no module-level ``count``
            exists.
    """
    # An explicit index removes the hidden dependency on the driver loop's
    # variable; the fallback keeps one-argument callers working.
    position = count if index is None else index

    column = data["columnName"]
    first_edit = data["edits"][0]

    data_trans = dict(data)
    data_trans["op"] = "core/column-addition"
    data_trans["expression"] = (
        'jython:return """MASS EDIT %s TO %s ON COLUMN %s""" '
        % (first_edit["from"][0], first_edit["to"], column)
    )
    data_trans["onError"] = "store-error"
    data_trans["description"] = (
        "store operations in a column" + str(position + 1)
    )
    data_trans["columnInsertIndex"] = position - 1
    data_trans["newColumnName"] = "transform" + str(position)
    data_trans["baseColumnName"] = column

    # These keys belong to the source mass-edit operation only.
    del data_trans["edits"]
    del data_trans["columnName"]
    return data_trans
	
# Follow every text-transform and mass-edit operation with a
# "core/column-addition" operation that documents it.  The result is built in
# a new list rather than by inserting into `data` while iterating over it.
documented = []
for el in data:
    documented.append(el)
    # `count` has to remain a module-level name because the converter
    # functions read it to number the generated column.  Its value is the
    # index of `el` in the output list.
    count = len(documented) - 1
    if el['op'] == "core/text-transform":
        documented.append(transform_to_addcolumn(el))
    elif el['op'] == "core/mass-edit":
        documented.append(massedit_to_addcolumn(el))
data = documented

print(data)

# json.dump escapes non-ASCII characters by default; the encoding is stated
# explicitly so the output does not depend on the platform default.
with open("new_openrefine_operations.json", "w", encoding="utf-8") as outfile:
    json.dump(data, outfile, indent=4)