vsouza
9/7/2015 - 11:25 PM

csv_diff.py

import os

import pandas as pd


# Build two small sample tables that differ in exactly one cell (column 'a',
# third row: 4 vs 5) and save them as the "old" and "new" CSV snapshots.
# to_csv does not create missing parent directories, so make sure the output
# directory exists before writing into it.
if not os.path.isdir("data"):
    os.makedirs("data")

data = {'a': [1, 3, 4, 4], 'b': [1, 3, 2, 3]}
df = pd.DataFrame(data=data)
df.to_csv("data/old_data.csv")

data2 = {'a': [1, 3, 5, 4], 'b': [1, 3, 2, 3]}
df2 = pd.DataFrame(data=data2)
df2.to_csv("data/new_data.csv")


def report_diff(x):
    """Describe how one cell differs between two versions of a table.

    Args:
        x: An indexable pair; ``x[0]`` and ``x[1]`` are the two values
            being compared.

    Returns:
        ``x[0]`` unchanged when both values are equal or both are missing
        (NaN/None); otherwise the string ``"<x[0]> ---> <x[1]>"``.
    """
    first, second = x[0], x[1]
    # NaN never compares equal to itself, so a cell that is empty in both
    # versions has to be recognised explicitly; otherwise it would be
    # reported as the change "nan ---> nan".
    if pd.isnull(first) and pd.isnull(second):
        return first
    if first == second:
        return first
    return "{} ---> {}".format(*x)


# Load both snapshots. The first CSV column is the row index that to_csv
# wrote out, so it is restored as the index here. (DataFrame.from_csv has been
# removed from pandas; read_csv with index_col=0 is its replacement.)
old = pd.read_csv("data/old_data.csv", index_col=0)
new = pd.read_csv("data/new_data.csv", index_col=0)


# pd.Panel no longer exists in pandas, so the cell-by-cell comparison is done
# directly. align() re-indexes both frames onto the union of their row and
# column labels (cells present in only one file become NaN), which guarantees
# that values are compared label-by-label rather than by position.
old_aligned, new_aligned = old.align(new)
diff_output = pd.DataFrame(
    {
        column: [
            # Each pair is (old value, new value), so a change reads
            # "old ---> new".
            report_diff(pair)
            for pair in zip(old_aligned[column], new_aligned[column])
        ]
        for column in old_aligned.columns
    },
    index=old_aligned.index,
    columns=old_aligned.columns,
)


def has_change(row):
    """Tell whether a row of the diff table contains a changed cell.

    Args:
        row: A pandas Series holding one row of the diff output. It must
            have the labels 'a' and 'b', whose values are printed when a
            change is found.

    Returns:
        "Y" if the row's text rendering contains the "--->" marker,
        otherwise "N".
    """
    if "--->" in row.to_string():
        # Echo the values of the changed row. print() with a single argument
        # produces the same output on Python 2 and Python 3.
        print(row['a'])
        print(row['b'])
        return "Y"
    return "N"


# Flag every row that contains at least one changed cell.
diff_output['has_change'] = diff_output.apply(has_change, axis=1)

# Keep only the changed rows. The filtered frame has to be assigned: indexing
# with a boolean mask returns a new DataFrame and leaves diff_output as it was,
# so evaluating the expression on its own has no effect.
final_df = diff_output[diff_output.has_change == 'Y'].copy()


print(final_df)