# ---
# jupyter:
# jupytext:
# text_representation:
# extension: .py
# format_name: light
# format_version: '1.4'
# jupytext_version: 1.2.4
# kernelspec:
# display_name: Python 3
# language: python
# name: python3
# ---
# # Testing the correlation between training and test data
import pandas as pd
from random import shuffle
# # Create dummy data
# +
# Training sample, written one (trustworthiness, amount) pair per row.
# Every trustworthiness score from 1 to 10 appears exactly once.
training_data = pd.DataFrame(
    [
        (3, 10), (5, 0), (4, 30), (6, 50), (8, 20),
        (10, 40), (9, 0), (1, 5), (2, 12), (7, 9),
    ],
    columns=["trustworthiness", "amount"],
)
# Test sample: scores 1, 7 and 2 also occur in the training sample, while
# 21, 22 and 23 occur only here.
test_data = pd.DataFrame(
    [(1, 6), (7, 10), (2, 13), (21, 50), (22, 20), (23, 40)],
    columns=["trustworthiness", "amount"],
)
# Bare expression so the notebook renders the training frame.
training_data
# -
# # Get overlap of training and test data (sorted)
# +
# Order both frames by the join key, modifying them in place.
training_data.sort_values(by="trustworthiness", inplace=True)
test_data.sort_values(by="trustworthiness", inplace=True)
# Inner join: keep only trustworthiness scores present in both frames.
# The shared "amount" column comes back as "amount_x" (training, left side)
# and "amount_y" (test, right side) via pandas' default suffixes.
intersection = training_data.merge(
    test_data,
    how='inner',
    on="trustworthiness",
)
# Bare expression so the notebook renders the joined frame.
intersection
# -
# # Get correlation
intersection["amount_x"].corr(  # training amounts (left side of the merge)
    other=intersection["amount_y"],  # test amounts (right side of the merge)
    method="pearson",  # spelled out for clarity; this is pandas' default
)