import numpy as np
import pandas as pd
from scipy import optimize
from sklearn.preprocessing import scale,MinMaxScaler
# Data source: Jeff Sackmann's tennis_atp repository, https://github.com/JeffSackmann/tennis_atp
# Load the match list, keeping only the columns needed to count head-to-head results.
dforig = pd.read_csv('atp2015.csv')[['winner_name', 'loser_name', 'tourney_level']]

# Drop matches whose tourney_level is 'C' or 'D'.
# NOTE(review): presumably Challenger and Davis Cup events per the dataset's
# data dictionary -- confirm against the upstream README.
df = dforig[(dforig['tourney_level'] != 'C') & (dforig['tourney_level'] != 'D')]

# Keep only players who appear both as a winner and as a loser, then restrict
# the matches to those played between two such players.  (The filtered frame
# was previously computed and discarded; it is now assigned back to df.)
players = set(df['loser_name'].dropna()) & set(df['winner_name'].dropna())
df = df[df['winner_name'].isin(players) & df['loser_name'].isin(players)]

# Sort the names so the player <-> column mapping is the same on every run
# (set iteration order for strings varies between interpreter sessions).
id2p = sorted(players)
p2id = {pname: idx for idx, pname in enumerate(id2p)}

# Number of wins for every ordered (winner, loser) pair.
fex = df.groupby(['winner_name', 'loser_name']).size()

# One row per (winner, loser) pair:
#   [win count, indicator columns for the winner, indicator columns for the loser]
n_players = len(id2p)
data = []
for (winner, loser), n_wins in fex.items():
    row = [0] * (1 + 2 * n_players)
    row[0] = n_wins
    row[1 + p2id[winner]] = 1
    row[1 + n_players + p2id[loser]] = 1
    data.append(row)

# reshape keeps w two-dimensional even when no pair survived the filtering,
# so column slicing such as w[:, 0] still works on an empty design matrix.
w = np.array(data).reshape(-1, 1 + 2 * n_players)

# Random starting point for the optimiser: one score per player.
p0 = np.random.random(n_players)
# Bradley-Terry-Luce (BTL) model: P(i beats j) = sigmoid(p_i - p_j)
def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), applied elementwise.

    Args:
        x: A scalar or NumPy array.

    Returns:
        The logistic of ``x``, with the same shape as ``x``.
    """
    # 1 / (1 + exp(-x)) == exp(-log(1 + exp(-x))) == exp(-logaddexp(0, -x)).
    # logaddexp never forms exp(-x) on its own, so very negative x yields 0.0
    # by underflow instead of triggering an overflow in exp.
    return np.exp(-np.logaddexp(0, -x))
def f1(p, w):
    """Return the negative log-likelihood of the BTL model for scores ``p``.

    Args:
        p: 1-D array holding one score per player.
        w: 2-D array with one row per (winner, loser) pair.  Column 0 is the
            number of wins; the next ``len(p)`` columns are the winner
            indicator columns and the remaining columns are the loser
            indicator columns.

    Returns:
        The scalar ``sum(-wins * log(sigmoid(p_winner - p_loser)))``.
    """
    # Take the width from p itself rather than from a module-level global so
    # the function depends only on its arguments.
    n = len(p)
    wins = w[:, 0]
    winner_ind = w[:, 1:n + 1]
    loser_ind = w[:, n + 1:]

    diff = np.dot(winner_ind, p) - np.dot(loser_ind, p)

    # -log(sigmoid(d)) == log(1 + exp(-d)) == logaddexp(0, -d).  Computing it
    # this way stays finite for very negative d, where log(sigmoid(d)) would
    # evaluate to log(0) = -inf.
    return np.sum(wins * np.logaddexp(0, -diff))
# Minimise f1 over the player scores with BFGS, starting from p0.
p1 = optimize.minimize(
    f1,
    p0,
    args=(w,),
    method='BFGS',
    options={'disp': True, 'maxiter': 10},
)

# Pair every fitted score with the player name stored at the same index.
results = list(zip(p1.x, id2p))

# Highest score first; tuples compare by score, then by name.
sorted_result = sorted(results, reverse=True)

# Print one line per player: position in the sorted list, score, name.
for rank, entry in enumerate(sorted_result):
    score, name = entry
    print('{:3d} {:6.3f} {}'.format(rank, score, name))