# robpot891
# 10/30/2017 - 8:44 PM
#
# tennis_bt.py

import numpy as np
import pandas as pd
from scipy import optimize
from sklearn.preprocessing import scale,MinMaxScaler

# Match data source: https://github.com/JeffSackmann/tennis_atp
dforig = pd.read_csv('atp2015.csv')[['winner_name', 'loser_name', 'tourney_level']]
# Discard matches whose tourney_level is 'C' or 'D'
# (presumably Challenger and Davis Cup events -- confirm against the dataset docs).
df = dforig[(dforig['tourney_level'] != 'C') & (dforig['tourney_level'] != 'D')]

# Keep only players who appear both as a winner and as a loser at least once.
players = set(df['loser_name']) & set(df['winner_name'])
# The filtered frame must be assigned back; evaluating the expression alone
# leaves `df` untouched.
df = df[df['winner_name'].isin(players) & df['loser_name'].isin(players)]

# Bidirectional mapping between player names and parameter indices.
id2p = list(players)
p2id = {pname: idx for idx, pname in enumerate(id2p)}

# Number of wins for every ordered (winner, loser) pair.
fex = df.groupby(['winner_name', 'loser_name']).size()

# One row per (winner, loser) pair:
#   [win count | one-hot winner (n columns) | one-hot loser (n columns)]
n_players = len(id2p)
n_columns = 1 + 2 * n_players
data = []
# Series.items() replaces iteritems(), which was removed in pandas 2.0.
for (winner, loser), n_wins in fex.items():
    row = np.zeros(n_columns, dtype=np.int64)
    row[0] = n_wins
    row[1 + p2id[winner]] = 1
    row[1 + n_players + p2id[loser]] = 1
    data.append(row)

# reshape keeps the matrix two-dimensional even when no pair survived filtering.
w = np.array(data, dtype=np.int64).reshape(-1, n_columns)

# Random starting point for the optimiser, one strength per player.
p0 = np.random.random(len(players))

# BTL model
def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), element-wise for arrays."""
    decay = np.exp(-x)
    return 1 / (1 + decay)
def f1(p, w):
    """Return the negative log-likelihood of the Bradley-Terry-Luce model.

    Parameters
    ----------
    p : array of length n
        Strength parameter of each player.
    w : 2-D array with 1 + 2*n columns
        Column 0 holds the number of wins for the row's (winner, loser) pair,
        columns 1..n are the one-hot winner indicator and columns n+1..2n the
        one-hot loser indicator.

    Returns
    -------
    float
        Sum over rows of wins * -log(sigmoid(p[winner] - p[loser])).
    """
    # Take the player count from the matrix layout rather than from a
    # module-level variable, so the function depends only on its arguments.
    n = (w.shape[1] - 1) // 2
    wins = w[:, 0]
    winner_onehot = w[:, 1:n + 1]
    loser_onehot = w[:, n + 1:]
    margin = np.dot(winner_onehot, p) - np.dot(loser_onehot, p)
    # -log(sigmoid(d)) == log(1 + exp(-d)); logaddexp evaluates it without
    # overflowing to inf when d is a large negative number.
    return np.sum(wins * np.logaddexp(0, -margin))

# Minimise the negative log-likelihood with BFGS (capped at 10 iterations).
p1 = optimize.minimize(
    f1,
    p0,
    args=(w,),
    method='BFGS',
    options={'disp': True, 'maxiter': 10},
)

# Pair every fitted strength with its player; index i of p1.x belongs to id2p[i].
results = list(zip(p1.x, id2p))

# Highest strength first; rank numbering starts at 0.
sorted_result = sorted(results, reverse=True)
for rank, (score, name) in enumerate(sorted_result):
    print('{:3d} {:6.3f}  {}'.format(rank, score, name))