-
Notifications
You must be signed in to change notification settings - Fork 0
/
walkRank.py
120 lines (99 loc) · 3.77 KB
/
walkRank.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
'''
Created on Jan 21, 2014
@author: Ted
'''
import networkx as nx
import csv
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#==============================================================================
# Maintenance Code
#==============================================================================
def getTeams(data):
    # Returns a sorted list of the distinct team identifiers that appear in
    # either the 'wteam' (winner) or 'lteam' (loser) column of data.
    participants = data[['wteam', 'lteam']].values.ravel()
    distinctTeams = pd.unique(participants)
    return sorted(distinctTeams)
#==============================================================================
# Graph Initialization Code
#==============================================================================
def buildGraph(data):
    # Builds a directed multigraph whose nodes are the teams found in data
    # and whose weighted edges come from the games they played against each
    # other (two edges per game, see rawEdgesFromGames). The graph returned
    # is the edge-normalized version produced by normEdges.
    teams = getTeams(data)
    weightedEdges = rawEdgesFromGames(data)
    gameGraph = nx.MultiDiGraph()
    gameGraph.add_nodes_from(teams)
    gameGraph.add_weighted_edges_from(weightedEdges)
    return normEdges(gameGraph)
def rawEdgesFromGames(data):
    # Turns every game in data into two weighted directed edges.
    #
    # data must provide the columns 'wteam', 'lteam', 'wscore' and 'lscore'.
    # For each game the score difference (wscore - lscore) is mapped to
    # winnerP by diffToP, using the largest difference in data as the scale.
    # Two (source, target, weight) tuples are emitted per game, in row order:
    #   (wteam, lteam, 1 - winnerP)
    #   (lteam, wteam, winnerP)
    #
    # Returns the list of edge tuples; an empty list when data has no games.
    diffs = data['wscore'] - data['lscore']
    if diffs.empty:
        # Nothing to scale against; max() of an empty column would raise.
        return []
    maxDiff = diffs.max()
    edges = []
    # Iterate the columns in lockstep and append in place: the previous
    # "edges = edges + [...]" form re-copied the whole list for every game.
    for winner, loser, diff in zip(data['wteam'], data['lteam'], diffs):
        winnerP = diffToP(diff, maxDiff)
        edges.append((winner, loser, 1 - winnerP))
        edges.append((loser, winner, winnerP))
    return edges
def diffToP(diff, maxDiff):
    # Maps a winning score difference onto the range [0.5, 1.0]:
    # a difference of 0 gives 0.5 and a difference equal to maxDiff gives 1.0.
    # Assumes the caller sends the winner's (non-negative) difference.
    #
    # diff    -- score difference of the game
    # maxDiff -- largest score difference used as the scale
    if maxDiff == 0:
        # No spread to scale by; treat the game as even instead of
        # dividing by zero.
        return 0.5
    # float() forces true division: with integer inputs on Python 2,
    # diff/maxDiff floors to 0 for every diff below maxDiff.
    return 0.5 + 0.5 * (float(diff) / maxDiff)
def normEdges(G):
    # Returns a copy of G in which, for every node, the 'weight' values of
    # its outgoing edges are divided by their sum, so that they add up to 1
    # and can be used as selection probabilities by selectOutNode.
    # The rescaling is done on G.copy(), which is what gets returned.
    normG = G.copy()
    for node in normG.nodes():
        # keys/data are passed by keyword because their positional order
        # differs between networkx 1.x (keys, data) and 2.x (data, keys).
        # With data=True each tuple ends with the edge's attribute dict.
        outAttrs = [edge[-1] for edge in normG.out_edges(node, data=True)]
        normWeight = sum(attrs['weight'] for attrs in outAttrs)
        if normWeight == 0:
            # No outgoing edges, or all of them weigh 0: nothing to scale,
            # and dividing would fail.
            continue
        for attrs in outAttrs:
            attrs['weight'] /= normWeight
    return normG
#==============================================================================
# Random Walk Code
#==============================================================================
def selectOutNode(edges):
    # Picks the target node of one edge at random, using each edge's
    # 'weight' as its share of the cumulative distribution.
    # Expects (source, target, attrs) tuples, i.e. the output of
    # out_edges with data and without keys.
    # If the draw is never below the running total (e.g. the weights sum
    # to less than the draw), the target of the last edge is returned.
    draw = random.uniform(0, 1)
    running = 0.0
    candidates = [(edge[1], edge[2]['weight']) for edge in edges]
    for chosen, share in candidates:
        running += share
        if draw < running:
            return chosen
    return chosen
def randomWalk(G, n):
    # Performs an n-step random walk on G and counts the visits per node.
    #
    # The walk starts at a uniformly chosen node; at every step the current
    # node's counter is incremented and the next node is drawn with
    # selectOutNode from the current node's outgoing edges.
    #
    # Returns a dict mapping every node of G to its visit count
    # (an empty dict when G has no nodes).
    nodes = list(G.nodes())  # list(): G.nodes() is not indexable on networkx 2.x
    counts = dict.fromkeys(nodes, 0)
    if not nodes:
        return counts
    currentNode = random.choice(nodes)
    # The graph is not modified during the walk, so each node's out-edge
    # list is built once and reused instead of being rebuilt on every step.
    outEdgeCache = {}
    # Counter loop rather than range(n): on Python 2 range(n) would
    # materialize a list of n items.
    step = 0
    while step < n:
        counts[currentNode] += 1
        edges = outEdgeCache.get(currentNode)
        if edges is None:
            # Keyword arguments: the positional order of keys/data differs
            # between networkx 1.x and 2.x.
            edges = list(G.out_edges(currentNode, keys=False, data=True))
            outEdgeCache[currentNode] = edges
        currentNode = selectOutNode(edges)
        step += 1
    return counts
#==============================================================================
# Main Code
#==============================================================================
def main():
    # Ranks the teams of every season with a random walk and writes the
    # result to walk.csv.
    #
    # Reads regular_season_results.csv, and for each season letter builds
    # the game graph (buildGraph), runs randomWalk for nsteps steps and
    # writes one row per team: team, season, rank. Rank 0 is the team with
    # the most visits.
    import sys  # local import: only needed to choose the csv file mode

    dbHeader = "C:/Users/Ted/Dropbox"
    projHeader = "/Kording Lab/Projects/MarchMadness"
    rsfName = dbHeader + projHeader + "/Data/regular_season_results.csv"
    walkName = dbHeader + projHeader + "/Data/walk.csv"
    seasons = ['A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S']
    nsteps = 100000000

    allData = pd.read_csv(rsfName)

    # The csv module wants a binary file on Python 2 and a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] >= 3:
        of = open(walkName, 'w', newline='')
    else:
        of = open(walkName, 'wb')
    # "with" closes the file even if a season fails part-way through.
    with of:
        owrite = csv.writer(of)
        owrite.writerow(['team','season','rank'])
        for season in seasons:
            # Parenthesized single-argument form prints the same text on
            # Python 2 and Python 3.
            print("Season: "+season)
            games = allData[allData['season'] == season]
            G = buildGraph(games)
            tempCts = randomWalk(G, nsteps)
            # Teams ordered from most visited to least visited.
            sortCts = sorted(tempCts, key=tempCts.get, reverse=True)
            for rank, team in enumerate(sortCts):
                owrite.writerow([team, season, str(rank)])
# Run the ranking only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()