Skip to content

Commit

Permalink
Simulating matches
Browse files Browse the repository at this point in the history
  • Loading branch information
twelve-discovery committed Sep 15, 2020
1 parent 4f645f6 commit 3c03c5d
Show file tree
Hide file tree
Showing 41 changed files with 265 additions and 4 deletions.
84 changes: 84 additions & 0 deletions 10GoalsPerGame.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Poisson distribution of goals

Counts the goals scored in each match of the Wyscout event data, plots the
proportion of matches for every goal count and overlays a Poisson
distribution that has the same mean.
"""

import json
import math

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#Load in Wyscout data
#Data: https://figshare.com/collections/Soccer_match_event_dataset/4415000/2
#Article: https://www.nature.com/articles/s41597-019-0247-7
#Documentation: https://apidocs.wyscout.com/matches-wyid-events

with open('wyscout/events/events_Germany.json') as f:
    data = json.load(f)

data_df = pd.DataFrame(data)

#Identify the goals and add them to a column
shots = data_df[data_df['subEventName'].isin(['Shot', 'Free kick shot', 'Penalty'])]

#A shot is a goal when one of its tags has id 101.
#Build the whole column in one pass rather than writing cell by cell.
shots = shots.assign(
    Goal=[int(any(tag['id'] == 101 for tag in tags)) for tags in shots['tags']])
total_goals = int(shots['Goal'].sum())
print('Total goals: ' + str(total_goals))

#Shots and goals per match. sort=False keeps the matches in order of first
#appearance, i.e. the same order shots['matchId'].unique() would give.
per_match = shots.groupby('matchId', sort=False)['Goal'].agg(['size', 'sum'])
match_list = per_match.index.tolist()
num_matches = len(match_list)
shots_in_match = per_match['size'].tolist()
goals_in_match = per_match['sum'].tolist()


#Set up figure
#The figure size has to be configured before the figure is created,
#otherwise it does not apply to this figure.
plt.rcParams['figure.figsize'] = 12/2.54, 8/2.54
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)


#Make histogram of goals/shots
#Goal counts 0..max_goals_shown-1 each get a unit-width bin centred on the count.
max_goals_shown = 10
mean_goals = np.mean(goals_in_match)
goals_dist, goals_bins = np.histogram(
    goals_in_match, bins=np.arange(-0.5, max_goals_shown + 0.5))
goals_dist = goals_dist/num_matches

#Make Poisson distribution
#P(k) = mean^k * exp(-mean) / k!
#math.factorial is used because the np.math alias no longer exists in NumPy 2.
g = np.arange(0, max_goals_shown)
factorials = np.array([math.factorial(int(k)) for k in g], dtype=float)
Poisson_g = np.power(mean_goals, g)*np.exp(-mean_goals)/factorials


#Plot data
#One bar per goal count. A bar chart is used instead of re-binning the
#already-binned proportions, which merged the last two goal counts into one bar.
ax.bar(g, goals_dist, width=1)
ax.plot(g, Poisson_g, color='black')
ax.set_yticks(np.arange(0, 0.3, 0.1))
ax.spines['left'].set_visible(True)
ax.spines['bottom'].set_position('zero')
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.set_xticks(np.arange(0, max_goals_shown, 1))
ax.set_ylabel('Proportion of matches')
ax.set_xlabel('Number of goals scored')
plt.show()

#Save the figure to a pdf
#The directory is spelled 'Output' (capital O) in the repository.
fig.savefig('Output/PoissonDistributionGoals.pdf', dpi=None, bbox_inches="tight")



#Exercise:
#1, Make a histogram of shots per game
#2, Find the mean and standard deviation for shots per game
#3, Show that shots per game is roughly normally distributed.
93 changes: 93 additions & 0 deletions 11SimulateMatches.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#This code is adapted from
#https://dashee87.github.io/football/python/predicting-football-results-with-statistical-modelling/

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn
from scipy.stats import poisson,skellam

epl = pd.read_csv("http://www.football-data.co.uk/mmz4281/1920/E0.csv")
#Keep only the teams and the full-time goals. The result must be assigned
#back to epl, otherwise every other column of the csv is carried along.
epl = epl[['HomeTeam','AwayTeam','FTHG','FTAG']]
epl = epl.rename(columns={'FTHG': 'HomeGoals', 'FTAG': 'AwayGoals'})
print(epl.head())

#Leave the last 10 rows of the table out of the fit.
epl = epl[:-10]
#numeric_only skips the team-name columns, which cannot be averaged.
print(epl.mean(numeric_only=True))

# importing the tools required for the Poisson regression model
import statsmodels.api as sm
import statsmodels.formula.api as smf

#Stack the table twice so that every row is "team scored `goals` against
#opponent": once from the home side's view (home=1), once from the away
#side's view (home=0).
goal_model_data = pd.concat([
    epl[['HomeTeam','AwayTeam','HomeGoals']].assign(home=1).rename(
        columns={'HomeTeam':'team', 'AwayTeam':'opponent','HomeGoals':'goals'}),
    epl[['AwayTeam','HomeTeam','AwayGoals']].assign(home=0).rename(
        columns={'AwayTeam':'team', 'HomeTeam':'opponent','AwayGoals':'goals'})])

#Fit the model to the data
#Home advantage included
#Team and opponent as fixed effects.
poisson_model = smf.glm(formula="goals ~ home + team + opponent", data=goal_model_data,
                        family=sm.families.Poisson()).fit()
print(poisson_model.summary())


home_team='Man City'
away_team='Arsenal'

#Predict for Man City (home) vs. Arsenal (away)
home_score_rate=poisson_model.predict(pd.DataFrame(
    data={'team': home_team, 'opponent': away_team, 'home':1}, index=[1]))
#The away side does not get the home advantage, so home must be 0 here.
away_score_rate=poisson_model.predict(pd.DataFrame(
    data={'team': away_team, 'opponent': home_team, 'home':0}, index=[1]))
print(home_team + ' against ' + away_team + ' expect to score: ' + str(home_score_rate))
print(away_team + ' against ' + home_team + ' expect to score: ' + str(away_score_rate))


#Lets just get a result
#Draw one random score for each side from a Poisson distribution with its predicted rate.
home_goals=np.random.poisson(home_score_rate)
away_goals=np.random.poisson(away_score_rate)
print(home_team + ': ' + str(home_goals[0]))
print(away_team + ': ' + str(away_goals[0]))


#Code to calculate the score probabilities for the match.
def simulate_match(foot_model, homeTeam, awayTeam, max_goals=10):
    """Return the matrix of score-line probabilities for one fixture.

    foot_model -- fitted model; its predict() is called with a one-row
                  DataFrame holding the columns 'team', 'opponent' and 'home'
    homeTeam   -- name of the home team
    awayTeam   -- name of the away team
    max_goals  -- largest number of goals per team that is considered

    The result is a (max_goals+1) x (max_goals+1) array. Entry [i, j] is the
    product of the Poisson probabilities that the home team scores i goals
    and the away team scores j goals.
    """
    def expected_goals(team, opponent, home):
        #Ask the model for the scoring rate of `team` in this fixture.
        fixture = pd.DataFrame(data={'team': team, 'opponent': opponent, 'home': home},
                               index=[1])
        return foot_model.predict(fixture).values[0]

    home_goals_avg = expected_goals(homeTeam, awayTeam, 1)
    away_goals_avg = expected_goals(awayTeam, homeTeam, 0)

    #Probability of 0, 1, ..., max_goals goals for each side.
    goal_counts = np.arange(0, max_goals + 1)
    home_probs = poisson.pmf(goal_counts, home_goals_avg)
    away_probs = poisson.pmf(goal_counts, away_goals_avg)

    #The outer product combines the two sides' probabilities for every score line.
    return np.outer(home_probs, away_probs)

max_goals=5
score_matrix=simulate_match(poisson_model, home_team, away_team,max_goals)

#Make 2d histogram of results

#The figure size has to be configured before the figure is created,
#otherwise it does not apply to this figure.
plt.rcParams['figure.figsize'] = 12/2.54, 8/2.54
fig=plt.figure()
ax=fig.add_subplot(1,1,1)

#Rows of score_matrix are home goals, columns are away goals.
#origin='lower' puts row 0 at the bottom so that, together with the
#ascending extent, the y tick labels match the home-goal count of each row.
pos=ax.imshow(score_matrix, extent=[-0.5,max_goals+0.5,-0.5,max_goals+0.5],
              origin='lower', aspect='auto',cmap=plt.cm.Reds)
fig.colorbar(pos, ax=ax)
ax.set_title('Probability of outcome')
#Axis limits follow max_goals instead of a hard-coded 5.5.
ax.set_xlim((-0.5,max_goals+0.5))
ax.set_ylim((-0.5,max_goals+0.5))
ax.set_xlabel('Goals scored by ' + away_team)
ax.set_ylabel('Goals scored by ' + home_team)
#Lay out after the labels exist so that they are taken into account.
plt.tight_layout()
plt.show()
#The directory is spelled 'Output' (capital O) in the repository.
fig.savefig('Output/2DOutcomes.pdf' , dpi=None, bbox_inches="tight")

#Home, draw, away probabilities
#Below the diagonal the row index (home goals) is larger than the column
#index (away goals), on the diagonal they are equal, above it smaller.
homewin=np.sum(np.tril(score_matrix, -1))
draw=np.sum(np.diag(score_matrix))
awaywin=np.sum(np.triu(score_matrix, 1))


4 changes: 3 additions & 1 deletion 7PassHeatMap.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
shots_match = df.loc[df['type_name'] == 'Shot'].set_index('id')

#Find shot times in seconds
#This should be adjusted to account for overlapping halves of the match.
shot_times = shots_match['minute']*60+shots_match['second']
shot_window = 15
shot_start = shot_times - shot_window
Expand Down Expand Up @@ -97,14 +98,15 @@ def in_range(pass_time,start,finish):
fig.savefig('Output/PassesBy' + team_required + '.pdf', dpi=100)
plt.show()

#Make x,y positions
x=[]
y=[]
for i,apass in passes.iterrows():
x.append(apass['location'][0])
y.append(pitchWidthY-apass['location'][1])

#Make a histogram of passes
H_Pass=np.histogram2d(y, x,bins=10,range=[[0, pitchWidthY],[0, pitchLengthX]])
H_Pass=np.histogram2d(y, x,bins=5,range=[[0, pitchWidthY],[0, pitchLengthX]])

from FCPython import createPitch
(fig,ax) = createPitch(pitchLengthX,pitchWidthY,'yards','gray')
Expand Down
6 changes: 5 additions & 1 deletion 8PassCompare.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pandas.io.json import json_normalize
from FCPython import createPitch
import json
Expand Down Expand Up @@ -109,6 +110,8 @@ def in_range(pass_time,start,finish):
passshot_df['Shots']= pd.to_numeric(passshot_df['Shots'])
passshot_df['Passes']= pd.to_numeric(passshot_df['Passes'])
passshot_df['Goals']= pd.to_numeric(passshot_df['Goals'])

#Fit the model
model_fit=smf.ols(formula='Shots ~ Passes', data=passshot_df[['Shots','Passes']]).fit()
print(model_fit.summary())
b=model_fit.params
Expand All @@ -126,12 +129,13 @@ def in_range(pass_time,start,finish):


#For goals (and strictly speaking even for shots) it is better to do a Poisson regression
poisson_model = smf.glm(formula="Goals ~ Passes", data=passshot_df,
poisson_model = smf.glm(formula="Goals ~ Passes + Team", data=passshot_df,
family=sm.families.Poisson()).fit()
poisson_model.summary()
b=poisson_model.params



#Make comparative pass maps
x_all=[]
y_all=[]
Expand Down
80 changes: 80 additions & 0 deletions 9ShotTimes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Plot Histogram of Times of Shots and Goals

Loads the Wyscout event data, selects the shots (or only the goals) of one
half and plots how many of them fall into each minute of that half, together
with a horizontal line at the average number per minute.
"""
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
import matplotlib

#Load in Wyscout data
with open('wyscout/events/events_Germany.json') as f:
    data = json.load(f)

data_df = pd.DataFrame(data)

#Identify the goals and add them to a column
shots = data_df[data_df['subEventName']=='Shot']
#A shot is a goal when one of its tags has id 101.
#assign() returns a new frame, so its result has to be kept; this also gives
#every shot that is not a goal an explicit 0.
shots = shots.assign(
    Goal=[int(any(tag['id'] == 101 for tag in tags)) for tags in shots['tags']])

half='1H'
isgoal=0

#Number of minutes covered by the histogram and by the per-minute average.
#NOTE(review): 48 rather than 45 presumably leaves room for stoppage time - confirm.
minutes_shown = 48

#Find the particular shots I am interested in
in_half = shots['matchPeriod']==half
if isgoal:
    the_shots = shots[in_half & (shots['Goal']==1)]['eventSec']
else:
    the_shots = shots[in_half]['eventSec']

#Basic shot statistics
total_shots = len(the_shots)
number_of_matches = len(np.unique(shots['matchId']))
shots_per_match = total_shots/number_of_matches
shots_per_min = total_shots/minutes_shown


plt.rcParams['figure.figsize'] = 12/2.54, 8/2.54

#NOTE(review): this FontProperties object is created but never passed to any
#text call, so it has no visible effect on the figure.
matplotlib.font_manager.FontProperties(family='Helvetica',size=11)

#Set up figure
fig = plt.figure()
ax = fig.add_subplot(1,1,1)

#Plot histogram of shots
#eventSec is in seconds; dividing by 60 gives minutes, one bin per minute.
ax.hist(the_shots/60, bins=range(0, minutes_shown + 1))
#Horizontal line at the average number per minute.
ax.plot([0, minutes_shown], [shots_per_min, shots_per_min], color='black')

ax.spines['left'].set_visible(True)
ax.spines['bottom'].set_position('zero')
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.set_xticks(np.arange(0, minutes_shown, 5))
ax.set_xlabel('Time elapsed in ' + half[0] + ' half')
if isgoal==0:
    ax.set_ylabel('Number of shots over the season')
    ax.set_yticks(np.arange(0,120,20))
    ax.set_ylim(0,130)
else:
    ax.set_ylabel('Number of goals over the season')
    ax.set_yticks(np.arange(0,20,2))
    ax.set_ylim(0,20)


plt.show()

#Save the figure to a pdf
if isgoal:
    fig.savefig('Output/TimesOfGoals' + half +'.pdf' , dpi=None, bbox_inches="tight")
else:
    fig.savefig('Output/TimesOfShots' + half +'.pdf' , dpi=None, bbox_inches="tight")
Binary file added Output/2DOutcomes.pdf
Binary file not shown.
Binary file added Output/HeatmapOfPassesEngland Women's.pdf
Binary file not shown.
Binary file modified Output/PassHeatArgentina Women's.pdf
Binary file not shown.
Binary file modified Output/PassHeatAustralia Women's.pdf
Binary file not shown.
Binary file modified Output/PassHeatBrazil Women's.pdf
Binary file not shown.
Binary file modified Output/PassHeatCameroon Women's.pdf
Binary file not shown.
Binary file modified Output/PassHeatCanada Women's.pdf
Binary file not shown.
Binary file modified Output/PassHeatChile Women's.pdf
Binary file not shown.
Binary file modified Output/PassHeatChina PR Women's.pdf
Binary file not shown.
Binary file modified Output/PassHeatEngland Women's.pdf
Binary file not shown.
Binary file modified Output/PassHeatFrance Women's.pdf
Binary file not shown.
Binary file modified Output/PassHeatGermany Women's.pdf
Binary file not shown.
Binary file modified Output/PassHeatItaly Women's.pdf
Binary file not shown.
Binary file modified Output/PassHeatJamaica Women's.pdf
Binary file not shown.
Binary file modified Output/PassHeatJapan Women's.pdf
Binary file not shown.
Binary file modified Output/PassHeatKorea Republic Women's.pdf
Binary file not shown.
Binary file modified Output/PassHeatNetherlands Women's.pdf
Binary file not shown.
Binary file modified Output/PassHeatNew Zealand Women's.pdf
Binary file not shown.
Binary file modified Output/PassHeatNigeria Women's.pdf
Binary file not shown.
Binary file modified Output/PassHeatNorway Women's.pdf
Binary file not shown.
Binary file modified Output/PassHeatScotland Women's.pdf
Binary file not shown.
Binary file modified Output/PassHeatSouth Africa Women's.pdf
Binary file not shown.
Binary file modified Output/PassHeatSpain Women's.pdf
Binary file not shown.
Binary file modified Output/PassHeatSweden Women's.pdf
Binary file not shown.
Binary file modified Output/PassHeatThailand Women's.pdf
Binary file not shown.
Binary file modified Output/PassHeatUnited States Women's.pdf
Binary file not shown.
Binary file added Output/PassesByEngland Women's.pdf
Binary file not shown.
Binary file added Output/PoissonDistributionGoals.pdf
Binary file not shown.
Binary file modified Output/ShotsPassesWithFit.pdf
Binary file not shown.
Binary file added Output/TimesOfGoals1H.pdf
Binary file not shown.
Binary file added Output/TimesOfGoals2H.pdf
Binary file not shown.
Binary file added Output/TimesOfShots.pdf
Binary file not shown.
Binary file added Output/TimesOfShots1H.pdf
Binary file not shown.
Binary file added Output/TimesOfShots2H.pdf
Binary file not shown.
Binary file added TimesOfShots.pdf
Binary file not shown.
2 changes: 0 additions & 2 deletions Wyscout/Put Wyscout data here.md

This file was deleted.

0 comments on commit 3c03c5d

Please sign in to comment.