-
Notifications
You must be signed in to change notification settings - Fork 108
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
4f645f6
commit 3c03c5d
Showing
41 changed files
with
265 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Poisson distribution of goals

Loads the Wyscout event data for Germany, counts the goals scored in each
match, and compares the observed distribution of goals per match with a
Poisson distribution that has the same mean.
"""

import json
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pylab import rcParams

#Load in Wyscout data
#Data: https://figshare.com/collections/Soccer_match_event_dataset/4415000/2
#Article: https://www.nature.com/articles/s41597-019-0247-7
#Documentation: https://apidocs.wyscout.com/matches-wyid-events

with open('wyscout/events/events_Germany.json') as f:
    data = json.load(f)

data_df = pd.DataFrame(data)

#Identify the goals and add them to a column
shots = data_df[data_df['subEventName'].isin(['Shot', 'Free kick shot', 'Penalty'])]

#A shot counts as a goal when any of its tags has id 101.
#assign() returns a new frame, so the filtered slice of data_df is not mutated.
shots = shots.assign(
    Goal=[int(any(tag['id'] == 101 for tag in tags)) for tags in shots['tags']]
)
total_goals = int(shots['Goal'].sum())

#Shots and goals per match, computed in a single pass over the shots.
#sort=False keeps the matches in order of first appearance.
#Matches without any shot do not appear in `shots` and are therefore not counted.
per_match = shots.groupby('matchId', sort=False)['Goal'].agg(['size', 'sum'])
match_list = per_match.index.tolist()
num_matches = len(match_list)
shots_in_match = per_match['size'].tolist()
goals_in_match = per_match['sum'].tolist()

#Set up figure
#The default size has to be set before the figure is created to take effect.
rcParams['figure.figsize'] = 12/2.54, 8/2.54
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)

#Make histogram of goals/shots
#Bin edges at -0.5, 0.5, ..., 9.5 give one bin per goal count 0..9.
mean_goals = np.mean(goals_in_match)
goals_dist, goals_bins = np.histogram(goals_in_match, bins=np.arange(-0.5, 10.5))
goals_dist = goals_dist / num_matches

#Make Poisson distribution
#P(k) = mean^k * exp(-mean) / k!  for k = 0..9
#math.factorial is used because the np.math alias was removed in NumPy 2.0.
g = np.arange(0, 10)
Poisson_g = np.array([
    np.power(mean_goals, int(k)) * np.exp(-mean_goals) / math.factorial(int(k))
    for k in g
])

#Plot data
plt.hist(g-0.5, 9, weights=goals_dist)
plt.plot(g, Poisson_g, color='black')
ax.set_yticks(np.arange(0, 0.3, 0.1))
ax.spines['left'].set_visible(True)
ax.spines['bottom'].set_position('zero')
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.set_xticks(np.arange(0, 10, 1))
ax.set_ylabel('Proportion of matches')
ax.set_xlabel('Number of goals scored')
plt.show()

#Save the figure to a pdf
fig.savefig('output/PoissonDistributionGoals.pdf', dpi=None, bbox_inches="tight")
|
||
|
||
|
||
#Exercise: | ||
#1, Make a histogram of shots per game | ||
#2, Find the mean and standard deviation for shots per game | ||
#3, Show that shots per game is roughly normally distributed. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
#This code is adapted from
#https://dashee87.github.io/football/python/predicting-football-results-with-statistical-modelling/
#
#Fits a Poisson regression to Premier League results and uses it to predict
#the scoring rates of a single fixture.

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn
from scipy.stats import poisson,skellam

# importing the tools required for the Poisson regression model
import statsmodels.api as sm
import statsmodels.formula.api as smf

epl = pd.read_csv("http://www.football-data.co.uk/mmz4281/1920/E0.csv")
#Keep only the teams and the full-time goals.
#The selection must be assigned back to `epl`, otherwise it is never used.
epl = epl[['HomeTeam','AwayTeam','FTHG','FTAG']]
epl = epl.rename(columns={'FTHG': 'HomeGoals', 'FTAG': 'AwayGoals'})
print(epl.head())

#Drop the last 10 rows of the table before fitting
epl = epl[:-10]
#numeric_only skips the team-name columns, which cannot be averaged
print(epl.mean(numeric_only=True))

#One row per team per match: goals scored by `team` against `opponent`,
#with home=1 when `team` played at home and home=0 when it played away.
goal_model_data = pd.concat([
    epl[['HomeTeam','AwayTeam','HomeGoals']].assign(home=1).rename(
        columns={'HomeTeam':'team', 'AwayTeam':'opponent','HomeGoals':'goals'}),
    epl[['AwayTeam','HomeTeam','AwayGoals']].assign(home=0).rename(
        columns={'AwayTeam':'team', 'HomeTeam':'opponent','AwayGoals':'goals'}),
])

#Fit the model to the data
#Home advantage included
#Team and opponent as fixed effects.
poisson_model = smf.glm(formula="goals ~ home + team + opponent", data=goal_model_data,
                        family=sm.families.Poisson()).fit()
print(poisson_model.summary())

home_team='Man City'
away_team='Arsenal'

#Predict for Man City (home) vs. Arsenal (away)
home_score_rate=poisson_model.predict(pd.DataFrame(data={'team': home_team, 'opponent': away_team,
                                                         'home':1},index=[1]))
#The away side does not get the home advantage, so home must be 0 here
away_score_rate=poisson_model.predict(pd.DataFrame(data={'team': away_team, 'opponent': home_team,
                                                         'home':0},index=[1]))
print(home_team + ' against ' + away_team + ' expect to score: ' + str(home_score_rate))
print(away_team + ' against ' + home_team + ' expect to score: ' + str(away_score_rate))

#Lets just get a result
#Draw one random score for each side from a Poisson with the predicted rate
home_goals=np.random.poisson(home_score_rate)
away_goals=np.random.poisson(away_score_rate)
print(home_team + ': ' + str(home_goals[0]))
print(away_team + ': ' + str(away_goals[0]))
|
||
|
||
#Code to calculate the goals for the match. | ||
def simulate_match(foot_model, homeTeam, awayTeam, max_goals=10):
    """Return the matrix of score-line probabilities for one match.

    Parameters
    ----------
    foot_model
        Fitted model whose ``predict`` accepts a one-row DataFrame with the
        columns ``team``, ``opponent`` and ``home`` and returns an object
        exposing ``.values`` (e.g. a pandas Series) holding the expected
        number of goals.
    homeTeam, awayTeam
        Team identifiers as used when the model was fitted.
    max_goals : int
        Largest number of goals per team included in the matrix.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(max_goals + 1, max_goals + 1)``; entry ``[i, j]`` is
        P(home scores i) * P(away scores j), treating the two goal counts as
        independent Poisson variables. Score lines above ``max_goals`` are
        left out, so the entries sum to slightly less than 1.
    """
    def expected_goals(team, opponent, home):
        # One-row frame describing the fixture from `team`'s point of view.
        fixture = pd.DataFrame(data={'team': team, 'opponent': opponent, 'home': home},
                               index=[1])
        return foot_model.predict(fixture).values[0]

    home_goals_avg = expected_goals(homeTeam, awayTeam, 1)
    away_goals_avg = expected_goals(awayTeam, homeTeam, 0)

    # poisson.pmf is vectorized, so evaluate all goal counts 0..max_goals at once.
    goal_counts = np.arange(max_goals + 1)
    home_probs = poisson.pmf(goal_counts, home_goals_avg)
    away_probs = poisson.pmf(goal_counts, away_goals_avg)
    return np.outer(home_probs, away_probs)
|
||
#Probability of every score line up to max_goals goals per team.
#Rows of score_matrix are home goals, columns are away goals.
max_goals=5
score_matrix=simulate_match(poisson_model, home_team, away_team,max_goals)

#Make 2d histogram of results

#The default size has to be set before the figure is created to take effect.
from pylab import rcParams
rcParams['figure.figsize'] = 12/2.54, 8/2.54
fig=plt.figure()
ax=fig.add_subplot(1,1,1)

#origin='lower' puts row 0 (home team scores 0) at the bottom, so the image
#agrees with the y tick labels; the default origin would draw it upside down.
pos=ax.imshow(score_matrix, extent=[-0.5,max_goals+0.5,-0.5,max_goals+0.5],
              origin='lower', aspect='auto',cmap=plt.cm.Reds)
fig.colorbar(pos, ax=ax)
ax.set_title('Probability of outcome')
#Axis limits follow max_goals instead of a hard-coded 5.5
plt.xlim((-0.5,max_goals+0.5))
plt.ylim((-0.5,max_goals+0.5))
ax.set_xlabel('Goals scored by ' + away_team)
ax.set_ylabel('Goals scored by ' + home_team)
#Lay out after the labels exist so they are taken into account
plt.tight_layout()
plt.show()
fig.savefig('output/2DOutcomes.pdf' , dpi=None, bbox_inches="tight")

#Home, draw, away probabilities
#Below the diagonal: home goals > away goals; above it: away goals > home goals.
#Score lines beyond max_goals are not in the matrix, so the three values
#sum to slightly less than 1.
homewin=np.sum(np.tril(score_matrix, -1))
draw=np.sum(np.diag(score_matrix))
awaywin=np.sum(np.triu(score_matrix, 1))
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Plot Histogram of Times of Shots and Goals

Loads the Wyscout event data for Germany and plots, minute by minute, how
many shots (or goals, when `isgoal` is set) occurred in the chosen half over
the whole data set.
"""
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
import matplotlib
from pylab import rcParams

#Load in Wyscout data
with open('wyscout/events/events_Germany.json') as f:
    data = json.load(f)

data_df = pd.DataFrame(data)

#Identify the goals and add them to a column
shots=data_df[data_df['subEventName']=='Shot']
#A shot counts as a goal when any of its tags has id 101.
#assign() returns a new frame; its result must be kept, otherwise the Goal
#column is never created.
shots=shots.assign(
    Goal=[int(any(tag['id']==101 for tag in tags)) for tags in shots['tags']]
)

#Which half to plot ('1H' is the value used here) and whether to plot goals
#(1) or all shots (0)
half='1H'
isgoal=0

#Find the particular shots I am interested in
if isgoal:
    the_shots=shots[np.logical_and((shots['matchPeriod']==half), (shots['Goal']==1))]['eventSec']
else:
    the_shots=shots[(shots['matchPeriod']==half)]['eventSec']

#Basic shot statistics
total_shots=len(the_shots)
number_of_matches=len(np.unique(shots['matchId']))
shots_per_match=total_shots/number_of_matches
#Average number of shots in each of the 48 one-minute bins plotted below
shots_per_min=total_shots/48

rcParams['figure.figsize'] = 12/2.54, 8/2.54

#NOTE(review): this object is created but never passed to any plotting call,
#so it does not change the fonts used in the figure.
matplotlib.font_manager.FontProperties(family='Helvetica',size=11)

#Set up figure
fig=plt.figure()
ax=fig.add_subplot(1,1,1)

#Plot histogram of shots
#eventSec is divided by 60 to bin the events by minute
plt.hist(the_shots/60, bins = range(0,49))
#Horizontal line at the average count per minute
plt.plot([0, 48],[shots_per_min, shots_per_min], color='black')

ax.spines['left'].set_visible(True)
ax.spines['bottom'].set_position('zero')
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.set_xticks(np.arange(0,48,5))
ax.set_xlabel('Time elapsed in ' + half[0] + ' half')
if isgoal==0:
    ax.set_ylabel('Number of shots over the season')
    ax.set_yticks(np.arange(0,120,20))
    ax.set_ylim(0,130)
else:
    ax.set_ylabel('Number of goals over the season')
    ax.set_yticks(np.arange(0,20,2))
    ax.set_ylim(0,20)

plt.show()

#Save the figure to a pdf
#NOTE(review): the other scripts in this commit write to 'output/' (lower
#case) - confirm which directory name is intended on case-sensitive systems.
if isgoal:
    fig.savefig('Output/TimesOfGoals' + half +'.pdf' , dpi=None, bbox_inches="tight")
else:
    fig.savefig('Output/TimesOfShots' + half +'.pdf' , dpi=None, bbox_inches="tight")
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file was deleted.
Oops, something went wrong.