In [1]:
import numpy as np
import matplotlib.pyplot as plot
import seaborn as sb
import h5py as h5

%matplotlib inline


In [2]:
# Open both result archives read-only. The notebook only ever reads from them,
# and an explicit mode avoids h5py's version-dependent default (older releases
# opened files in append mode, which can create or modify the file on disk).
football = h5.File('football.h5', 'r')
hockey = h5.File('hockey.h5', 'r')

##### I have truncated the hockey results, since there were a couple of extremely high-scoring games (e.g. https://en.wikipedia.org/wiki/2016_Kontinental_Hockey_League_All-Star_Game), and I have also removed any games that were settled via penalties or overtime (since the total score is then always odd). I don't make these cuts when calculating the rate of upsets.
In [14]:
# Total scoring events per game = home goals + away goals, read from the
# 'Hgoals' and 'Agoals' datasets of each file.
totalScoresFootball = (football['Hgoals'])[...]+(football['Agoals'])[...]
footballMean = np.mean(totalScoresFootball)

totalScoresHockey = (hockey['Hgoals'])[...]+(hockey['Agoals'])[...]
# The histogram cell draws a vertical line at hockeyMean, so it must be
# defined here; without it that cell raises NameError on a fresh kernel.
# NOTE(review): the markdown above this cell describes cuts on the hockey
# results that are not applied in this cell -- confirm whether the mean
# should be taken after those cuts.
hockeyMean = np.mean(totalScoresHockey)


In [26]:
# One row of three panels; flatten the Axes array so panels can be addressed
# with a single index.
fig,axs = plot.subplots(1,3)

axs = axs.reshape((-1))

# `density=True` replaces the old `normed=True` keyword, which current
# Matplotlib releases no longer accept.
axs[0].hist(totalScoresFootball,bins=15,density=True,color='b',alpha=0.6,histtype='stepfilled',label='Football')

# Mark the mean total score of each sport on its own panel.
axs[0].axvline(footballMean,color='k',label='Mean = '+str(footballMean))
# NOTE(review): only the mean line is drawn for hockey and the middle panel
# stays empty -- the corresponding histogram calls appear to be missing;
# confirm against the intended figure.
axs[2].axvline(hockeyMean,color='k',label='Mean = '+str(hockeyMean))

# Every panel gets a legend and the same x-axis label.
for ax in axs:
    ax.legend()
    ax.set_xlabel('Scoring Events',size=14)

fig.set_size_inches(18,5)

##### Now we can answer our question. We'll trust that the bookies know what they're doing, and we'll say that a team is expected to lose if a bet on that team stands to return more than three times as much as a bet on the other team. Odds are stated as decimals (e.g. odds of 5.09 would return £50.90 for a £10 bet).
In [67]:
def countUnexpectedResults(homeOdds,awayOdds,homeScore,awayScore,oddsRatio=3.):
    """Count the games won by the side the odds marked as the clear underdog.

    A side is the expected winner when the other side's decimal odds are more
    than ``oddsRatio`` times its own. A game counts as unexpected when the
    expected winner scored strictly fewer goals than its opponent. Draws and
    games with no expected winner are never counted.

    Parameters
    ----------
    homeOdds, awayOdds : array-like or scalar
        Decimal odds on the home and away side, one entry per game.
    homeScore, awayScore : array-like or scalar
        Goals scored by the home and away side, one entry per game.
    oddsRatio : float, optional
        Factor by which one side's odds must exceed the other's before a
        winner is considered expected. Defaults to 3.

    Returns
    -------
    int
        Number of games with an unexpected result.
    """
    # Coerce up front so plain lists and scalars work as well as NumPy arrays.
    homeOdds = np.asarray(homeOdds)
    awayOdds = np.asarray(awayOdds)
    homeScore = np.asarray(homeScore)
    awayScore = np.asarray(awayScore)

    # Lower decimal odds mean the favourite.
    expectedHomeWin = oddsRatio*homeOdds < awayOdds
    expectedAwayWin = homeOdds > oddsRatio*awayOdds

    # Strict comparisons: a draw is a win for neither side.
    homeWin = homeScore > awayScore
    awayWin = homeScore < awayScore

    unexpectedResult = (expectedHomeWin & awayWin) | (expectedAwayWin & homeWin)

    return np.count_nonzero(unexpectedResult)

In [68]:
# Upset counts per sport. The four datasets are read in full and passed in the
# order countUnexpectedResults expects: home odds, away odds, home goals,
# away goals.
nUnexpectedFootball = countUnexpectedResults(
    *(football[column][...] for column in ('Hodds', 'Aodds', 'Hgoals', 'Agoals'))
)

nUnexpectedHockey = countUnexpectedResults(
    *(hockey[column][...] for column in ('Hodds', 'Aodds', 'Hgoals', 'Agoals'))
)

##### We can now calculate the rate of unexpected results (as a fraction of all games), together with a √N counting uncertainty
In [69]:
# Football: fraction of all games that were upsets, with a sqrt(N) counting
# error on the number of upsets scaled by the same denominator.
nTotalFootball = len(totalScoresFootball)
rateUnexpectedFootball = nUnexpectedFootball / float(nTotalFootball)
ruFootballError = np.sqrt(nUnexpectedFootball) / float(nTotalFootball)

# Hockey: same calculation.
nTotalHockey = len(totalScoresHockey)
rateUnexpectedHockey = nUnexpectedHockey / float(nTotalHockey)
ruHockeyError = np.sqrt(nUnexpectedHockey) / float(nTotalHockey)

In [75]:
# Single-panel figure for the upset rates.
fig, ax = plot.subplots()

# Football rate at x = 0, drawn as a blue '|' marker with its error bar.
ax.errorbar(
    x=[0],
    y=[rateUnexpectedFootball],
    yerr=[ruFootballError],
    fmt='|',
    color='b',
    linewidth=8,
)