diff --git a/docs/datasources/WhoScored.ipynb b/docs/datasources/WhoScored.ipynb
index e19d0e25..2a44e585 100644
--- a/docs/datasources/WhoScored.ipynb
+++ b/docs/datasources/WhoScored.ipynb
@@ -7,7 +7,22 @@
"metadata": {
"nbsphinx": "hidden"
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_413602/2059154722.py:1: DeprecationWarning: \n",
+ "Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),\n",
+ "(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)\n",
+ "but was not found to be installed on your system.\n",
+ "If this would cause problems for you,\n",
+ "please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466\n",
+ " \n",
+ " import pandas as pd\n"
+ ]
+ }
+ ],
"source": [
"import pandas as pd\n",
"pd.set_option('display.max_columns', None)"
@@ -65,8 +80,8 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/cw/dtaijupiter/NoCsBack/dtai/pieterr/Projects/soccerdata/soccerdata/_common.py:462: UserWarning: Season id \"2021\" is ambiguous: interpreting as \"20-21\"\n",
- " warnings.warn(msg)\n"
+ "/cw/dtaijupiter/NoCsBack/dtai/pieterr/Projects/soccerdata/soccerdata/_common.py:493: UserWarning: Season id \"2021\" is ambiguous: interpreting as \"20-21\"\n",
+ " warnings.warn(msg, stacklevel=1)\n"
]
},
{
@@ -112,7 +127,7 @@
" Path to the Chrome executable.\n",
" headless : bool, default: True\n",
" If True, will run Chrome in headless mode. Setting this to False might\n",
- " help to avoid getting blocked.\n",
+ " help to avoid getting blocked. Only supported for Selenium <4.13.\n",
" \n"
]
}
@@ -161,11 +176,48 @@
"
| \n",
" | \n",
" | \n",
+ " stage_id | \n",
" game_id | \n",
+ " status | \n",
+ " start_time | \n",
+ " home_team_id | \n",
" home_team | \n",
+ " home_yellow_cards | \n",
+ " home_red_cards | \n",
+ " away_team_id | \n",
" away_team | \n",
+ " away_yellow_cards | \n",
+ " away_red_cards | \n",
+ " has_incidents_summary | \n",
+ " has_preview | \n",
+ " score_changed_at | \n",
+ " elapsed | \n",
+ " last_scorer | \n",
+ " is_top_match | \n",
+ " home_team_country_code | \n",
+ " away_team_country_code | \n",
+ " comment_count | \n",
+ " is_lineup_confirmed | \n",
+ " is_stream_available | \n",
+ " match_is_opta | \n",
+ " home_team_country_name | \n",
+ " away_team_country_name | \n",
" date | \n",
- " url | \n",
+ " home_score | \n",
+ " away_score | \n",
+ " incidents | \n",
+ " bets | \n",
+ " aggregate_winner_field | \n",
+ " winner_field | \n",
+ " period | \n",
+ " extra_result_field | \n",
+ " home_extratime_score | \n",
+ " away_extratime_score | \n",
+ " home_penalty_score | \n",
+ " away_penalty_score | \n",
+ " started_at_utc | \n",
+ " first_half_ended_at_utc | \n",
+ " second_half_started_at_utc | \n",
" stage | \n",
" \n",
" \n",
@@ -178,6 +230,43 @@
" | \n",
" | \n",
" | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
"
\n",
" \n",
" \n",
@@ -185,101 +274,582 @@
" ENG-Premier League | \n",
" 2021 | \n",
" 2020-09-12 Crystal Palace-Southampton | \n",
+ " 18685 | \n",
" 1485186 | \n",
+ " 6 | \n",
+ " 2020-09-12T15:00:00 | \n",
+ " 162 | \n",
" Crystal Palace | \n",
+ " 2 | \n",
+ " 0 | \n",
+ " 18 | \n",
" Southampton | \n",
- " 2020-09-12 15:00:00 | \n",
- " https://www.whoscored.com/Matches/1485186/Live... | \n",
- " NaN | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " True | \n",
+ " True | \n",
+ " 2020-09-12 15:14:31Z | \n",
+ " FT | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " gb-eng | \n",
+ " gb-eng | \n",
+ " 6 | \n",
+ " True | \n",
+ " False | \n",
+ " False | \n",
+ " England | \n",
+ " England | \n",
+ " 2020-09-12 14:00:00+00:00 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " [{'minute': '13', 'type': 1, 'subType': 1, 'pl... | \n",
+ " None | \n",
+ " None | \n",
+ " 0.0 | \n",
+ " 7 | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " 2020-09-12T14:01:42Z | \n",
+ " None | \n",
+ " 2020-09-12T15:04:01Z | \n",
+ " None | \n",
" \n",
" \n",
" 2020-09-12 Fulham-Arsenal | \n",
+ " 18685 | \n",
" 1485187 | \n",
+ " 6 | \n",
+ " 2020-09-12T12:30:00 | \n",
+ " 170 | \n",
" Fulham | \n",
+ " 2 | \n",
+ " 0 | \n",
+ " 13 | \n",
" Arsenal | \n",
- " 2020-09-12 12:30:00 | \n",
- " https://www.whoscored.com/Matches/1485187/Live... | \n",
- " NaN | \n",
+ " 2 | \n",
+ " 0 | \n",
+ " True | \n",
+ " True | \n",
+ " 2020-09-12 13:48:13Z | \n",
+ " FT | \n",
+ " 1.0 | \n",
+ " True | \n",
+ " gb-eng | \n",
+ " gb-eng | \n",
+ " 15 | \n",
+ " True | \n",
+ " False | \n",
+ " False | \n",
+ " England | \n",
+ " England | \n",
+ " 2020-09-12 11:30:00+00:00 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " [{'minute': '8', 'type': 1, 'subType': 1, 'pla... | \n",
+ " None | \n",
+ " None | \n",
+ " 1.0 | \n",
+ " 7 | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " 2020-09-12T11:32:23Z | \n",
+ " None | \n",
+ " 2020-09-12T12:35:50Z | \n",
+ " None | \n",
"
\n",
" \n",
" 2020-09-12 Liverpool-Leeds United | \n",
+ " 18685 | \n",
" 1485188 | \n",
+ " 6 | \n",
+ " 2020-09-12T17:30:00 | \n",
+ " 26 | \n",
" Liverpool | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 19 | \n",
" Leeds United | \n",
- " 2020-09-12 17:30:00 | \n",
- " https://www.whoscored.com/Matches/1485188/Live... | \n",
- " NaN | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " True | \n",
+ " True | \n",
+ " 2020-09-12 19:15:39Z | \n",
+ " FT | \n",
+ " 0.0 | \n",
+ " True | \n",
+ " gb-eng | \n",
+ " gb-eng | \n",
+ " 61 | \n",
+ " True | \n",
+ " False | \n",
+ " False | \n",
+ " England | \n",
+ " England | \n",
+ " 2020-09-12 16:30:00+00:00 | \n",
+ " 4 | \n",
+ " 3 | \n",
+ " [{'minute': '4', 'type': 1, 'subType': 2, 'pla... | \n",
+ " None | \n",
+ " None | \n",
+ " 0.0 | \n",
+ " 7 | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " 2020-09-12T16:30:21Z | \n",
+ " None | \n",
+ " 2020-09-12T17:32:57Z | \n",
+ " None | \n",
"
\n",
" \n",
- " 2020-09-12 West Ham United-Newcastle United | \n",
+ " 2020-09-12 West Ham United-Newcastle | \n",
+ " 18685 | \n",
" 1485191 | \n",
+ " 6 | \n",
+ " 2020-09-12T20:00:00 | \n",
+ " 29 | \n",
" West Ham United | \n",
- " Newcastle United | \n",
- " 2020-09-12 20:00:00 | \n",
- " https://www.whoscored.com/Matches/1485191/Live... | \n",
- " NaN | \n",
+ " 2 | \n",
+ " 0 | \n",
+ " 23 | \n",
+ " Newcastle | \n",
+ " 2 | \n",
+ " 0 | \n",
+ " True | \n",
+ " True | \n",
+ " 2020-09-12 21:45:39Z | \n",
+ " FT | \n",
+ " 1.0 | \n",
+ " False | \n",
+ " gb-eng | \n",
+ " gb-eng | \n",
+ " 10 | \n",
+ " True | \n",
+ " False | \n",
+ " False | \n",
+ " England | \n",
+ " England | \n",
+ " 2020-09-12 19:00:00+00:00 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " [{'minute': '56', 'type': 1, 'subType': 1, 'pl... | \n",
+ " None | \n",
+ " None | \n",
+ " 1.0 | \n",
+ " 7 | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " 2020-09-12T19:00:32Z | \n",
+ " None | \n",
+ " 2020-09-12T20:03:20Z | \n",
+ " None | \n",
"
\n",
" \n",
" 2020-09-13 Tottenham-Everton | \n",
+ " 18685 | \n",
" 1485189 | \n",
+ " 6 | \n",
+ " 2020-09-13T16:30:00 | \n",
+ " 30 | \n",
" Tottenham | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 31 | \n",
" Everton | \n",
- " 2020-09-13 16:30:00 | \n",
- " https://www.whoscored.com/Matches/1485189/Live... | \n",
- " NaN | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " True | \n",
+ " True | \n",
+ " 2020-09-13 17:41:16Z | \n",
+ " FT | \n",
+ " 1.0 | \n",
+ " True | \n",
+ " gb-eng | \n",
+ " gb-eng | \n",
+ " 32 | \n",
+ " True | \n",
+ " False | \n",
+ " False | \n",
+ " England | \n",
+ " England | \n",
+ " 2020-09-13 15:30:00+00:00 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " [{'minute': '55', 'type': 1, 'subType': 1, 'pl... | \n",
+ " None | \n",
+ " None | \n",
+ " 1.0 | \n",
+ " 7 | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " 2020-09-13T15:30:20Z | \n",
+ " None | \n",
+ " 2020-09-13T16:31:33Z | \n",
+ " None | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " game_id \\\n",
+ " stage_id \\\n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton 18685 \n",
+ " 2020-09-12 Fulham-Arsenal 18685 \n",
+ " 2020-09-12 Liverpool-Leeds United 18685 \n",
+ " 2020-09-12 West Ham United-Newcastle 18685 \n",
+ " 2020-09-13 Tottenham-Everton 18685 \n",
+ "\n",
+ " game_id \\\n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton 1485186 \n",
+ " 2020-09-12 Fulham-Arsenal 1485187 \n",
+ " 2020-09-12 Liverpool-Leeds United 1485188 \n",
+ " 2020-09-12 West Ham United-Newcastle 1485191 \n",
+ " 2020-09-13 Tottenham-Everton 1485189 \n",
+ "\n",
+ " status \\\n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton 6 \n",
+ " 2020-09-12 Fulham-Arsenal 6 \n",
+ " 2020-09-12 Liverpool-Leeds United 6 \n",
+ " 2020-09-12 West Ham United-Newcastle 6 \n",
+ " 2020-09-13 Tottenham-Everton 6 \n",
+ "\n",
+ " start_time \\\n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton 2020-09-12T15:00:00 \n",
+ " 2020-09-12 Fulham-Arsenal 2020-09-12T12:30:00 \n",
+ " 2020-09-12 Liverpool-Leeds United 2020-09-12T17:30:00 \n",
+ " 2020-09-12 West Ham United-Newcastle 2020-09-12T20:00:00 \n",
+ " 2020-09-13 Tottenham-Everton 2020-09-13T16:30:00 \n",
+ "\n",
+ " home_team_id \\\n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton 162 \n",
+ " 2020-09-12 Fulham-Arsenal 170 \n",
+ " 2020-09-12 Liverpool-Leeds United 26 \n",
+ " 2020-09-12 West Ham United-Newcastle 29 \n",
+ " 2020-09-13 Tottenham-Everton 30 \n",
+ "\n",
+ " home_team \\\n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton Crystal Palace \n",
+ " 2020-09-12 Fulham-Arsenal Fulham \n",
+ " 2020-09-12 Liverpool-Leeds United Liverpool \n",
+ " 2020-09-12 West Ham United-Newcastle West Ham United \n",
+ " 2020-09-13 Tottenham-Everton Tottenham \n",
+ "\n",
+ " home_yellow_cards \\\n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton 2 \n",
+ " 2020-09-12 Fulham-Arsenal 2 \n",
+ " 2020-09-12 Liverpool-Leeds United 1 \n",
+ " 2020-09-12 West Ham United-Newcastle 2 \n",
+ " 2020-09-13 Tottenham-Everton 1 \n",
+ "\n",
+ " home_red_cards \\\n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton 0 \n",
+ " 2020-09-12 Fulham-Arsenal 0 \n",
+ " 2020-09-12 Liverpool-Leeds United 0 \n",
+ " 2020-09-12 West Ham United-Newcastle 0 \n",
+ " 2020-09-13 Tottenham-Everton 0 \n",
+ "\n",
+ " away_team_id \\\n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton 18 \n",
+ " 2020-09-12 Fulham-Arsenal 13 \n",
+ " 2020-09-12 Liverpool-Leeds United 19 \n",
+ " 2020-09-12 West Ham United-Newcastle 23 \n",
+ " 2020-09-13 Tottenham-Everton 31 \n",
+ "\n",
+ " away_team \\\n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton Southampton \n",
+ " 2020-09-12 Fulham-Arsenal Arsenal \n",
+ " 2020-09-12 Liverpool-Leeds United Leeds United \n",
+ " 2020-09-12 West Ham United-Newcastle Newcastle \n",
+ " 2020-09-13 Tottenham-Everton Everton \n",
+ "\n",
+ " away_yellow_cards \\\n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton 1 \n",
+ " 2020-09-12 Fulham-Arsenal 2 \n",
+ " 2020-09-12 Liverpool-Leeds United 0 \n",
+ " 2020-09-12 West Ham United-Newcastle 2 \n",
+ " 2020-09-13 Tottenham-Everton 0 \n",
+ "\n",
+ " away_red_cards \\\n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton 0 \n",
+ " 2020-09-12 Fulham-Arsenal 0 \n",
+ " 2020-09-12 Liverpool-Leeds United 0 \n",
+ " 2020-09-12 West Ham United-Newcastle 0 \n",
+ " 2020-09-13 Tottenham-Everton 0 \n",
+ "\n",
+ " has_incidents_summary \\\n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton True \n",
+ " 2020-09-12 Fulham-Arsenal True \n",
+ " 2020-09-12 Liverpool-Leeds United True \n",
+ " 2020-09-12 West Ham United-Newcastle True \n",
+ " 2020-09-13 Tottenham-Everton True \n",
+ "\n",
+ " has_preview \\\n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton True \n",
+ " 2020-09-12 Fulham-Arsenal True \n",
+ " 2020-09-12 Liverpool-Leeds United True \n",
+ " 2020-09-12 West Ham United-Newcastle True \n",
+ " 2020-09-13 Tottenham-Everton True \n",
+ "\n",
+ " score_changed_at \\\n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton 2020-09-12 15:14:31Z \n",
+ " 2020-09-12 Fulham-Arsenal 2020-09-12 13:48:13Z \n",
+ " 2020-09-12 Liverpool-Leeds United 2020-09-12 19:15:39Z \n",
+ " 2020-09-12 West Ham United-Newcastle 2020-09-12 21:45:39Z \n",
+ " 2020-09-13 Tottenham-Everton 2020-09-13 17:41:16Z \n",
+ "\n",
+ " elapsed \\\n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton FT \n",
+ " 2020-09-12 Fulham-Arsenal FT \n",
+ " 2020-09-12 Liverpool-Leeds United FT \n",
+ " 2020-09-12 West Ham United-Newcastle FT \n",
+ " 2020-09-13 Tottenham-Everton FT \n",
+ "\n",
+ " last_scorer \\\n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton 0.0 \n",
+ " 2020-09-12 Fulham-Arsenal 1.0 \n",
+ " 2020-09-12 Liverpool-Leeds United 0.0 \n",
+ " 2020-09-12 West Ham United-Newcastle 1.0 \n",
+ " 2020-09-13 Tottenham-Everton 1.0 \n",
+ "\n",
+ " is_top_match \\\n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton False \n",
+ " 2020-09-12 Fulham-Arsenal True \n",
+ " 2020-09-12 Liverpool-Leeds United True \n",
+ " 2020-09-12 West Ham United-Newcastle False \n",
+ " 2020-09-13 Tottenham-Everton True \n",
+ "\n",
+ " home_team_country_code \\\n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton gb-eng \n",
+ " 2020-09-12 Fulham-Arsenal gb-eng \n",
+ " 2020-09-12 Liverpool-Leeds United gb-eng \n",
+ " 2020-09-12 West Ham United-Newcastle gb-eng \n",
+ " 2020-09-13 Tottenham-Everton gb-eng \n",
+ "\n",
+ " away_team_country_code \\\n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton gb-eng \n",
+ " 2020-09-12 Fulham-Arsenal gb-eng \n",
+ " 2020-09-12 Liverpool-Leeds United gb-eng \n",
+ " 2020-09-12 West Ham United-Newcastle gb-eng \n",
+ " 2020-09-13 Tottenham-Everton gb-eng \n",
+ "\n",
+ " comment_count \\\n",
"league season game \n",
- "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton 1485186 \n",
- " 2020-09-12 Fulham-Arsenal 1485187 \n",
- " 2020-09-12 Liverpool-Leeds United 1485188 \n",
- " 2020-09-12 West Ham United-Newcastle United 1485191 \n",
- " 2020-09-13 Tottenham-Everton 1485189 \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton 6 \n",
+ " 2020-09-12 Fulham-Arsenal 15 \n",
+ " 2020-09-12 Liverpool-Leeds United 61 \n",
+ " 2020-09-12 West Ham United-Newcastle 10 \n",
+ " 2020-09-13 Tottenham-Everton 32 \n",
"\n",
- " home_team \\\n",
+ " is_lineup_confirmed \\\n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton True \n",
+ " 2020-09-12 Fulham-Arsenal True \n",
+ " 2020-09-12 Liverpool-Leeds United True \n",
+ " 2020-09-12 West Ham United-Newcastle True \n",
+ " 2020-09-13 Tottenham-Everton True \n",
+ "\n",
+ " is_stream_available \\\n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton False \n",
+ " 2020-09-12 Fulham-Arsenal False \n",
+ " 2020-09-12 Liverpool-Leeds United False \n",
+ " 2020-09-12 West Ham United-Newcastle False \n",
+ " 2020-09-13 Tottenham-Everton False \n",
+ "\n",
+ " match_is_opta \\\n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton False \n",
+ " 2020-09-12 Fulham-Arsenal False \n",
+ " 2020-09-12 Liverpool-Leeds United False \n",
+ " 2020-09-12 West Ham United-Newcastle False \n",
+ " 2020-09-13 Tottenham-Everton False \n",
+ "\n",
+ " home_team_country_name \\\n",
"league season game \n",
- "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton Crystal Palace \n",
- " 2020-09-12 Fulham-Arsenal Fulham \n",
- " 2020-09-12 Liverpool-Leeds United Liverpool \n",
- " 2020-09-12 West Ham United-Newcastle United West Ham United \n",
- " 2020-09-13 Tottenham-Everton Tottenham \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton England \n",
+ " 2020-09-12 Fulham-Arsenal England \n",
+ " 2020-09-12 Liverpool-Leeds United England \n",
+ " 2020-09-12 West Ham United-Newcastle England \n",
+ " 2020-09-13 Tottenham-Everton England \n",
"\n",
- " away_team \\\n",
- "league season game \n",
- "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton Southampton \n",
- " 2020-09-12 Fulham-Arsenal Arsenal \n",
- " 2020-09-12 Liverpool-Leeds United Leeds United \n",
- " 2020-09-12 West Ham United-Newcastle United Newcastle United \n",
- " 2020-09-13 Tottenham-Everton Everton \n",
+ " away_team_country_name \\\n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton England \n",
+ " 2020-09-12 Fulham-Arsenal England \n",
+ " 2020-09-12 Liverpool-Leeds United England \n",
+ " 2020-09-12 West Ham United-Newcastle England \n",
+ " 2020-09-13 Tottenham-Everton England \n",
"\n",
" date \\\n",
"league season game \n",
- "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton 2020-09-12 15:00:00 \n",
- " 2020-09-12 Fulham-Arsenal 2020-09-12 12:30:00 \n",
- " 2020-09-12 Liverpool-Leeds United 2020-09-12 17:30:00 \n",
- " 2020-09-12 West Ham United-Newcastle United 2020-09-12 20:00:00 \n",
- " 2020-09-13 Tottenham-Everton 2020-09-13 16:30:00 \n",
- "\n",
- " url \\\n",
- "league season game \n",
- "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton https://www.whoscored.com/Matches/1485186/Live... \n",
- " 2020-09-12 Fulham-Arsenal https://www.whoscored.com/Matches/1485187/Live... \n",
- " 2020-09-12 Liverpool-Leeds United https://www.whoscored.com/Matches/1485188/Live... \n",
- " 2020-09-12 West Ham United-Newcastle United https://www.whoscored.com/Matches/1485191/Live... \n",
- " 2020-09-13 Tottenham-Everton https://www.whoscored.com/Matches/1485189/Live... \n",
- "\n",
- " stage \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton 2020-09-12 14:00:00+00:00 \n",
+ " 2020-09-12 Fulham-Arsenal 2020-09-12 11:30:00+00:00 \n",
+ " 2020-09-12 Liverpool-Leeds United 2020-09-12 16:30:00+00:00 \n",
+ " 2020-09-12 West Ham United-Newcastle 2020-09-12 19:00:00+00:00 \n",
+ " 2020-09-13 Tottenham-Everton 2020-09-13 15:30:00+00:00 \n",
+ "\n",
+ " home_score \\\n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton 1 \n",
+ " 2020-09-12 Fulham-Arsenal 0 \n",
+ " 2020-09-12 Liverpool-Leeds United 4 \n",
+ " 2020-09-12 West Ham United-Newcastle 0 \n",
+ " 2020-09-13 Tottenham-Everton 0 \n",
+ "\n",
+ " away_score \\\n",
"league season game \n",
- "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton NaN \n",
- " 2020-09-12 Fulham-Arsenal NaN \n",
- " 2020-09-12 Liverpool-Leeds United NaN \n",
- " 2020-09-12 West Ham United-Newcastle United NaN \n",
- " 2020-09-13 Tottenham-Everton NaN "
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton 0 \n",
+ " 2020-09-12 Fulham-Arsenal 3 \n",
+ " 2020-09-12 Liverpool-Leeds United 3 \n",
+ " 2020-09-12 West Ham United-Newcastle 2 \n",
+ " 2020-09-13 Tottenham-Everton 1 \n",
+ "\n",
+ " incidents \\\n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton [{'minute': '13', 'type': 1, 'subType': 1, 'pl... \n",
+ " 2020-09-12 Fulham-Arsenal [{'minute': '8', 'type': 1, 'subType': 1, 'pla... \n",
+ " 2020-09-12 Liverpool-Leeds United [{'minute': '4', 'type': 1, 'subType': 2, 'pla... \n",
+ " 2020-09-12 West Ham United-Newcastle [{'minute': '56', 'type': 1, 'subType': 1, 'pl... \n",
+ " 2020-09-13 Tottenham-Everton [{'minute': '55', 'type': 1, 'subType': 1, 'pl... \n",
+ "\n",
+ " bets \\\n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton None \n",
+ " 2020-09-12 Fulham-Arsenal None \n",
+ " 2020-09-12 Liverpool-Leeds United None \n",
+ " 2020-09-12 West Ham United-Newcastle None \n",
+ " 2020-09-13 Tottenham-Everton None \n",
+ "\n",
+ " aggregate_winner_field \\\n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton None \n",
+ " 2020-09-12 Fulham-Arsenal None \n",
+ " 2020-09-12 Liverpool-Leeds United None \n",
+ " 2020-09-12 West Ham United-Newcastle None \n",
+ " 2020-09-13 Tottenham-Everton None \n",
+ "\n",
+ " winner_field \\\n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton 0.0 \n",
+ " 2020-09-12 Fulham-Arsenal 1.0 \n",
+ " 2020-09-12 Liverpool-Leeds United 0.0 \n",
+ " 2020-09-12 West Ham United-Newcastle 1.0 \n",
+ " 2020-09-13 Tottenham-Everton 1.0 \n",
+ "\n",
+ " period \\\n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton 7 \n",
+ " 2020-09-12 Fulham-Arsenal 7 \n",
+ " 2020-09-12 Liverpool-Leeds United 7 \n",
+ " 2020-09-12 West Ham United-Newcastle 7 \n",
+ " 2020-09-13 Tottenham-Everton 7 \n",
+ "\n",
+ " extra_result_field \\\n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton None \n",
+ " 2020-09-12 Fulham-Arsenal None \n",
+ " 2020-09-12 Liverpool-Leeds United None \n",
+ " 2020-09-12 West Ham United-Newcastle None \n",
+ " 2020-09-13 Tottenham-Everton None \n",
+ "\n",
+ " home_extratime_score \\\n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton None \n",
+ " 2020-09-12 Fulham-Arsenal None \n",
+ " 2020-09-12 Liverpool-Leeds United None \n",
+ " 2020-09-12 West Ham United-Newcastle None \n",
+ " 2020-09-13 Tottenham-Everton None \n",
+ "\n",
+ " away_extratime_score \\\n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton None \n",
+ " 2020-09-12 Fulham-Arsenal None \n",
+ " 2020-09-12 Liverpool-Leeds United None \n",
+ " 2020-09-12 West Ham United-Newcastle None \n",
+ " 2020-09-13 Tottenham-Everton None \n",
+ "\n",
+ " home_penalty_score \\\n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton None \n",
+ " 2020-09-12 Fulham-Arsenal None \n",
+ " 2020-09-12 Liverpool-Leeds United None \n",
+ " 2020-09-12 West Ham United-Newcastle None \n",
+ " 2020-09-13 Tottenham-Everton None \n",
+ "\n",
+ " away_penalty_score \\\n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton None \n",
+ " 2020-09-12 Fulham-Arsenal None \n",
+ " 2020-09-12 Liverpool-Leeds United None \n",
+ " 2020-09-12 West Ham United-Newcastle None \n",
+ " 2020-09-13 Tottenham-Everton None \n",
+ "\n",
+ " started_at_utc \\\n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton 2020-09-12T14:01:42Z \n",
+ " 2020-09-12 Fulham-Arsenal 2020-09-12T11:32:23Z \n",
+ " 2020-09-12 Liverpool-Leeds United 2020-09-12T16:30:21Z \n",
+ " 2020-09-12 West Ham United-Newcastle 2020-09-12T19:00:32Z \n",
+ " 2020-09-13 Tottenham-Everton 2020-09-13T15:30:20Z \n",
+ "\n",
+ " first_half_ended_at_utc \\\n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton None \n",
+ " 2020-09-12 Fulham-Arsenal None \n",
+ " 2020-09-12 Liverpool-Leeds United None \n",
+ " 2020-09-12 West Ham United-Newcastle None \n",
+ " 2020-09-13 Tottenham-Everton None \n",
+ "\n",
+ " second_half_started_at_utc \\\n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton 2020-09-12T15:04:01Z \n",
+ " 2020-09-12 Fulham-Arsenal 2020-09-12T12:35:50Z \n",
+ " 2020-09-12 Liverpool-Leeds United 2020-09-12T17:32:57Z \n",
+ " 2020-09-12 West Ham United-Newcastle 2020-09-12T20:03:20Z \n",
+ " 2020-09-13 Tottenham-Everton 2020-09-13T16:31:33Z \n",
+ "\n",
+ " stage \n",
+ "league season game \n",
+ "ENG-Premier League 2021 2020-09-12 Crystal Palace-Southampton None \n",
+ " 2020-09-12 Fulham-Arsenal None \n",
+ " 2020-09-12 Liverpool-Leeds United None \n",
+ " 2020-09-12 West Ham United-Newcastle None \n",
+ " 2020-09-13 Tottenham-Everton None "
]
},
"execution_count": 5,
@@ -475,31 +1045,32 @@
" | \n",
" | \n",
" | \n",
+ " game_id | \n",
" period | \n",
" minute | \n",
+ " second | \n",
" expanded_minute | \n",
" type | \n",
" outcome_type | \n",
+ " team_id | \n",
" team | \n",
+ " player_id | \n",
" player | \n",
- " qualifiers | \n",
" x | \n",
" y | \n",
" end_x | \n",
" end_y | \n",
" goal_mouth_y | \n",
" goal_mouth_z | \n",
+ " blocked_x | \n",
+ " blocked_y | \n",
+ " qualifiers | \n",
" is_touch | \n",
" is_shot | \n",
" is_goal | \n",
+ " card_type | \n",
" related_event_id | \n",
" related_player_id | \n",
- " blocked_x | \n",
- " blocked_y | \n",
- " card_type | \n",
- " game_id | \n",
- " team_id | \n",
- " player_id | \n",
" \n",
" \n",
" league | \n",
@@ -531,6 +1102,7 @@
" | \n",
" | \n",
" | \n",
+ " | \n",
"
\n",
" \n",
" \n",
@@ -539,149 +1111,162 @@
" 2021 | \n",
" 2021-01-12 Burnley-Manchester United | \n",
" 2253458317 | \n",
+ " 1485184 | \n",
" PreMatch | \n",
" 0 | \n",
+ " 0.0 | \n",
" 0 | \n",
" FormationSet | \n",
" Successful | \n",
+ " 184 | \n",
" Burnley | \n",
" NaN | \n",
- " [{'type': {'displayName': 'TeamPlayerFormation... | \n",
+ " NaN | \n",
" 0.0 | \n",
" 0.0 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
- " False | \n",
" NaN | \n",
" NaN | \n",
+ " [{'type': {'displayName': 'TeamPlayerFormation... | \n",
+ " False | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
- " 1485184 | \n",
- " 184 | \n",
- " NaN | \n",
" \n",
" \n",
" 2253458375 | \n",
+ " 1485184 | \n",
" PreMatch | \n",
" 0 | \n",
+ " 0.0 | \n",
" 0 | \n",
" FormationSet | \n",
" Successful | \n",
+ " 32 | \n",
" Man Utd | \n",
" NaN | \n",
- " [{'type': {'displayName': 'CaptainPlayerId', '... | \n",
+ " NaN | \n",
" 0.0 | \n",
" 0.0 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
- " False | \n",
- " NaN | \n",
" NaN | \n",
" NaN | \n",
+ " [{'type': {'displayName': 'CaptainPlayerId', '... | \n",
+ " False | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
- " 1485184 | \n",
- " 32 | \n",
" NaN | \n",
"
\n",
" \n",
" 2253487469 | \n",
+ " 1485184 | \n",
" FirstHalf | \n",
" 0 | \n",
+ " 0.0 | \n",
" 0 | \n",
" Start | \n",
" Successful | \n",
+ " 184 | \n",
" Burnley | \n",
" NaN | \n",
- " [] | \n",
+ " NaN | \n",
" 0.0 | \n",
" 0.0 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
- " False | \n",
" NaN | \n",
" NaN | \n",
+ " [] | \n",
+ " False | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
- " 1485184 | \n",
- " 184 | \n",
- " NaN | \n",
"
\n",
" \n",
" 2253487473 | \n",
+ " 1485184 | \n",
" FirstHalf | \n",
" 0 | \n",
+ " 0.0 | \n",
" 0 | \n",
" Start | \n",
" Successful | \n",
+ " 32 | \n",
" Man Utd | \n",
" NaN | \n",
- " [] | \n",
+ " NaN | \n",
" 0.0 | \n",
" 0.0 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
- " False | \n",
- " NaN | \n",
" NaN | \n",
" NaN | \n",
+ " [] | \n",
+ " False | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
- " 1485184 | \n",
- " 32 | \n",
" NaN | \n",
"
\n",
" \n",
" 2253487625 | \n",
+ " 1485184 | \n",
" FirstHalf | \n",
" 0 | \n",
+ " 0.0 | \n",
" 0 | \n",
" Pass | \n",
" Successful | \n",
+ " 184 | \n",
" Burnley | \n",
+ " 79050.0 | \n",
" Ashley Westwood | \n",
- " [{'type': {'displayName': 'Angle', 'value': 21... | \n",
" 50.3 | \n",
" 50.3 | \n",
" 30.5 | \n",
" 50.3 | \n",
" NaN | \n",
" NaN | \n",
- " True | \n",
" NaN | \n",
" NaN | \n",
+ " [{'type': {'displayName': 'Angle', 'value': 21... | \n",
+ " True | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
- " 1485184 | \n",
- " 184 | \n",
- " 79050.0 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
+ " game_id \\\n",
+ "league season game id \n",
+ "ENG-Premier League 2021 2021-01-12 Burnley-Manchester United 2253458317 1485184 \n",
+ " 2253458375 1485184 \n",
+ " 2253487469 1485184 \n",
+ " 2253487473 1485184 \n",
+ " 2253487625 1485184 \n",
+ "\n",
" period \\\n",
"league season game id \n",
"ENG-Premier League 2021 2021-01-12 Burnley-Manchester United 2253458317 PreMatch \n",
@@ -698,6 +1283,14 @@
" 2253487473 0 \n",
" 2253487625 0 \n",
"\n",
+ " second \\\n",
+ "league season game id \n",
+ "ENG-Premier League 2021 2021-01-12 Burnley-Manchester United 2253458317 0.0 \n",
+ " 2253458375 0.0 \n",
+ " 2253487469 0.0 \n",
+ " 2253487473 0.0 \n",
+ " 2253487625 0.0 \n",
+ "\n",
" expanded_minute \\\n",
"league season game id \n",
"ENG-Premier League 2021 2021-01-12 Burnley-Manchester United 2253458317 0 \n",
@@ -722,6 +1315,14 @@
" 2253487473 Successful \n",
" 2253487625 Successful \n",
"\n",
+ " team_id \\\n",
+ "league season game id \n",
+ "ENG-Premier League 2021 2021-01-12 Burnley-Manchester United 2253458317 184 \n",
+ " 2253458375 32 \n",
+ " 2253487469 184 \n",
+ " 2253487473 32 \n",
+ " 2253487625 184 \n",
+ "\n",
" team \\\n",
"league season game id \n",
"ENG-Premier League 2021 2021-01-12 Burnley-Manchester United 2253458317 Burnley \n",
@@ -730,6 +1331,14 @@
" 2253487473 Man Utd \n",
" 2253487625 Burnley \n",
"\n",
+ " player_id \\\n",
+ "league season game id \n",
+ "ENG-Premier League 2021 2021-01-12 Burnley-Manchester United 2253458317 NaN \n",
+ " 2253458375 NaN \n",
+ " 2253487469 NaN \n",
+ " 2253487473 NaN \n",
+ " 2253487625 79050.0 \n",
+ "\n",
" player \\\n",
"league season game id \n",
"ENG-Premier League 2021 2021-01-12 Burnley-Manchester United 2253458317 NaN \n",
@@ -738,14 +1347,6 @@
" 2253487473 NaN \n",
" 2253487625 Ashley Westwood \n",
"\n",
- " qualifiers \\\n",
- "league season game id \n",
- "ENG-Premier League 2021 2021-01-12 Burnley-Manchester United 2253458317 [{'type': {'displayName': 'TeamPlayerFormation... \n",
- " 2253458375 [{'type': {'displayName': 'CaptainPlayerId', '... \n",
- " 2253487469 [] \n",
- " 2253487473 [] \n",
- " 2253487625 [{'type': {'displayName': 'Angle', 'value': 21... \n",
- "\n",
" x \\\n",
"league season game id \n",
"ENG-Premier League 2021 2021-01-12 Burnley-Manchester United 2253458317 0.0 \n",
@@ -794,6 +1395,30 @@
" 2253487473 NaN \n",
" 2253487625 NaN \n",
"\n",
+ " blocked_x \\\n",
+ "league season game id \n",
+ "ENG-Premier League 2021 2021-01-12 Burnley-Manchester United 2253458317 NaN \n",
+ " 2253458375 NaN \n",
+ " 2253487469 NaN \n",
+ " 2253487473 NaN \n",
+ " 2253487625 NaN \n",
+ "\n",
+ " blocked_y \\\n",
+ "league season game id \n",
+ "ENG-Premier League 2021 2021-01-12 Burnley-Manchester United 2253458317 NaN \n",
+ " 2253458375 NaN \n",
+ " 2253487469 NaN \n",
+ " 2253487473 NaN \n",
+ " 2253487625 NaN \n",
+ "\n",
+ " qualifiers \\\n",
+ "league season game id \n",
+ "ENG-Premier League 2021 2021-01-12 Burnley-Manchester United 2253458317 [{'type': {'displayName': 'TeamPlayerFormation... \n",
+ " 2253458375 [{'type': {'displayName': 'CaptainPlayerId', '... \n",
+ " 2253487469 [] \n",
+ " 2253487473 [] \n",
+ " 2253487625 [{'type': {'displayName': 'Angle', 'value': 21... \n",
+ "\n",
" is_touch \\\n",
"league season game id \n",
"ENG-Premier League 2021 2021-01-12 Burnley-Manchester United 2253458317 False \n",
@@ -818,38 +1443,6 @@
" 2253487473 NaN \n",
" 2253487625 NaN \n",
"\n",
- " related_event_id \\\n",
- "league season game id \n",
- "ENG-Premier League 2021 2021-01-12 Burnley-Manchester United 2253458317 NaN \n",
- " 2253458375 NaN \n",
- " 2253487469 NaN \n",
- " 2253487473 NaN \n",
- " 2253487625 NaN \n",
- "\n",
- " related_player_id \\\n",
- "league season game id \n",
- "ENG-Premier League 2021 2021-01-12 Burnley-Manchester United 2253458317 NaN \n",
- " 2253458375 NaN \n",
- " 2253487469 NaN \n",
- " 2253487473 NaN \n",
- " 2253487625 NaN \n",
- "\n",
- " blocked_x \\\n",
- "league season game id \n",
- "ENG-Premier League 2021 2021-01-12 Burnley-Manchester United 2253458317 NaN \n",
- " 2253458375 NaN \n",
- " 2253487469 NaN \n",
- " 2253487473 NaN \n",
- " 2253487625 NaN \n",
- "\n",
- " blocked_y \\\n",
- "league season game id \n",
- "ENG-Premier League 2021 2021-01-12 Burnley-Manchester United 2253458317 NaN \n",
- " 2253458375 NaN \n",
- " 2253487469 NaN \n",
- " 2253487473 NaN \n",
- " 2253487625 NaN \n",
- "\n",
" card_type \\\n",
"league season game id \n",
"ENG-Premier League 2021 2021-01-12 Burnley-Manchester United 2253458317 NaN \n",
@@ -858,29 +1451,21 @@
" 2253487473 NaN \n",
" 2253487625 NaN \n",
"\n",
- " game_id \\\n",
- "league season game id \n",
- "ENG-Premier League 2021 2021-01-12 Burnley-Manchester United 2253458317 1485184 \n",
- " 2253458375 1485184 \n",
- " 2253487469 1485184 \n",
- " 2253487473 1485184 \n",
- " 2253487625 1485184 \n",
- "\n",
- " team_id \\\n",
- "league season game id \n",
- "ENG-Premier League 2021 2021-01-12 Burnley-Manchester United 2253458317 184 \n",
- " 2253458375 32 \n",
- " 2253487469 184 \n",
- " 2253487473 32 \n",
- " 2253487625 184 \n",
+ " related_event_id \\\n",
+ "league season game id \n",
+ "ENG-Premier League 2021 2021-01-12 Burnley-Manchester United 2253458317 NaN \n",
+ " 2253458375 NaN \n",
+ " 2253487469 NaN \n",
+ " 2253487473 NaN \n",
+ " 2253487625 NaN \n",
"\n",
- " player_id \n",
- "league season game id \n",
- "ENG-Premier League 2021 2021-01-12 Burnley-Manchester United 2253458317 NaN \n",
- " 2253458375 NaN \n",
- " 2253487469 NaN \n",
- " 2253487473 NaN \n",
- " 2253487625 79050.0 "
+ " related_player_id \n",
+ "league season game id \n",
+ "ENG-Premier League 2021 2021-01-12 Burnley-Manchester United 2253458317 NaN \n",
+ " 2253458375 NaN \n",
+ " 2253487469 NaN \n",
+ " 2253487473 NaN \n",
+ " 2253487625 NaN "
]
},
"execution_count": 7,
@@ -911,7 +1496,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 8,
"id": "dfd8f019",
"metadata": {},
"outputs": [
@@ -956,10 +1541,18 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 9,
"id": "2078b018",
"metadata": {},
"outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/cw/dtaijupiter/NoCsBack/dtai/pieterr/Projects/soccerdata/.venv/lib/python3.11/site-packages/socceraction/spadl/opta.py:219: FutureWarning: Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n",
+ " ).bfill()\n"
+ ]
+ },
{
"data": {
"text/html": [
@@ -1090,7 +1683,7 @@
" 38.828 | \n",
" 11 | \n",
" 0 | \n",
- " 0 | \n",
+ " 4 | \n",
" 4 | \n",
" Robbie Brady | \n",
" Burnley | \n",
@@ -1112,7 +1705,7 @@
"1 31.080 38.220 36.312 15.844 0 1 0 \n",
"2 38.220 43.365 15.844 12.512 21 1 0 \n",
"3 43.365 90.300 12.512 49.708 0 1 0 \n",
- "4 90.300 105.000 49.708 38.828 11 0 0 \n",
+ "4 90.300 105.000 49.708 38.828 11 0 4 \n",
"\n",
" action_id player team \n",
"0 0 Ashley Westwood Burnley \n",
@@ -1122,7 +1715,7 @@
"4 4 Robbie Brady Burnley "
]
},
- "execution_count": 13,
+ "execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@@ -1134,10 +1727,18 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 10,
"id": "10f8a086",
"metadata": {},
"outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/cw/dtaijupiter/NoCsBack/dtai/pieterr/Projects/soccerdata/.venv/lib/python3.11/site-packages/socceraction/spadl/opta.py:219: FutureWarning: Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n",
+ " ).bfill()\n"
+ ]
+ },
{
"data": {
"text/html": [
@@ -1294,7 +1895,7 @@
"4 Matthew Lowton Burnley "
]
},
- "execution_count": 14,
+ "execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
@@ -1306,7 +1907,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 11,
"id": "1cff9142",
"metadata": {},
"outputs": [
@@ -1701,8 +2302,8 @@
" qualifiers | \n",
" related_player_id | \n",
" touch | \n",
- " shot | \n",
" goal | \n",
+ " shot | \n",
" type_name | \n",
" \n",
" \n",
@@ -1848,7 +2449,7 @@
"3 23.3 {178: True, 213: '5.0', 212: '21.7', 141: '23.... \n",
"4 73.1 {1: True, 213: '0.7', 56: 'Center', 178: True,... \n",
"\n",
- " related_player_id touch shot goal type_name \n",
+ " related_player_id touch goal shot type_name \n",
"0 NaN False False False start \n",
"1 NaN False False False start \n",
"2 NaN True False False pass \n",
@@ -1896,7 +2497,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "soccerdata",
+ "display_name": "/home/pieterr/Jupiter/Projects/soccerdata",
"language": "python",
"name": "soccerdata"
},
@@ -1910,7 +2511,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.6"
+ "version": "3.11.1"
},
"toc": {
"base_numbering": 1,
diff --git a/soccerdata/_common.py b/soccerdata/_common.py
index bfab09f6..09cb5a9b 100644
--- a/soccerdata/_common.py
+++ b/soccerdata/_common.py
@@ -452,6 +452,8 @@ def _download_and_save( # noqa: C901
response = self._driver.execute_script(
"return document.body.innerHTML;"
).encode("utf-8")
+ if response == b"":
+ raise Exception("Empty response.")
else:
if not isinstance(var, str):
raise NotImplementedError("Only implemented for single variables.")
diff --git a/soccerdata/whoscored.py b/soccerdata/whoscored.py
index 19d16009..2c108ba1 100644
--- a/soccerdata/whoscored.py
+++ b/soccerdata/whoscored.py
@@ -6,7 +6,7 @@
import time
from datetime import datetime
from pathlib import Path
-from typing import Callable, Dict, Iterable, List, Optional, Tuple, Union
+from typing import Callable, Dict, Iterable, List, Optional, Union
import numpy as np
import pandas as pd
@@ -14,12 +14,8 @@
from selenium.common.exceptions import (
ElementClickInterceptedException,
NoSuchElementException,
- TimeoutException,
)
from selenium.webdriver.common.by import By
-from selenium.webdriver.remote.webelement import WebElement
-from selenium.webdriver.support import expected_conditions as ec
-from selenium.webdriver.support.ui import WebDriverWait
from ._common import (
BaseSeleniumReader,
@@ -127,6 +123,44 @@ def _parse_datetime(ts: str) -> datetime:
return datetime.strptime(ts, "%A, %b %d %Y %H:%M")
+def _parse_url(url: str) -> Dict:
+    """Extract the IDs embedded in a WhoScored URL.
+
+    Parameters
+    ----------
+    url : str
+        URL to parse. It must start with "https://www.whoscored.com/" or
+        with "/" (site-relative).
+
+    Returns
+    -------
+    dict
+        Keys "region_id", "league_id", "season_id", "stage_id", "match_id";
+        each value is a digit string, or None if that segment is absent.
+
+    Raises
+    ------
+    ValueError
+        If the URL could not be parsed.
+    """
+    # Dots in the host are escaped so they only match a literal "."; every
+    # path segment is optional so partial URLs still yield the IDs present.
+    patt = (
+        r"^(?:https:\/\/www\.whoscored\.com)?\/"
+        + r"(?:Regions\/(\d+)\/)?"
+        + r"(?:Tournaments\/(\d+)\/)?"
+        + r"(?:Seasons\/(\d+)\/)?"
+        + r"(?:Stages\/(\d+)\/)?"
+        + r"(?:Matches\/(\d+)\/)?"
+    )
+    matches = re.search(patt, url)
+    if matches is None:
+        # Reached when the URL starts with neither the host nor a "/".
+        raise ValueError(f"Could not parse URL: {url}")
+    keys = ("region_id", "league_id", "season_id", "stage_id", "match_id")
+    return dict(zip(keys, matches.groups()))
+
+
class WhoScored(BaseSeleniumReader):
"""Provides pd.DataFrames from data available at http://whoscored.com.
@@ -224,7 +258,6 @@ def read_leagues(self) -> pd.DataFrame:
"region": region["name"],
"league_id": league["id"],
"league": league["name"],
- "url": league["url"],
}
)
@@ -249,7 +282,11 @@ def read_seasons(self) -> pd.DataFrame:
seasons = []
for lkey, league in df_leagues.iterrows():
- url = WHOSCORED_URL + league.url
+ url = (
+ WHOSCORED_URL
+ + f"/Regions/{league['region_id']}"
+ + f"/Tournaments/{league['league_id']}"
+ )
filemask = "seasons/{}.html"
filepath = self.data_dir / filemask.format(lkey)
reader = self.get(url, filepath, var=None)
@@ -258,12 +295,15 @@ def read_seasons(self) -> pd.DataFrame:
tree = html.parse(reader)
for node in tree.xpath("//select[contains(@id,'seasons')]/option"):
# extract team IDs from links
+ season_url = node.get("value")
+ season_id = _parse_url(season_url)["season_id"]
seasons.append(
{
- "url": node.get("value"),
"league": lkey,
- "league_id": league.league_id,
"season": season_code(node.text),
+ "region_id": league.region_id,
+ "league_id": league.league_id,
+ "season_id": season_id,
}
)
@@ -275,88 +315,76 @@ def read_seasons(self) -> pd.DataFrame:
)
return df
- def _parse_season_stages(self) -> List[Dict]:
- match_selector = (
- "//div[contains(@id,'tournament-fixture')]//div[contains(@class,'divtable-row')]"
- )
- WebDriverWait(self._driver, 30, poll_frequency=1).until(
- ec.presence_of_element_located((By.XPATH, match_selector))
- )
- node_stages_selector = "//select[contains(@id,'stages')]/option"
- node_stages = self._driver.find_elements(By.XPATH, node_stages_selector)
- stages = []
- for stage in node_stages:
- if not re.search(r"Grp. ([A-Z])$", stage.text):
- # there is always a page with all group stage games combined
- stages.append({"url": stage.get_attribute("value"), "name": stage.text})
- return stages
-
- def _parse_schedule_page(self) -> Tuple[List[Dict], Optional[WebElement]]:
- match_selector = (
- "//div[contains(@id,'tournament-fixture')]//div[contains(@class,'divtable-row')]"
- )
- date_selector = "./div[contains(@class,'divtable-header')]"
- time_selector = "./div[contains(@class,'time')]"
- home_team_selector = "./div[contains(@class,'team home')]//a"
- away_team_selector = "./div[contains(@class,'team away')]//a"
- result_selector = "./div[contains(@class,'result')]//a"
+ def read_season_stages(self, force_cache: bool = False) -> pd.DataFrame:
+ """Retrieve the season stages for the selected leagues.
- try:
- WebDriverWait(self._driver, 30, poll_frequency=1).until(
- ec.presence_of_element_located((By.XPATH, match_selector))
+ Parameters
+ ----------
+ force_cache : bool
+ By default no cached data is used for the current season.
+ If True, will force the use of cached data anyway.
+
+ Returns
+ -------
+ pd.DataFrame
+ """
+ df_seasons = self.read_seasons()
+ filemask = "seasons/{}_{}.html"
+
+ season_stages = []
+ for (lkey, skey), season in df_seasons.iterrows():
+ current_season = not self._is_complete(lkey, skey)
+
+ # get season page
+ url = (
+ WHOSCORED_URL
+ + f"/Regions/{season['region_id']}"
+ + f"/Tournaments/{season['league_id']}"
+ + f"/Seasons/{season['season_id']}"
)
- date_str = None
- schedule_page = []
- for node in self._driver.find_elements(By.XPATH, match_selector):
- if node.get_attribute("data-id"):
- match_id = int(node.get_attribute("data-id"))
- time_str = node.find_element(By.XPATH, time_selector).get_attribute(
- "textContent"
- )
- match_url = node.find_element(By.XPATH, result_selector).get_attribute("href")
- schedule_page.append(
- {
- "date": _parse_datetime(f"{date_str} {time_str}"),
- "home_team": node.find_element(By.XPATH, home_team_selector).text,
- "away_team": node.find_element(By.XPATH, away_team_selector).text,
- "game_id": match_id,
- "url": match_url,
- }
- )
- else:
- date_str = node.find_element(By.XPATH, date_selector).text
- logger.info("Scraping game schedule for %s", date_str)
- except TimeoutException:
- schedule_page = []
+ filepath = self.data_dir / filemask.format(lkey, skey)
+ reader = self.get(url, filepath, var=None, no_cache=current_season and not force_cache)
+ tree = html.parse(reader)
- try:
- next_page_selector = (
- "//div[contains(@id,'date-controller')]"
- "/a[contains(@class,'previous') and not(contains(@class, 'is-disabled'))]"
+ # get default season stage
+ fixtures_url = tree.xpath("//a[text()='Fixtures']/@href")[0]
+ stage_id = _parse_url(fixtures_url)["stage_id"]
+ season_stages.append(
+ {
+ "league": lkey,
+ "season": skey,
+ "region_id": season.region_id,
+ "league_id": season.league_id,
+ "season_id": season.season_id,
+ "stage_id": stage_id,
+ "stage": None,
+ }
)
- next_page = self._driver.find_element(By.XPATH, next_page_selector)
- except NoSuchElementException:
- next_page = None
- return schedule_page, next_page
-
- def _parse_schedule(self, stage: Optional[str] = None) -> List[Dict]:
- schedule = []
- # Parse first page
- page_schedule, next_page = self._parse_schedule_page()
- schedule.extend(page_schedule)
- # Go to next page
- while next_page is not None:
- try:
- next_page.click()
- time.sleep(5)
- logger.debug("Next page")
- except ElementClickInterceptedException:
- self._handle_banner()
- # Parse next page
- page_schedule, next_page = self._parse_schedule_page()
- schedule.extend(page_schedule)
- schedule = [dict(item, stage=stage) for item in schedule]
- return schedule
+
+ # extract additional stages
+ for node in tree.xpath("//select[contains(@id,'stages')]/option"):
+ stage_url = node.get("value")
+ stage_id = _parse_url(stage_url)["stage_id"]
+ season_stages.append(
+ {
+ "league": lkey,
+ "season": skey,
+ "region_id": season.region_id,
+ "league_id": season.league_id,
+ "season_id": season.season_id,
+ "stage_id": stage_id,
+ "stage": node.text,
+ }
+ )
+
+ df = (
+ pd.DataFrame(season_stages)
+ .drop_duplicates(subset=["league", "season", "stage_id"], keep="last")
+ .set_index(["league", "season"])
+ .sort_index()
+ .loc[itertools.product(self.leagues, self.seasons)]
+ )
+ return df
def read_schedule(self, force_cache: bool = False) -> pd.DataFrame: # noqa: C901
"""Retrieve the game schedule for the selected leagues and seasons.
@@ -371,74 +399,82 @@ def read_schedule(self, force_cache: bool = False) -> pd.DataFrame: # noqa: C90
-------
pd.DataFrame
"""
- df_seasons = self.read_seasons()
- filemask = "matches/{}_{}.csv"
+ df_season_stages = self.read_season_stages(force_cache=force_cache)
+ filemask_schedule = "matches/{}_{}_{}_{}.json"
all_schedules = []
- for (lkey, skey), season in df_seasons.iterrows():
- filepath = self.data_dir / filemask.format(lkey, skey)
- url = WHOSCORED_URL + season.url
-
- schedule = []
- is_current_season = not self._is_complete(lkey, skey)
- no_cache = (not filepath.exists()) or self.no_cache
- if (is_current_season and not force_cache) or no_cache:
- # Scrape the season's schedule
- self._driver.get(url)
-
- # Check if season consists of multiple stages
- stages = self._parse_season_stages()
-
- # Handle a multi-stage season
- if len(stages) > 0:
- for stage in stages:
- url = WHOSCORED_URL + stage["url"].replace("Show", "Fixtures")
- self._driver.get(url)
- try:
- WebDriverWait(self._driver, 30, poll_frequency=1).until(
- ec.presence_of_element_located(
- (By.XPATH, "//div[@id='tournament-fixture']")
- )
- )
- except TimeoutException:
- # Tournaments sometimes do not have a fixtures page,
- # the summary page has to be used instead
- url = WHOSCORED_URL + stage["url"]
- self._driver.get(url)
- logger.info("Scraping game schedule with stage=%s from %s", stage, url)
- schedule.extend(self._parse_schedule(stage=stage["name"]))
-
- # Handle a single-stage season
- else:
- fixtures_nav_selector = "//a[text()='Fixtures']"
- fixtures_nav = self._driver.find_element(By.XPATH, fixtures_nav_selector)
- self._driver.get(fixtures_nav.get_attribute("href"))
- try:
- WebDriverWait(self._driver, 30, poll_frequency=1).until(
- ec.presence_of_element_located(
- (By.XPATH, "//div[@id='tournament-fixture']")
- )
- )
- except TimeoutException:
- # Tournaments sometimes do not have a fixtures page,
- # the summary page has to be used instead
- summary_nav_selector = "//a[text()='Fixtures']"
- summary_nav = self._driver.find_element(By.XPATH, summary_nav_selector)
- self._driver.get(summary_nav.get_attribute("href"))
- logger.info("Scraping game schedule from %s", url)
- schedule.extend(self._parse_schedule())
-
- # Cache the data
- df_schedule = pd.DataFrame(schedule).assign(league=lkey, season=skey)
- if not self.no_store:
- df_schedule.to_csv(filepath, index=False)
-
+ for (lkey, skey), stage in df_season_stages.iterrows():
+ current_season = not self._is_complete(lkey, skey)
+ stage_id = stage["stage_id"]
+ stage_name = stage["stage"]
+
+ # get the calendar of the season stage
+ season_stage_url = (
+ WHOSCORED_URL
+ + f"/Regions/{stage['region_id']}"
+ + f"/Tournaments/{stage['league_id']}"
+ + f"/Seasons/{stage['season_id']}"
+ + f"/Stages/{stage['stage_id']}"
+ )
+ if stage_name is not None:
+ calendar_filepath = self.data_dir / "matches/{}_{}_{}.html".format(
+ lkey, skey, stage_id
+ )
+ logger.info(
+ "Retrieving calendar for %s %s (%s)",
+ lkey,
+ skey,
+ stage_name,
+ )
else:
- # Load cached data
- logger.info("Retrieving game schedule of %s - %s from the cache", lkey, skey)
- df_schedule = pd.read_csv(filepath)
+ calendar_filepath = self.data_dir / f"matches/{lkey}_{skey}.html"
+ logger.info(
+ "Retrieving calendar for %s %s",
+ lkey,
+ skey,
+ )
+ calendar = self.get(
+ season_stage_url,
+ calendar_filepath,
+ var="wsCalendar",
+ no_cache=current_season and not force_cache,
+ )
+ mask = json.load(calendar)["mask"]
+
+ # get the fixtures for each month
+ it = [(year, month) for year in mask.keys() for month in mask[year].keys()]
+ for i, (year, month) in enumerate(it):
+ filepath = self.data_dir / filemask_schedule.format(lkey, skey, stage_id, month)
+ url = WHOSCORED_URL + f"/tournaments/{stage_id}/data/?d={year}{(int(month)+1):02d}"
+
+ if stage_name is not None:
+ logger.info(
+ "[%s/%s] Retrieving fixtures for %s %s (%s)",
+ i + 1,
+ len(it),
+ lkey,
+ skey,
+ stage_name,
+ )
+ else:
+ logger.info(
+ "[%s/%s] Retrieving fixtures for %s %s",
+ i + 1,
+ len(it),
+ lkey,
+ skey,
+ )
- all_schedules.append(df_schedule)
+ reader = self.get(
+ url, filepath, var=None, no_cache=current_season and not force_cache
+ )
+ data = json.load(reader)
+ for tournament in data["tournaments"]:
+ df_schedule = pd.DataFrame(tournament["matches"])
+ df_schedule["league"] = lkey
+ df_schedule["season"] = skey
+ df_schedule["stage"] = stage_name
+ all_schedules.append(df_schedule)
if len(all_schedules) == 0:
return pd.DataFrame(index=["league", "season", "game"])
@@ -446,15 +482,24 @@ def read_schedule(self, force_cache: bool = False) -> pd.DataFrame: # noqa: C90
# Construct the output dataframe
df = (
pd.concat(all_schedules)
- .drop_duplicates()
+ .drop_duplicates(subset=["id"])
.replace(
{
- "home_team": TEAMNAME_REPLACEMENTS,
- "away_team": TEAMNAME_REPLACEMENTS,
+ "homeTeamName": TEAMNAME_REPLACEMENTS,
+ "awayTeamName": TEAMNAME_REPLACEMENTS,
+ }
+ )
+ .rename(
+ columns={
+ "homeTeamName": "home_team",
+ "awayTeamName": "away_team",
+ "id": "game_id",
+ "startTimeUtc": "date",
}
)
.assign(date=lambda x: pd.to_datetime(x["date"]))
.assign(game=lambda df: df.apply(make_game_id, axis=1))
+ .pipe(standardize_colnames)
.set_index(["league", "season", "game"])
.sort_index()
)