-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgenerate_top_4_system.py
138 lines (124 loc) · 4.86 KB
/
generate_top_4_system.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import pandas as pd
import numpy as np
import random
import os
# Set random seeds for reproducibility.
# The stat generator in this script draws from the stdlib `random` module,
# whose state is independent of NumPy's global generator, so both are seeded.
np.random.seed(42)
random.seed(42)
# Clubs covered by the generated dataset, in output order.
top_teams = [
    "Manchester City",
    "Arsenal",
    "Manchester United",
    "Liverpool",
]

# Twelve named players per club; the keys match the entries of top_teams.
team_players = {
    "Manchester City": [
        "Erling Haaland", "Kevin De Bruyne", "Rodri",
        "Phil Foden", "Jack Grealish", "Bernardo Silva",
        "Ruben Dias", "John Stones", "Kyle Walker",
        "Ederson", "Julian Alvarez", "Rico Lewis",
    ],
    "Arsenal": [
        "Bukayo Saka", "Martin Odegaard", "Gabriel Jesus",
        "Gabriel Martinelli", "William Saliba", "Declan Rice",
        "Aaron Ramsdale", "Gabriel Magalhaes", "Ben White",
        "Oleksandr Zinchenko", "Kai Havertz", "Eddie Nketiah",
    ],
    "Manchester United": [
        "Bruno Fernandes", "Marcus Rashford", "Rasmus Hojlund",
        "Mason Mount", "Lisandro Martinez", "Harry Maguire",
        "Raphael Varane", "Casemiro", "Scott McTominay",
        "David de Gea", "Antony", "Luke Shaw",
    ],
    "Liverpool": [
        "Mohamed Salah", "Virgil van Dijk", "Alisson",
        "Trent Alexander-Arnold", "Andy Robertson", "Darwin Nunez",
        "Cody Gakpo", "Luis Diaz", "Dominik Szoboszlai",
        "Wataru Endo", "Joel Matip", "Ibrahima Konate",
    ],
}
# Each playing position paired with its typical share of a top-team squad.
# The two public lists below are index-aligned views of this table.
# NOTE(review): position_weights is not referenced by the visible code.
_position_shares = (
    ("Goalkeeper", 0.15),
    ("Defender", 0.30),
    ("Midfielder", 0.30),
    ("Forward", 0.25),
)
positions = [label for label, _ in _position_shares]
position_weights = [share for _, share in _position_shares]
# Players whose rating is drawn from the 85-92 band instead of 70-85.
_STAR_PLAYERS = frozenset({
    'Erling Haaland', 'Kevin De Bruyne', 'Mohamed Salah', 'Virgil van Dijk',
    'Bruno Fernandes', 'Bukayo Saka', 'Martin Odegaard',
})

# Stat columns in output order, each with the sampler used to draw it:
# random.randint returns an int, random.uniform returns a float.
# The order also fixes the sequence of random draws, so reordering this
# tuple changes the values produced for a given seed.
_STAT_SAMPLERS = (
    ('Goals', random.randint),
    ('Assists', random.randint),
    ('Passes_Completed', random.randint),
    ('Pass_Accuracy', random.uniform),
    ('Shot_Accuracy', random.uniform),
    ('Tackles_Won', random.randint),
)

# (low, high) sampling bounds per position, aligned with _STAT_SAMPLERS.
_POSITION_STAT_BOUNDS = {
    'Goalkeeper': ((0, 1), (0, 2), (50, 300), (75, 95), (40, 60), (20, 100)),
    'Defender': ((0, 5), (0, 5), (100, 500), (80, 95), (50, 75), (50, 200)),
    'Midfielder': ((3, 15), (5, 20), (200, 700), (85, 95), (60, 85), (30, 150)),
    'Forward': ((10, 30), (5, 15), (50, 300), (70, 85), (75, 90), (10, 50)),
}


# Function to generate realistic player stats
def generate_player_stats(position, player_name=None):
    """Return a dict of randomly drawn season stats for one player.

    Args:
        position: 'Goalkeeper', 'Defender', 'Midfielder' or 'Forward'.
            Any other value uses the 'Forward' bounds.
        player_name: Optional player name. Names listed in _STAR_PLAYERS
            get a rating in the 85-92 band; everyone else (including
            None) gets a rating in the 70-85 band.

    Returns:
        A dict with the keys 'Rating', 'Goals', 'Assists',
        'Passes_Completed', 'Pass_Accuracy', 'Shot_Accuracy' and
        'Tackles_Won', in that order. 'Rating' and the two accuracy
        values are floats; the remaining values are ints.

    All values come from the stdlib ``random`` module's global generator,
    so results are repeatable only after ``random.seed(...)``.
    """
    # Adjust base rating for known star players
    if player_name in _STAR_PLAYERS:
        base_rating = random.uniform(85, 92)
    else:
        base_rating = random.uniform(70, 85)

    # Unknown positions fall back to the Forward profile.
    bounds = _POSITION_STAT_BOUNDS.get(
        position, _POSITION_STAT_BOUNDS['Forward']
    )

    stats = {'Rating': base_rating}
    for (stat_name, draw), (low, high) in zip(_STAT_SAMPLERS, bounds):
        stats[stat_name] = draw(low, high)
    return stats
# Generate player data
# Explicit position for every rostered player. Exact-name lookup is used
# instead of substring matching so that defenders and midfielders are not
# silently given the 'Forward' default (and forward-level goal tallies).
player_positions = {
    # Manchester City
    'Erling Haaland': 'Forward',
    'Kevin De Bruyne': 'Midfielder',
    'Rodri': 'Midfielder',
    'Phil Foden': 'Midfielder',
    'Jack Grealish': 'Forward',
    'Bernardo Silva': 'Midfielder',
    'Ruben Dias': 'Defender',
    'John Stones': 'Defender',
    'Kyle Walker': 'Defender',
    'Ederson': 'Goalkeeper',
    'Julian Alvarez': 'Forward',
    'Rico Lewis': 'Defender',
    # Arsenal
    'Bukayo Saka': 'Midfielder',
    'Martin Odegaard': 'Midfielder',
    'Gabriel Jesus': 'Forward',
    'Gabriel Martinelli': 'Forward',
    'William Saliba': 'Defender',
    'Declan Rice': 'Midfielder',
    'Aaron Ramsdale': 'Goalkeeper',
    'Gabriel Magalhaes': 'Defender',
    'Ben White': 'Defender',
    'Oleksandr Zinchenko': 'Defender',
    'Kai Havertz': 'Forward',
    'Eddie Nketiah': 'Forward',
    # Manchester United
    'Bruno Fernandes': 'Midfielder',
    'Marcus Rashford': 'Forward',
    'Rasmus Hojlund': 'Forward',
    'Mason Mount': 'Midfielder',
    'Lisandro Martinez': 'Defender',
    'Harry Maguire': 'Defender',
    'Raphael Varane': 'Defender',
    'Casemiro': 'Midfielder',
    'Scott McTominay': 'Midfielder',
    'David de Gea': 'Goalkeeper',
    'Antony': 'Forward',
    'Luke Shaw': 'Defender',
    # Liverpool
    'Mohamed Salah': 'Forward',
    'Virgil van Dijk': 'Defender',
    'Alisson': 'Goalkeeper',
    'Trent Alexander-Arnold': 'Defender',
    'Andy Robertson': 'Defender',
    'Darwin Nunez': 'Forward',
    'Cody Gakpo': 'Forward',
    'Luis Diaz': 'Forward',
    'Dominik Szoboszlai': 'Midfielder',
    'Wataru Endo': 'Midfielder',
    'Joel Matip': 'Defender',
    'Ibrahima Konate': 'Defender',
}

all_players = []
for team in top_teams:
    team_player_names = team_players[team]
    for player_name in team_player_names:
        # Names missing from the table keep the 'Forward' default.
        position = player_positions.get(player_name, 'Forward')
        # Generate player
        player = {
            'Player_Name': player_name,
            'Club': team,
            'Position': position
        }
        # Add stats
        player.update(generate_player_stats(position, player_name))
        all_players.append(player)
# Create DataFrame (one row per generated player)
df = pd.DataFrame(all_players)

# Save to CSV.
# The destination defaults to the original project data directory. Set the
# ML_SOCCER_DATA_DIR environment variable to write somewhere else, e.g. when
# running on a machine where the default path does not exist or is not
# writable. An empty value is treated as unset.
output_dir = (
    os.environ.get('ML_SOCCER_DATA_DIR')
    or '/Users/niladridas/Documents/ml-soccer/data'
)
output_path = os.path.join(output_dir, 'player_stats.csv')
os.makedirs(output_dir, exist_ok=True)
df.to_csv(output_path, index=False)

# Console summary of what was generated
print(f"Generated dataset with {len(df)} players across {len(top_teams)} top teams")
print("\nDataset Preview:")
print(df)
print("\nTeam Distribution:")
print(df['Club'].value_counts())
print("\nPosition Distribution:")
print(df['Position'].value_counts())