-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
executable file
·241 lines (189 loc) · 8.38 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
# -----------------------------------------------------------------------------
#
# Prerequisites:
#
# Install the required packages in your Python enviroment. Usually these works
#
# pip install pandas
# pip install matplotlib
# pip install sklearn
# pip install -U scikit-learn
#
#
# Recomended for the IDE
#
# pip install mypy
# -----------------------------------------------------------------------------
# Pandas
import pandas as pd
# Seagull
from src.Seagull.Seagull import Seagull
# Plots
from src.Plot.Plot import Plot
from src.Plot.Heatmap.Heatmap import Heatmap
from src.Plot.Barplots.Horizontal_Barplot import Horizontal_Barplot
# Analysis
from src.Analysis.Analysis import Analysis
from src.Analysis.Numerical_Categorical import Numerical_Categorical
from src import constants
# Example of how to construct a Seagull object and play around with it
def main():
# -------------------------------------------------------------------------
# Constants initialization
# -------------------------------------------------------------------------
# Tell in which folder are we saving the images
SAVING_FOLDER = constants.OUT_FOLDER
# -------------------------------------------------------------------------
# Tell which modules do you want to test
TEST_ALL = False
TEST_BASICS = False
TEST_RW = True
TEST_CATEGORIES = False
TEST_MIXED = False
TEST_HEATMAPS = False
TEST_CSV = False
TEST_BARPLOTS = False
TEST_NUMCATS = False
TEST_VODOROY = False
# -------------------------------------------------------------------------
# Example Heatmaps:
#
# - Create a default spotify dataset. This is composed of three tables
# Table 0: Artists
# Table 1: Songs
# Table 2: Which artist compose each song
#
# - Prepare the data for a heatmap.
# We will plot the top 20 artist, and which month of the year they
# release their songs.
#
# - Show how to plot a heatmap
# -------------------------------------------------------------------------
if(TEST_ALL or TEST_HEATMAPS):
print()
print(" -- Example Heatmap -- ")
print()
# ----------------------------------------------------------------------
# Load the spotify data into the three different datasets
# ----------------------------------------------------------------------
spotify_instances = Seagull.get_spotify_datasets()
spotifyArtitstDF = spotify_instances[0]
spotifyArtitstDF.print_overview()
spotifySongsDF = spotify_instances[1]
spotifySongsDF.print_overview()
spotifyComposersDF = spotify_instances[2]
spotifyComposersDF.print_overview()
# ----------------------------------------------------------------------
# Prepare the data for the plots
#
# ---- Get the top 20 artists
# ---- Count how many songs each release each month
# ---- Normalize the data from 0 to 1
#
# ----------------------------------------------------------------------
# How many artists do you want
totalTopArtists = 20
# Prepare the dataframe that will be use in the Heatmap later
heatmapDataDF = Seagull(totalTopArtists , 13)
heatmapDataDF.renameColumns(["Artist", "January", "February", "March", "April", "May", "June",
"July", "August", "September", "October","November","December"])
# Get the top 20 artists
topArtistsDF = spotifyArtitstDF.copy()
topArtistsDF.keepColumnTopValues(2, topValues = totalTopArtists)
topArtistsDF.print_overview()
# Initialize the heatmap data with the artists names
heatmapDataDF[:,0] = topArtistsDF[:,1]
# Initialize the rest of the heatmap data with zeros which are integers
for i in range(12):
heatmapDataDF.setColumnZeroes(i+1)
heatmapDataDF.print_overview()
# For each artist, get the number of songs released in each month
for i in range(totalTopArtists):
# Get the artist ID
artistID = topArtistsDF[i,0]
artistName = topArtistsDF[i,1]
# Search for the song made by this artist ID
songsIDs = spotifyComposersDF.getPanda().iloc[spotifyComposersDF.getPanda().iloc[:, 1].values == artistID, 0].values
# For each song, get the month of the year
for j in range(len(songsIDs)):
songID = songsIDs[j]
# Get the month of the year
currentSong = spotifySongsDF.getPanda().iloc[spotifySongsDF.getPanda().iloc[:, 0].values == songID, ]
currentMonth = currentSong.iloc[0,4]
# Add one to the heatmap
heatmapDataDF[i,currentMonth] = heatmapDataDF[i,currentMonth] + 1
# Show the heatmap data
heatmapDataDF.print_overview()
# Normalize the data by rows
# In this case, we are interested in the percentage of songs released in each month
# and check whether there is a trend of top 20 artist releasing, for example, in summer
heatmapDataDF.normalize(column=False, avoidFirstColumn=True)
heatmapDataDF.print_overview()
# Until here, the data is ready.
# Now let's do the plotting
# This is the default initialization of the heatmap object.
myImportantHeatmap = Heatmap(SAVING_FOLDER)
# Update the heatmap with the new data and show it
myImportantHeatmap.update_from_seagull(heatmapDataDF)
# Lets give it new labels
myImportantHeatmap.set_name("Top_20_artists_by_month")
myImportantHeatmap.set_title("Top 20 artists and the month of the year they release their songs")
myImportantHeatmap.set_x_label("Month")
myImportantHeatmap.set_y_label("Top 20 artists")
# Show the figure, this open up a window in runtime
myImportantHeatmap.show()
# Show the plot in the terminal via string representation
print(myImportantHeatmap)
# Save the plot
myImportantHeatmap.save()
# -------------------------------------------------------------------------
# Example CSV:
#
# - Create a Seagull object from a csv file
# - Print an overview of such object
# -------------------------------------------------------------------------
if(TEST_ALL or TEST_CSV):
print()
print(" -- Example CSV -- ")
print()
# ---------------------------------------------------------------------
# Prepare the dataframe
# ---------------------------------------------------------------------
irisDF = Seagull()
irisDF.loadFromCSV(constants.IRIS_PATH)
irisDF.print_overview()
# -------------------------------------------------------------------------
# Example 4:
#
# - Do a barplot
# -------------------------------------------------------------------------
if(TEST_ALL or TEST_BARPLOTS):
print()
print(" -- Example Barplots -- ")
print()
# ---------------------------------------------------------------------
# Init a random barplot, show it, and save it
# ---------------------------------------------------------------------
myHB = Horizontal_Barplot(SAVING_FOLDER)
myHB.show()
print(myHB)
myHB.save()
# -------------------------------------------------------------------------
# Example 4:
#
# - Do a barplot
# -------------------------------------------------------------------------
if(TEST_ALL or TEST_NUMCATS):
print()
print(" -- Example Numerical Categorical Analysis -- ")
print()
myAnalysis = Numerical_Categorical(folder_path = SAVING_FOLDER)
results = myAnalysis.get_parametric_pvalue()
print()
print(results)
print()
print(myAnalysis)
myAnalysis.save()
# Call the main function
if __name__ == "__main__":
main()