-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbounceforounce.py
120 lines (84 loc) · 4.17 KB
/
bounceforounce.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# This program takes .csv files created by beerscrape.py (using beermenus.com) to find the beers with the most alcohol per dollar
# Created as an exercise in working with pandas
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob
# Used to determine if a word is contained in the type column
def checkword(x, word):
    """Return True if ``word`` occurs in the ``'type'`` entry of ``x``.

    Parameters
    ----------
    x : mapping-like row (e.g. a DataFrame row passed by ``apply(axis=1)``)
        Must support ``x['type']``.
    word : str
        Substring to look for (case-sensitive).

    Returns
    -------
    bool
        False when the type entry is not a string (for example a NaN read
        from an empty CSV cell), otherwise the result of the substring test.
    """
    beer_type = x['type']
    # A missing value is a float NaN, which does not support the `in` operator.
    if not isinstance(beer_type, str):
        return False
    return word in beer_type
# Return the mean of the column within the stated group. If the group doesn't exist, returns 'none'
def group_mean(groupobj, group, column, mask):
    """Return the mean of ``column`` inside one group of a grouped frame.

    Parameters
    ----------
    groupobj : pandas GroupBy object
        Must provide ``.groups`` and ``.get_group``.
    group : hashable
        Key of the group to average.
    column : hashable
        Label of the column whose mean is wanted.
    mask : iterable of truthy/falsy values
        Membership flags; when none is truthy the group is treated as absent.

    Returns
    -------
    The column mean, or the string ``'none'`` when the mask is entirely
    falsy or the group key is not present in ``groupobj``.
    """
    # The mask alone is not proof that the group exists: a caller may build
    # it before rows are reassigned to another group, in which case
    # get_group() would raise KeyError. Check the key explicitly as well.
    if not any(mask) or group not in groupobj.groups:
        return 'none'
    return groupobj.get_group(group)[column].mean()
# One shared figure collects a histogram per CSV file; interactive mode is on.
plt.figure()
plt.ion()

csvnames = glob.glob('*.csv')  # Get all csv in the current folder

# (label printed in the summary, family name), in the order they are reported.
summary_rows = (
    ('total ciders= ', 'Cider'),
    ('total stouts= ', 'Stout'),
    ('total IPAs= ', 'IPA'),
    ('others= ', 'other'),
)

for nme in csvnames:
    beers = pd.read_csv(nme)
    if beers.empty:
        # Nothing to analyse, and iloc[-1] below would raise IndexError.
        continue

    # The label for this file is taken from the 'beer name' cell of the last
    # row (presumably where beerscrape.py stores the venue name - TODO confirm).
    # Kept in a plain variable: attributes set on a DataFrame are not carried
    # over to copies or sorted frames.
    venue = beers.iloc[-1]['beer name']

    # Keep only beers with positive ABV, quantity, oz and price. Comparisons
    # with NaN are False, so rows with missing values are dropped as well.
    beermask = (
        (beers['ABV'] > 0)
        & (beers['quanitity'] > 0)
        & (beers['oz'] > 0)
        & (beers['price'] > 0)
    )
    priced = beers[beermask].copy()

    # quantity times oz is the total volume (oz), times ABV/100 is the amount
    # of alcohol, then divided by price to get alcohol per dollar.
    priced['alc'] = (
        priced['quanitity'] * priced['oz'] * priced['ABV'] / 100 / priced['price']
    )

    # Sorted from lowest alc to highest, so the last row is the best value.
    bfs = priced.sort_values(by='alc')

    # Restrict the statistics to single servings: one unit of under 25 oz.
    singlemask = (bfs['quanitity'] == 1) & (bfs['oz'] < 25)
    if not singlemask.any():
        continue  # no single serving in this csv, move on to the next one
    bf = bfs[singlemask].copy()

    # Create a column for beer family. Every beer starts as 'other' until a
    # specific family keyword is found in its type. Later assignments
    # override earlier ones, so the order IPA -> Stout -> Cider matters.
    bf['family'] = 'other'
    bf['type'] = bf['type'].fillna('')

    mask_ipa = bf.apply(checkword, axis=1, args=('IPA',))
    bf.loc[mask_ipa, 'family'] = 'IPA'

    # Porters are counted together with stouts.
    mask_stout = (
        bf.apply(checkword, axis=1, args=('Stout',))
        | bf.apply(checkword, axis=1, args=('Porter',))
    )
    bf.loc[mask_stout, 'family'] = 'Stout'

    mask_cider = bf.apply(checkword, axis=1, args=('Cider',))
    bf.loc[mask_cider, 'family'] = 'Cider'

    # Create groups for the different families of beers.
    famgroup = bf.groupby('family')

    print(venue)
    for label, family in summary_rows:
        # Membership is read back from the final 'family' column: the keyword
        # masks above can be stale for rows that a later family overrode.
        in_family = bf['family'] == family
        print(label, in_family.sum(),
              ' Avg alc= ', group_mean(famgroup, family, 'alc', in_family),
              ' avg price= ', group_mean(famgroup, family, 'price', in_family))
    print('')
    # Best alcohol-per-dollar row among all valid servings of this file
    # (not only the single servings).
    print(bfs.iloc[-1])
    print('')

    bf['alc'].plot.hist(alpha=0.5, bins=20, label=venue)

# Figure-level labelling, done once after every file has been plotted.
plt.title('Single Servings')
plt.legend(loc='upper right')
plt.xlabel('Alc/$')
plt.show()