-
Notifications
You must be signed in to change notification settings - Fork 0
/
wr_simulate.py
182 lines (154 loc) · 6.68 KB
/
wr_simulate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
## Monte-Carlo Simulations
import os
import pickle
import random
import argparse
import csv
import numpy as np
import pandas as pd
import squigglepy as sq
from datetime import date
from email.policy import default
from scipy import stats
from pprint import pprint
from functools import partial
from collections import defaultdict
def _str2bool(value):
    """Parse a command-line boolean string into a bool.

    argparse's ``type=bool`` is a trap: ``bool("False")`` is ``True`` because
    any non-empty string is truthy, so ``--save False`` could never disable
    saving. This converter accepts the usual spellings of true/false and
    raises a clear error otherwise.
    """
    if isinstance(value, bool):
        return value
    lowered = value.strip().lower()
    if lowered in ('true', 't', 'yes', 'y', '1'):
        return True
    if lowered in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Expected a boolean value, got {!r}'.format(value))

parser = argparse.ArgumentParser(description='Generate welfare ranges')
parser.add_argument('--species', type=str, help="What species do you want to simulate the welfare range of?")
parser.add_argument('--unknown_prob', type=float, help="What probability do you assign Unknown judgements for this species?", default=0)
parser.add_argument('--weight_no', type=str, help="Do you want to give non-zero probability to lean no and likely no?")
parser.add_argument('--hc_weight', type=float, help="What weight do high-confidence proxies get relative to other proxies?")
parser.add_argument('--scenarios', type=int, help='How many Monte Carlo simulations to run?', default=10000)
parser.add_argument('--csv', type=str, help='Define the relative path to the CSV with the species scores information')
parser.add_argument('--path', type=str, help='Define a custom path for the saved model outputs', default='')
# BUG FIX: was type=bool, which made '--save False' evaluate to True.
parser.add_argument('--save', type=_str2bool, help='Set to False to not save (overwrite) model outputs', default=True)
parser.add_argument('--update_every', type=int, help='How many steps to run before updating?', default=1000)
# BUG FIX: was type=bool, same truthy-string problem as --save.
parser.add_argument('--verbose', type=_str2bool, help='Set to True to get scenario-specific output', default=False)
args = parser.parse_args()

# Module-level configuration constants read by the rest of the script.
SPECIES = args.species
UNKNOWN_PROB = args.unknown_prob
WEIGHT_NO = args.weight_no
HC_WEIGHT = args.hc_weight
N_SCENARIOS = args.scenarios
VERBOSE = args.verbose
CSV = args.csv
SAVE = args.save
PATH = args.path
update_every = args.update_every
# Species for which a companion sentience model has already produced
# simulated proxy scores (loaded from output_data/ later in the script).
SENT_SPECIES = [
    'bees', 'cockroaches', 'fruit_flies', 'ants',
    'c_elegans', 'crabs', 'crayfish', 'earthworms',
    'sea_hares', 'moon_jellyfish', 'spiders', 'octopuses',
    'plants', 'prokaryotes', 'protists', 'chickens',
    'cows', 'humans', 'sometimes_operates', 'bsf',
    'carp', 'salmon', 'silkworms', 'pigs',
]

# Percentiles at which summary output is printed.
SCENARIO_RANGES = [1, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99]
# Table of welfare-range judgements, one row per proxy, one column per species.
judgements = pd.read_csv(os.path.join('input_data', 'WR Judgments.csv'))

def _load_proxy_set(csv_path):
    """Return the set of proxy names found in the first column of csv_path."""
    with open(csv_path, newline='') as fh:
        return {row[0] for row in csv.reader(fh)}

# High-confidence proxy names for the welfare-range and sentience models.
hc_proxies = _load_proxy_set(os.path.join('input_data', 'WR High-Confidence Proxies.csv'))
sent_hc_proxies = _load_proxy_set(os.path.join('input_data', 'Sentience High-Confidence Proxies.csv'))
# Map each welfare-range proxy to the sentience proxies it overlaps with.
# CSV columns: sentience proxy, "y"/"n" overlap flag, corresponding WR proxy.
overlap_csv = os.path.join('input_data', 'Proxy Overlap.csv')
overlap_dict = {}
with open(overlap_csv) as f:
    rows = csv.reader(f, delimiter=',')
    next(rows, None)  # skip the header row
    for rec in rows:
        sent_proxy = rec[0].strip()
        in_both = rec[1].strip()
        corr_proxy = rec[2].strip()
        if in_both == "y":
            overlap_dict.setdefault(corr_proxy, []).append(sent_proxy)
# Probability interval assigned to each judgement category. A judgement is
# later turned into a Bernoulli probability by sampling uniformly between
# 'lower' and 'upper'; 'unknown' maps straight to the CLI-supplied point
# probability. Unless --weight_no is "Yes", the two "no" categories are
# pinned to probability zero.
if WEIGHT_NO == "Yes":
    _no_bounds = {'likely no': {'lower': 0, 'upper': 0.25},
                  'lean no': {'lower': 0.25, 'upper': 0.50}}
else:
    _no_bounds = {'likely no': {'lower': 0, 'upper': 0},
                  'lean no': {'lower': 0, 'upper': 0}}
judgment_prob_map = {**_no_bounds,
                     'lean yes': {'lower': 0.50, 'upper': 0.75},
                     'likely yes': {'lower': 0.75, 'upper': 1.00},
                     'unknown': UNKNOWN_PROB}
# Per-proxy judgements for the species under simulation.
species_scores = judgements[["proxies", SPECIES]]

# Accumulators, filled during the Monte Carlo loop:
#   simulated_probs[proxy]  -> list of per-scenario Bernoulli probabilities
#   simulated_scores[proxy] -> list of per-scenario scores
simulated_probs = {}
simulated_scores = {}

if SPECIES in SENT_SPECIES:
    # Reuse the per-scenario proxy scores produced by the companion
    # sentience model for this species.
    sent_pickle_path = '{}_simulated_scores.p'.format(
        os.path.join('output_data', 'sent_{}'.format(SPECIES)))
    # BUG FIX: the original called pickle.load(open(...)) and never closed
    # the file handle; use a context manager instead.
    with open(sent_pickle_path, 'rb') as fh:
        sent_scores = pickle.load(fh)

for proxy in species_scores["proxies"].to_list():
    simulated_probs[proxy] = []
    simulated_scores[proxy] = []
# Scenario-invariant data, hoisted out of the Monte Carlo loop:
# the number of proxies and this species' judgement per proxy never change
# between scenarios. (The original rebound the module-level name
# `judgements` to this Series inside every scenario, shadowing the
# judgement DataFrame; a distinct name avoids that.)
num_proxies = len(species_scores)
proxy_judgements = species_scores[SPECIES]

for s in range(N_SCENARIOS):
    # Progress reporting every `update_every` scenarios.
    if s % update_every == 0:
        if VERBOSE:
            print('-')
            print('### SCENARIO {} ###'.format(s + 1))
        else:
            print('... Completed {}/{}'.format(s + 1, N_SCENARIOS))

    # Per-scenario accumulators for proxies scored via a Bernoulli draw.
    bern_probs = []   # success probability per proxy
    proxies_arr = []  # proxy name aligned index-for-index with bern_probs

    # Iterate over proxies
    for ii in range(num_proxies):
        proxy = species_scores.proxies[ii]
        if SPECIES in SENT_SPECIES and proxy in overlap_dict:
            # This welfare-range proxy overlaps one or more sentience
            # proxies: average the sentience model's scenario-s scores
            # instead of drawing afresh, scaling by HC_WEIGHT whenever
            # exactly one side of the pairing is high-confidence.
            sent_proxies = overlap_dict[proxy]
            total = 0
            for sent_proxy in sent_proxies:
                score = sent_scores[sent_proxy][s]
                if proxy in hc_proxies:
                    if sent_proxy in sent_hc_proxies:
                        total += score
                    else:
                        total += score * HC_WEIGHT
                else:
                    if sent_proxy in sent_hc_proxies:
                        total += score / HC_WEIGHT
                    else:
                        total += score
            simulated_scores[proxy].append(total / len(sent_proxies))
        else:
            judgement = proxy_judgements[ii]
            if judgement == 'unknown':
                # 'unknown' maps directly to the CLI-supplied point probability.
                proxy_prob = judgment_prob_map[judgement]
            else:
                # Sample the probability uniformly within the judgement's interval.
                lower_prob = judgment_prob_map[judgement]['lower']
                upper_prob = judgment_prob_map[judgement]['upper']
                proxy_prob = random.uniform(lower_prob, upper_prob)
            bern_probs.append(proxy_prob)
            proxies_arr.append(proxy)

    # Obtain Bernoulli draws for all non-sentience-overlap proxies at once.
    num_non_sent_proxies = len(bern_probs)
    draws = stats.bernoulli.rvs(bern_probs, size=num_non_sent_proxies)

    # Store Bernoulli draw results; high-confidence proxies are up-weighted.
    for ii in range(num_non_sent_proxies):
        proxy = proxies_arr[ii]
        has_proxy = draws[ii]
        if proxy in hc_proxies:
            score = HC_WEIGHT * has_proxy
        else:
            score = has_proxy
        # BUG FIX: the original appended the loop-leaked `proxy_prob` (the
        # LAST proxy's probability) for every proxy here; record each
        # proxy's own sampled probability instead.
        simulated_probs[proxy].append(bern_probs[ii])
        simulated_scores[proxy].append(score)
# Persist the simulated per-proxy scores unless --save False was given.
# The output lands at '<PATH>simulated_scores.p' (PATH is prepended
# verbatim, so it may be a directory prefix or a filename prefix).
if SAVE:
    print('... Saving 1/1')
    # BUG FIX: the original passed an un-closed open(...) straight to
    # pickle.dump, leaking the file handle; use a context manager so the
    # file is flushed and closed even if dump raises.
    with open('{}simulated_scores.p'.format(PATH), 'wb') as fh:
        pickle.dump(simulated_scores, fh)