-
Notifications
You must be signed in to change notification settings - Fork 0
/
anoms.sql
113 lines (87 loc) · 5.58 KB
/
anoms.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
-- okay, now I've determined they must be calculating the baseline on a per station basis
-- so let's do that too.
-- first, add column baseline to tavg that contains the baseline for that station
alter table tavg add column baseline float8;
-- then create tmp relation and fill it with the baselines for each station in tavg
-- note that this will ignore stations not appearing in 1961-1990
create table tmp (stnid float8, baseline float8, cnt integer);
insert into tmp (select stnid, avg(mean), count(*) from tavg where year >= 1961 and year <= 1990 group by stnid);
-- out of curiousity, I explored the number of stations the baseline is based on
-- mean: 20
select avg(cnt) from tmp;
-- std dev: 9
select stddev(cnt) from tmp;
-- min: 1
select min(cnt) from tmp;
-- max: 52
select max(cnt) from tmp;
-- also, how many stations will now contribute to the yearly averages?
-- count: 6719/7280 (but at least 2 are corrupt...)
select count(*) from tmp;
-- now transfer those baselines back to tavg
update tavg set baseline = tmp.baseline from tmp where tmp.stnid = tavg.stnid;
-- now icorporate baseline into column "anom"
-- note that after this anom has 9,206 null values (corresponding to null baselines)
-- so that's how many measurements we're ignoring by doing this anomaly business this way
alter table tavg add column anom float8;
update tavg set anom = mean - baseline;
-- now I'll redo the averaging (or random selection) over grid boxes using anom instead of mean
-- find yearly average temperatures by first averaging over the stations in each grid box
-- note: this seriously overweights stations in sparsely populated boxes
-- do they really think this is reasonable?
create table yrly_grid_anoms (year integer, anom float8);
explain insert into yrly_grid_anoms (select tmp.year, avg(tmp.anom) from (select t.year as year, s.gridid as box, avg(t.anom) as anom from tavg as t, stations as s where t.stnid = s.stnid group by t.year, s.gridid) as tmp group by tmp.year);
-- export yrly_grid_avgs so I can graph it in R
-- this is practically identical to their plot
\copy yrly_grid_anoms to 'yrly_grid_anoms.csv' csv header;
-- make the plot without the grid assumption (but with the anomaly assumption)
-- just need to calculate yearly avg anoms and output
create table yrly_anoms (year integer, anom float8);
insert into yrly_anoms (select year, avg(anom) from tavg group by year);
\copy yrly_anoms to 'yrly_anoms.csv' csv header;
-- now redo using different baseline periods
-- baselineA: 1901-2000
-- baselineB: 1991-2000
-- baselineC: 1981-2010
-- baselineD: 2001-2010
-- first let's create a new table to hold the baseline calculations so we don't impact the tavg
-- table too much
create table tavg_baselines (stnid float8, year integer, mean float8, baselineA float8, anomA float8, baselineB float8, anomB float8, baselineC float8, anomC float8, baselineD float8, anomD float8);
insert into tavg_baselines(stnid, year, mean) (select stnid, year, mean from tavg);
-- now redo the baseline calculations four times, once for 1901-2000 and once for 1991-2000
-- once for 1981-2010 and once for 2001-2010
-- 1901-2000
create table tmpA (stnid float8, baseline float8, cnt integer);
insert into tmpA (select stnid, avg(mean), count(*) from tavg_baselines where year >= 1901 and year <= 2000 group by stnid);
update tavg_baselines set baselineA = tmpA.baseline from tmpA where tmpA.stnid = tavg_baselines.stnid;
-- 1991-2000
create table tmpB (stnid float8, baseline float8, cnt integer);
insert into tmpB (select stnid, avg(mean), count(*) from tavg_baselines where year >= 1991 and year <= 2000 group by stnid);
update tavg_baselines set baselineB = tmpB.baseline from tmpB where tmpB.stnid = tavg_baselines.stnid;
-- 1981-2010
create table tmpC (stnid float8, baseline float8, cnt integer);
insert into tmpC (select stnid, avg(mean), count(*) from tavg_baselines where year >= 1981 and year <= 2010 group by stnid);
update tavg_baselines set baselineC = tmpC.baseline from tmpC where tmpC.stnid = tavg_baselines.stnid;
-- 2001-2010
create table tmpD (stnid float8, baseline float8, cnt integer);
insert into tmpD (select stnid, avg(mean), count(*) from tavg_baselines where year >= 2001 and year <= 2010 group by stnid);
update tavg_baselines set baselineD = tmpD.baseline from tmpD where tmpD.stnid = tavg_baselines.stnid;
-- use those baselines to compute the anom columns
update tavg_baselines set anomA = mean - baselineA;
update tavg_baselines set anomB = mean - baselineB;
update tavg_baselines set anomC = mean - baselineC;
update tavg_baselines set anomD = mean - baselineD;
-- use the anoms to compute the yearly averages
create table yrly_anom_baselines (year integer, anomA float8, anomB float8, anomC float8, anomD float8);
insert into yrly_anom_baselines (select year, avg(anomA), avg(anomB), avg(anomC), avg(anomD) from tavg_baselines group by year);
-- export yrly_anom_baselines for plotting
\copy yrly_anom_baselines to 'yrly_anom_baselines4.csv' csv header;
create table tavg_experiment (stnid float8, year integer, mean float8, baseline float8, anom float8);
insert into tavg_experiment(stnid, year, mean) (select stnid, year, mean from tavg);
create table tmpE (stnid float8, baseline float8);
insert into tmpE (select stnid, avg(mean) from tavg_experiment group by stnid);
update tavg_experiment set baseline = tmpE.baseline from tmpE where tmpE.stnid = tavg_experiment.stnid;
update tavg_experiment set anom = mean - baseline;
create table yrly_anom_experiment (year integer, anom float8);
insert into yrly_anom_experiment (select year, avg(anom) from tavg_experiment group by year);
\copy yrly_anom_experiment to 'yrly_anom_varying.csv' csv header;