Commit
Merge pull request #45 from NREL/scale_postprocessing_results
Add columns from building characteristics report, including units_represented
nmerket authored May 23, 2019
2 parents 9f0e582 + 0e37712 commit d28d1b3
Showing 8 changed files with 99 additions and 11 deletions.
2 changes: 1 addition & 1 deletion .circleci/config.yml
@@ -16,7 +16,7 @@ jobs:
           name: Run PyTest
           command: |
             source env/bin/activate
-            pytest
+            pytest -v
       - run:
           name: Run coverage tests
           when: always
31 changes: 25 additions & 6 deletions buildstockbatch/base.py
@@ -70,12 +70,30 @@ def flatten_datapoint_json(d):
     }
     for k1, k2s in cols_to_keep.items():
         for k2 in k2s:
-            new_d['{}.{}'.format(k1, k2)] = d.get(k1, {}).get(k2)
-    for k1 in ('BuildExistingModel', 'SimulationOutputReport'):
-        for k2, v in d.get(k1, {}).items():
-            new_d['{}.{}'.format(k1, k2)] = v
+            new_d[f'{k1}.{k2}'] = d.get(k1, {}).get(k2)
+
+    # copy over all the keys and values from BuildExistingModel
+    col1 = 'BuildExistingModel'
+    for k, v in d.get(col1, {}).items():
+        new_d[f'{col1}.{k}'] = v
+
+    # if there are keys in BuildingCharacteristicsReport that aren't part of BuildExistingModel,
+    # copy them over and make them part of BuildExistingModel
+    col2 = 'BuildingCharacteristicsReport'
+    for k, v in d.get(col2, {}).items():
+        if k not in d.get(col1, {}):
+            new_d[f'{col1}.{k}'] = v  # using col1 to make it part of BuildExistingModel
+
+    # if there is no units_represented key, default to 1
+    units = int(new_d.get(f'{col1}.units_represented', 1))
+    new_d[f'{col1}.units_represented'] = units
+
+    col3 = 'SimulationOutputReport'
+    for k, v in d.get(col3, {}).items():
+        new_d[f'{col3}.{k}'] = v
 
     new_d['building_id'] = new_d['BuildExistingModel.building_id']
     del new_d['BuildExistingModel.building_id']
 
     return new_d

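To make the new flattening concrete, here is a minimal sketch of the merge-and-default logic above applied to a made-up datapoint dictionary. The characteristic names and values are hypothetical, and the cols_to_keep handling is elided.

# Hypothetical slice of a datapoint.json dict.
d = {
    'BuildExistingModel': {'building_id': 7, 'geometry_house_size': '1500-2499'},
    'BuildingCharacteristicsReport': {'units_represented': '40', 'location': 'CO'},
    'SimulationOutputReport': {'total_site_energy_mbtu': 85.2},
}

new_d = {}

# BuildExistingModel keys are copied under their own namespace.
col1 = 'BuildExistingModel'
for k, v in d.get(col1, {}).items():
    new_d[f'{col1}.{k}'] = v

# BuildingCharacteristicsReport keys that BuildExistingModel lacks are
# merged into the BuildExistingModel namespace.
col2 = 'BuildingCharacteristicsReport'
for k, v in d.get(col2, {}).items():
    if k not in d.get(col1, {}):
        new_d[f'{col1}.{k}'] = v

# units_represented is coerced to int and defaults to 1 when absent.
new_d[f'{col1}.units_represented'] = int(new_d.get(f'{col1}.units_represented', 1))

print(new_d['BuildExistingModel.units_represented'])  # 40
print(new_d['BuildExistingModel.location'])           # CO

A dictionary with no units_represented key anywhere comes out with BuildExistingModel.units_represented == 1, so every simulated building counts for at least one unit when results are scaled, which appears to be the point of the scale_postprocessing_results branch.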

@@ -613,11 +631,12 @@ def write_output(group_pq):
         if not os.path.isfile(full_path):
             continue
         new_pq = pd.read_parquet(full_path, engine='pyarrow')
-        new_pq.rename(columns=to_camelcase, inplace=True)
 
         building_id_match = re.search(r'bldg(\d+)', folder)
         assert building_id_match, f"The building results folder format should be: ~bldg(\\d+). Got: {folder}"
-        new_pq['building_id'] = int(building_id_match.group(1))
+        new_pq.rename(columns=lambda x: x.replace(':', '_').replace('[', '').replace(']', ''), inplace=True)
+        building_id = int(building_id_match.group(1))
+        new_pq['building_id'] = building_id
         parquets.append(new_pq)
 
     pq_size = (sum([sys.getsizeof(pq) for pq in parquets]) + sys.getsizeof(parquets)) / (1024 * 1024)
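The new rename replaces the earlier to_camelcase conversion (presumably why the tests below now sort timeseries on 'time' rather than 'Time') and strips characters that are awkward in parquet column names. A quick illustration, using a hypothetical EnergyPlus-style column name:

sanitize = lambda x: x.replace(':', '_').replace('[', '').replace(']', '')
print(sanitize('Electricity:Facility [kWh]'))  # Electricity_Facility kWh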
77 changes: 73 additions & 4 deletions buildstockbatch/test/test_base.py
@@ -82,6 +82,75 @@ def test_missing_simulation_output_report_applicable(basic_residential_project_file):
     assert((~df['simulation_output_report.applicable']).any())
 
 
+def test_combine_files_flexible(basic_residential_project_file):
+    # Allows addition/removal/renaming of columns. For columns that remain unchanged, verifies that the data
+    # matches the stored test_results. If this test passes but test_combine_files fails, then
+    # test_results/parquet and test_results/results_csvs need to be updated with new data *if* columns were
+    # indeed supposed to be added/removed/renamed.
+
+    project_filename, results_dir = basic_residential_project_file()
+
+    with patch.object(BuildStockBatchBase, 'weather_dir', None), \
+            patch.object(BuildStockBatchBase, 'get_dask_client') as get_dask_client_mock, \
+            patch.object(BuildStockBatchBase, 'results_dir', results_dir):
+        bsb = BuildStockBatchBase(project_filename)
+        bsb.process_results()
+        get_dask_client_mock.assert_called_once()
+
+    def simplify_columns(colname):
+        return colname.lower().replace('_', '')
+
+    # test results.csv files
+    reference_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'test_results', 'results_csvs')
+    test_path = os.path.join(results_dir, 'results_csvs')
+
+    test_csv = pd.read_csv(os.path.join(test_path, 'results_up00.csv.gz')).rename(columns=simplify_columns).\
+        sort_values('buildingid').reset_index().drop(columns=['index'])
+    reference_csv = pd.read_csv(os.path.join(reference_path, 'results_up00.csv.gz')).rename(columns=simplify_columns).\
+        sort_values('buildingid').reset_index().drop(columns=['index'])
+    mutual_cols = list(set(test_csv.columns).intersection(set(reference_csv.columns)))
+    pd.testing.assert_frame_equal(test_csv[mutual_cols], reference_csv[mutual_cols])
+
+    test_csv = pd.read_csv(os.path.join(test_path, 'results_up01.csv.gz')).rename(columns=simplify_columns).\
+        sort_values('buildingid').reset_index().drop(columns=['index'])
+    reference_csv = pd.read_csv(os.path.join(reference_path, 'results_up01.csv.gz')).rename(columns=simplify_columns).\
+        sort_values('buildingid').reset_index().drop(columns=['index'])
+    mutual_cols = list(set(test_csv.columns).intersection(set(reference_csv.columns)))
+    pd.testing.assert_frame_equal(test_csv[mutual_cols], reference_csv[mutual_cols])
+
+    # test parquet files
+    reference_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'test_results', 'parquet')
+    test_path = os.path.join(results_dir, 'parquet')
+
+    test_pq = pd.read_parquet(os.path.join(test_path, 'baseline', 'results_up00.parquet')).\
+        rename(columns=simplify_columns).sort_values('buildingid').reset_index().drop(columns=['index'])
+    reference_pq = pd.read_parquet(os.path.join(reference_path, 'baseline', 'results_up00.parquet')).\
+        rename(columns=simplify_columns).sort_values('buildingid').reset_index().drop(columns=['index'])
+    mutual_cols = list(set(test_pq.columns).intersection(set(reference_pq.columns)))
+    pd.testing.assert_frame_equal(test_pq[mutual_cols], reference_pq[mutual_cols])
+
+    test_pq = pd.read_parquet(os.path.join(test_path, 'upgrades', 'upgrade=1', 'results_up01.parquet')).\
+        rename(columns=simplify_columns).sort_values('buildingid').reset_index().drop(columns=['index'])
+    reference_pq = pd.read_parquet(os.path.join(reference_path, 'upgrades', 'upgrade=1', 'results_up01.parquet')).\
+        rename(columns=simplify_columns).sort_values('buildingid').reset_index().drop(columns=['index'])
+    mutual_cols = list(set(test_pq.columns).intersection(set(reference_pq.columns)))
+    pd.testing.assert_frame_equal(test_pq[mutual_cols], reference_pq[mutual_cols])
+
+    test_pq = pd.read_parquet(os.path.join(test_path, 'timeseries', 'upgrade=0', 'Group0.parquet')).\
+        rename(columns=simplify_columns).sort_values(['buildingid', 'time']).reset_index().drop(columns=['index'])
+    reference_pq = pd.read_parquet(os.path.join(reference_path, 'timeseries', 'upgrade=0', 'Group0.parquet')).\
+        rename(columns=simplify_columns).sort_values(['buildingid', 'time']).reset_index().drop(columns=['index'])
+    mutual_cols = list(set(test_pq.columns).intersection(set(reference_pq.columns)))
+    pd.testing.assert_frame_equal(test_pq[mutual_cols], reference_pq[mutual_cols])
+
+    test_pq = pd.read_parquet(os.path.join(test_path, 'timeseries', 'upgrade=1', 'Group0.parquet')).\
+        rename(columns=simplify_columns).sort_values(['buildingid', 'time']).reset_index().drop(columns=['index'])
+    reference_pq = pd.read_parquet(os.path.join(reference_path, 'timeseries', 'upgrade=1', 'Group0.parquet')).\
+        rename(columns=simplify_columns).sort_values(['buildingid', 'time']).reset_index().drop(columns=['index'])
+    mutual_cols = list(set(test_pq.columns).intersection(set(reference_pq.columns)))
+    pd.testing.assert_frame_equal(test_pq[mutual_cols], reference_pq[mutual_cols])
+
+
 def test_provide_buildstock_csv(basic_residential_project_file):
     with tempfile.TemporaryDirectory() as buildstock_csv_dir:
         buildstock_csv = os.path.join(buildstock_csv_dir, 'buildstock.csv')
@@ -192,15 +261,15 @@ def test_combine_files(basic_residential_project_file):
     pd.testing.assert_frame_equal(test_pq, reference_pq)
 
     test_pq = pd.read_parquet(os.path.join(test_path, 'timeseries', 'upgrade=0', 'Group0.parquet')).\
-        sort_values(['building_id', 'Time']).reset_index().drop(columns=['index'])
+        sort_values(['building_id', 'time']).reset_index().drop(columns=['index'])
     reference_pq = pd.read_parquet(os.path.join(reference_path, 'timeseries', 'upgrade=0', 'Group0.parquet'))\
-        .sort_values(['building_id', 'Time']).reset_index().drop(columns=['index'])
+        .sort_values(['building_id', 'time']).reset_index().drop(columns=['index'])
     pd.testing.assert_frame_equal(test_pq, reference_pq)
 
     test_pq = pd.read_parquet(os.path.join(test_path, 'timeseries', 'upgrade=1', 'Group0.parquet'))\
-        .sort_values(['building_id', 'Time']).reset_index().drop(columns=['index'])
+        .sort_values(['building_id', 'time']).reset_index().drop(columns=['index'])
     reference_pq = pd.read_parquet(os.path.join(reference_path, 'timeseries', 'upgrade=1', 'Group0.parquet'))\
-        .sort_values(['building_id', 'Time']).reset_index().drop(columns=['index'])
+        .sort_values(['building_id', 'time']).reset_index().drop(columns=['index'])
     pd.testing.assert_frame_equal(test_pq, reference_pq)
 
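The flexible test repeats one load-simplify-sort-compare sequence for each file pair. A small helper along these lines (hypothetical, not part of this commit) could express the pattern once:

import pandas as pd

def assert_shared_columns_equal(test_df, ref_df, sort_cols):
    # Sort both frames the same way and compare only the columns they share.
    test_df = test_df.sort_values(sort_cols).reset_index(drop=True)
    ref_df = ref_df.sort_values(sort_cols).reset_index(drop=True)
    mutual_cols = sorted(set(test_df.columns) & set(ref_df.columns))
    pd.testing.assert_frame_equal(test_df[mutual_cols], ref_df[mutual_cols])

Each block in test_combine_files_flexible would then collapse to two reads and one call, e.g. assert_shared_columns_equal(test_pq, reference_pq, ['buildingid', 'time']).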
Binary file modified buildstockbatch/test/test_results/results_csvs/results_up00.csv.gz
Binary diffs for the other modified test_results reference files are not shown.
