Skip to content

Commit

Permalink
add HDF5 compression for background objects (#328)
Browse files (browse the repository at this point in the history)
  • Loading branch information
guynir42 authored Jul 16, 2024
1 parent 02db299 commit 36f6af8
Show file tree
Hide file tree
Showing 6 changed files with 22 additions and 9 deletions.
5 changes: 3 additions & 2 deletions models/background.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,8 +294,9 @@ def save( self, filename=None, **kwargs ):
f"Variance shape {self.variance.shape} does not match image shape {self.image_shape}"
)

bggrp.create_dataset( 'counts', data=self.counts )
bggrp.create_dataset( 'variance', data=self.variance )
opts = dict(compression='gzip', compression_opts=1, chunks=(128, 128))
bggrp.create_dataset( 'counts', data=self.counts, **opts )
bggrp.create_dataset( 'variance', data=self.variance, **opts )
elif self.format == 'polynomial':
raise NotImplementedError('Currently we do not support a polynomial background model. ')
bggrp.create_dataset( 'coeffs', data=self.counts )
Expand Down
3 changes: 0 additions & 3 deletions models/base.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
import warnings
import sys
import os
import math
import types
import hashlib
import pathlib
import logging
import json
import shutil
import datetime
from uuid import UUID

Expand Down
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def pytest_sessionfinish(session, exitstatus):

dbsession.commit()

verify_archive_database_empty = False # set to False to avoid spurious errors at end of tests (when debugging)
verify_archive_database_empty = True # set to False to avoid spurious errors at end of tests (when debugging)

if any_objects and verify_archive_database_empty:
raise RuntimeError('There are objects in the database. Some tests are not properly cleaning up!')
Expand Down
2 changes: 1 addition & 1 deletion tests/fixtures/datastore_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def make_datastore(
if image_cache_path is not None and output_path != image_cache_path:
warnings.warn(f'cache path {image_cache_path} does not match output path {output_path}')
else:
cache_base_name = output_path[:-10] # remove the '.image.fits' part
cache_base_name = output_path[:-16] # remove the '.image.fits.json' part
ds.cache_base_name = output_path
SCLogger.debug(f'Saving image to cache at: {output_path}')
use_cache = True # the two other conditions are true to even get to this part...
Expand Down
15 changes: 13 additions & 2 deletions tests/models/test_background.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
import os

import sep
import time

import pytest
import numpy as np
import h5py
Expand Down Expand Up @@ -72,9 +77,15 @@ def test_save_load_backgrounds(decam_raw_image, code_version):
with pytest.raises(RuntimeError, match='Counts shape .* does not match image shape .*'):
b2.save()

b2.counts = np.random.normal(bg_mean, 1, size=image.data.shape)
b2.variance = np.random.normal(bg_var, 1, size=image.data.shape)
# use actual background measurements so we can get a realistic estimate of the compression
back = sep.Background(image.data)
b2.counts = back.back()
b2.variance = back.rms() ** 2

t0 = time.perf_counter()
b2.save()
# print(f'Background save time: {time.perf_counter() - t0:.3f} s')
# print(f'Background file size: {os.path.getsize(b2.get_fullpath()) / 1024 ** 2:.3f} MB')

# check the filename contains the provenance hash
assert prov.id[:6] in b2.get_fullpath()
Expand Down
4 changes: 4 additions & 0 deletions util/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
# ======================================================================
# Functions for copying FileOnDisk objects to/from cache


def copy_to_cache(FoD, cache_dir, filepath=None):
"""Save a copy of the object (and, potentially, associated files) into a cache directory.
Expand Down Expand Up @@ -79,6 +80,7 @@ def copy_to_cache(FoD, cache_dir, filepath=None):

return json_filepath


def copy_list_to_cache(obj_list, cache_dir, filepath=None):
"""Copy a correlated list of objects to the cache directory.
Expand Down Expand Up @@ -137,6 +139,7 @@ def copy_list_to_cache(obj_list, cache_dir, filepath=None):

return json_filepath


def copy_from_cache(cls, cache_dir, filepath):
"""Copy and reconstruct an object from the cache directory.
Expand Down Expand Up @@ -200,6 +203,7 @@ def copy_from_cache(cls, cache_dir, filepath):

return output


def copy_list_from_cache(cls, cache_dir, filepath):
"""Copy and reconstruct a list of objects from the cache directory.
Expand Down

0 comments on commit 36f6af8

Please sign in to comment.