Skip to content

Commit

Permalink
limit ref maker to PTF reference images only (up to 2010)
Browse files Browse the repository at this point in the history
  • Loading branch information
guynir42 committed Jun 27, 2024
1 parent ad15b87 commit 0b2c0e6
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 36 deletions.
20 changes: 10 additions & 10 deletions pipeline/ref_maker.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ def __init__(self, **kwargs):
self.coadd_ex_prov = None # the provenance used to make the products of the coadd image
self.ref_upstream_hash = None # a hash identifying all upstreams of the reference provenance
self.ref_prov = None # the provenance of the reference itself
self.ref_set = None # the RefSet object that was found / created
self.refset = None # the RefSet object that was found / created

# these attributes tell us the place in the sky where we want to look for objects (given to run())
# optionally it also specifies which filter we want the reference to be in
Expand Down Expand Up @@ -393,18 +393,18 @@ def make_refset(self, session=None):

# now load or create a RefSet
for i in range(5): # a concurrent merge sort of loop
self.ref_set = dbsession.scalars(sa.select(RefSet).where(RefSet.name == self.pars.name)).first()
self.refset = dbsession.scalars(sa.select(RefSet).where(RefSet.name == self.pars.name)).first()

if self.ref_set is not None:
if self.refset is not None:
break
else: # not found any RefSet with this name
try:
self.ref_set = RefSet(
self.refset = RefSet(
name=self.pars.name,
description=self.pars.description,
upstream_hash=self.ref_upstream_hash,
)
dbsession.add(self.ref_set)
dbsession.add(self.refset)
dbsession.commit()
except IntegrityError as e:
# there was a violation on unique constraint on the "name" column:
Expand All @@ -416,20 +416,20 @@ def make_refset(self, session=None):
else: # if we didn't break out of the loop, there must have been some integrity error
raise e

if self.ref_set is None:
if self.refset is None:
raise RuntimeError(f'Failed to find or create a RefSet with the name "{self.pars.name}"!')

if self.ref_set.upstream_hash != self.ref_upstream_hash:
if self.refset.upstream_hash != self.ref_upstream_hash:
raise RuntimeError(
f'Found a RefSet with the name "{self.pars.name}", but it has a different upstream_hash!'
)

# If the provenance is not already on the RefSet, add it (or raise, if allow_append=False)
if self.ref_prov.id not in [p.id for p in self.ref_set.provenances]:
if self.ref_prov.id not in [p.id for p in self.refset.provenances]:
if self.pars.allow_append:
prov_list = self.ref_set.provenances
prov_list = self.refset.provenances
prov_list.append(self.ref_prov)
self.ref_set.provenances = prov_list # not sure if appending directly will trigger an update to DB
self.refset.provenances = prov_list # not sure if appending directly will trigger an update to DB
dbsession.commit()
else:
raise RuntimeError(
Expand Down
14 changes: 7 additions & 7 deletions pipeline/top_level.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,21 +428,21 @@ def make_provenance_tree(self, exposure, overrides=None, session=None, commit=Tr
is_testing = exp_prov.is_testing

ref_provs = None # allow multiple reference provenances for each refset
ref_set_name = self.subtractor.pars.refset
refset_name = self.subtractor.pars.refset
# If refset is None, we will just fail to produce a subtraction, but everything else works...
# Note that the upstreams for the subtraction provenance will be wrong, because we don't have
# any reference provenances to link to. But this is what you get when putting refset=None.
# Just know that the "output provenance" (e.g., of the Measurements) will never actually exist,
# even though you can use it to make the Report provenance (just so you have something to refer to).
if ref_set_name is not None:
if refset_name is not None:

ref_set = session.scalars(sa.select(RefSet).where(RefSet.name == ref_set_name)).first()
if ref_set is None:
raise ValueError(f'No reference set with name {ref_set_name} found in the database!')
refset = session.scalars(sa.select(RefSet).where(RefSet.name == refset_name)).first()
if refset is None:
raise ValueError(f'No reference set with name {refset_name} found in the database!')

ref_provs = ref_set.provenances
ref_provs = refset.provenances
if ref_provs is None or len(ref_provs) == 0:
raise ValueError(f'No provenances found for reference set {ref_set_name}!')
raise ValueError(f'No provenances found for reference set {refset_name}!')

provs['referencing'] = ref_provs # notice that this is a list, not a single provenance!
for step in PROCESS_OBJECTS: # produce the provenance for this step
Expand Down
4 changes: 2 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def pytest_sessionstart(session):
# Will be executed before the first test

# this is only to make the warnings into errors, so it is easier to track them down...
# warnings.filterwarnings('error', append=True) # comment this out in regular usage
warnings.filterwarnings('error', append=True) # comment this out in regular usage

setup_warning_filters() # load the list of warnings that are to be ignored (not just in tests)
# below are additional warnings that are ignored only during tests:
Expand Down Expand Up @@ -109,7 +109,7 @@ def pytest_sessionfinish(session, exitstatus):

dbsession.commit()

verify_archive_database_empty = False # set to False to avoid spurious errors at end of tests (when debugging)
verify_archive_database_empty = True # set to False to avoid spurious errors at end of tests (when debugging)

if any_objects and verify_archive_database_empty:
raise RuntimeError('There are objects in the database. Some tests are not properly cleaning up!')
Expand Down
10 changes: 5 additions & 5 deletions tests/fixtures/decam.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,7 @@ def decam_reference(decam_ref_datastore, refmaker_factory):
# ],
# is_testing=True,
# )
prov = maker.ref_set.provenances[0]
prov = maker.refset.provenances[0]
prov = session.merge(prov)

ref = Reference()
Expand Down Expand Up @@ -477,17 +477,17 @@ def decam_refset(refmaker_factory):

refmaker.make_refset()

yield refmaker.ref_set
yield refmaker.refset

# delete all the references and the refset
with SmartSession() as session:
refmaker.ref_set = session.merge(refmaker.ref_set)
for prov in refmaker.ref_set.provenances:
refmaker.refset = session.merge(refmaker.refset)
for prov in refmaker.refset.provenances:
refs = session.scalars(sa.select(Reference).where(Reference.provenance_id == prov.id)).all()
for ref in refs:
session.delete(ref)

session.delete(refmaker.ref_set)
session.delete(refmaker.refset)

session.commit()

Expand Down
6 changes: 3 additions & 3 deletions tests/fixtures/ptf.py
Original file line number Diff line number Diff line change
Expand Up @@ -568,16 +568,16 @@ def ptf_refset(refmaker_factory):

refmaker.make_refset() # this makes a refset without making any references

yield refmaker.ref_set
yield refmaker.refset

# delete all the references and the refset
with SmartSession() as session:
for prov in refmaker.ref_set.provenances:
for prov in refmaker.refset.provenances:
refs = session.scalars(sa.select(Reference).where(Reference.provenance_id == prov.id)).all()
for ref in refs:
session.delete(ref)

session.delete(refmaker.ref_set)
session.delete(refmaker.refset)

session.commit()

Expand Down
26 changes: 17 additions & 9 deletions tests/pipeline/test_making_references.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def test_making_refsets():

new_ref = maker.run(ra=0, dec=0, filter='R')
assert new_ref is None # cannot find a specific reference here
refset = maker.ref_set
refset = maker.refset

assert refset is not None # can produce a reference set without finding a reference
assert all(isinstance(p, Provenance) for p in maker.im_provs)
Expand Down Expand Up @@ -111,7 +111,7 @@ def test_making_refsets():
new_ref = maker.run(ra=0, dec=0, filter='R')
assert new_ref is None # still can't find images there

refset = maker.ref_set
refset = maker.refset
up_hash2 = refset.upstream_hash
assert up_hash1 == up_hash2 # the underlying data MUST be the same
assert len(refset.provenances) == 2
Expand All @@ -126,7 +126,7 @@ def test_making_refsets():
new_ref = maker.run(ra=0, dec=0, filter='R')
assert new_ref is None # still can't find images there

refset2 = maker.ref_set
refset2 = maker.refset
assert len(refset2.provenances) == 1
assert refset2.provenances[0].id == refset.provenances[1].id # these ref-sets share the same provenance!

Expand All @@ -145,14 +145,22 @@ def test_making_references(ptf_reference_images):
ref5 = None

try:
maker = RefMaker(maker={'name': name, 'instruments': ['PTF'], 'min_number': 4, 'max_number': 10})
maker = RefMaker(
maker={
'name': name,
'instruments': ['PTF'],
'min_number': 4,
'max_number': 10,
'end_time': '2010-01-01',
}
)
add_test_parameters(maker) # make sure we have a test parameter on everything
maker.coadd_pipeline.coadder.pars.test_parameter = uuid.uuid4().hex # do not load an existing image

t0 = time.perf_counter()
ref = maker.run(ra=188, dec=4.5, filter='R')
first_time = time.perf_counter() - t0
first_refset = maker.ref_set
first_refset = maker.refset
first_image = ref.image
assert ref is not None

Expand All @@ -165,7 +173,7 @@ def test_making_references(ptf_reference_images):
t0 = time.perf_counter()
ref2 = maker.run(ra=188, dec=4.5, filter='R')
second_time = time.perf_counter() - t0
second_refset = maker.ref_set
second_refset = maker.refset
second_image = ref2.image
assert second_time < first_time * 0.1 # should be much faster, we are reloading the reference set
assert ref2.id == ref.id
Expand All @@ -177,7 +185,7 @@ def test_making_references(ptf_reference_images):
t0 = time.perf_counter()
ref3 = maker.run(ra=188, dec=4.5, filter='R')
third_time = time.perf_counter() - t0
third_refset = maker.ref_set
third_refset = maker.refset
third_image = ref3.image
assert third_time < first_time * 0.1 # should be faster, we are loading the same reference
assert third_refset.id != first_refset.id
Expand All @@ -189,7 +197,7 @@ def test_making_references(ptf_reference_images):
t0 = time.perf_counter()
ref4 = maker.run(ra=188, dec=4.5, filter='R')
fourth_time = time.perf_counter() - t0
fourth_refset = maker.ref_set
fourth_refset = maker.refset
fourth_image = ref4.image
assert fourth_time < first_time * 0.1 # should be faster, we can still re-use the underlying coadd image
assert fourth_refset.id != first_refset.id
Expand All @@ -202,7 +210,7 @@ def test_making_references(ptf_reference_images):
t0 = time.perf_counter()
ref5 = maker.run(ra=188, dec=4.5, filter='R')
fifth_time = time.perf_counter() - t0
fifth_refset = maker.ref_set
fifth_refset = maker.refset
fifth_image = ref5.image
assert np.log10(fifth_time) == pytest.approx(np.log10(first_time), rel=0.2) # should take about the same time
assert ref5.id != ref.id
Expand Down

0 comments on commit 0b2c0e6

Please sign in to comment.