From 480c291181eb15db38114e15abb12a439101876e Mon Sep 17 00:00:00 2001 From: Rob Knop Date: Wed, 31 Jul 2024 06:35:57 -0700 Subject: [PATCH] Provenance Tagging (#338) --- ...4_07_25_1851-05bb57675701_provenancetag.py | 45 ++ default_config.yaml | 2 + improc/tools.py | 8 +- models/base.py | 7 +- models/image.py | 32 ++ models/provenance.py | 146 +++++- pipeline/data_store.py | 15 +- pipeline/top_level.py | 117 ++++- tests/conftest.py | 92 +++- tests/fixtures/conductor.py | 11 - tests/fixtures/datastore_factory.py | 76 ++- tests/fixtures/decam.py | 35 +- tests/fixtures/pipeline_objects.py | 26 +- tests/fixtures/ptf.py | 106 +++-- tests/models/test_provenance.py | 3 +- tests/pipeline/test_pipeline.py | 60 ++- tests/webap/test_webap.py | 157 +++++++ util/cache.py | 51 +- util/radec.py | 4 +- util/util.py | 2 +- webap/seechange_webap.py | 444 +++++++++++++----- webap/static/seechange.js | 131 ++++-- 22 files changed, 1274 insertions(+), 296 deletions(-) create mode 100644 alembic/versions/2024_07_25_1851-05bb57675701_provenancetag.py create mode 100644 tests/webap/test_webap.py diff --git a/alembic/versions/2024_07_25_1851-05bb57675701_provenancetag.py b/alembic/versions/2024_07_25_1851-05bb57675701_provenancetag.py new file mode 100644 index 00000000..0550a796 --- /dev/null +++ b/alembic/versions/2024_07_25_1851-05bb57675701_provenancetag.py @@ -0,0 +1,45 @@ +"""provenancetag + +Revision ID: 05bb57675701 +Revises: d86b7dee2172 +Create Date: 2024-07-25 18:51:53.756271 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = '05bb57675701' +down_revision = 'd86b7dee2172' +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.create_table('provenance_tags', + sa.Column('tag', sa.String(), nullable=False), + sa.Column('provenance_id', sa.String(), nullable=True), + sa.Column('created_at', sa.DateTime(timezone=True), nullable=False), + sa.Column('modified', sa.DateTime(timezone=True), nullable=False), + sa.Column('id', sa.BigInteger(), autoincrement=True, nullable=False), + sa.ForeignKeyConstraint(['provenance_id'], ['provenances.id'], name='provenance_tags_provenance_id_fkey', ondelete='CASCADE'), + sa.PrimaryKeyConstraint('id'), + sa.UniqueConstraint('tag', 'provenance_id', name='_provenancetag_prov_tag_uc') + ) + op.create_index(op.f('ix_provenance_tags_created_at'), 'provenance_tags', ['created_at'], unique=False) + op.create_index(op.f('ix_provenance_tags_id'), 'provenance_tags', ['id'], unique=False) + op.create_index(op.f('ix_provenance_tags_provenance_id'), 'provenance_tags', ['provenance_id'], unique=False) + op.create_index(op.f('ix_provenance_tags_tag'), 'provenance_tags', ['tag'], unique=False) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_index(op.f('ix_provenance_tags_tag'), table_name='provenance_tags') + op.drop_index(op.f('ix_provenance_tags_provenance_id'), table_name='provenance_tags') + op.drop_index(op.f('ix_provenance_tags_id'), table_name='provenance_tags') + op.drop_index(op.f('ix_provenance_tags_created_at'), table_name='provenance_tags') + op.drop_table('provenance_tags') + # ### end Alembic commands ### diff --git a/default_config.yaml b/default_config.yaml index e0e4c600..759e2f97 100644 --- a/default_config.yaml +++ b/default_config.yaml @@ -93,6 +93,8 @@ pipeline: save_before_subtraction: true # automatically save all the products at the end of the pipeline run save_at_finish: true + # the ProvenanceTag that the products of the pipeline should be associated with + provenance_tag: current preprocessing: # these steps need to be done on the images: either they came like that or we do it in the pipeline diff --git a/improc/tools.py b/improc/tools.py index e37b8ebc..5f4c3226 100644 --- a/improc/tools.py +++ b/improc/tools.py @@ -239,10 +239,10 @@ def strip_wcs_keywords( hdr ): """ - basematch = re.compile( "^C(RVAL|RPIX|UNIT|DELT|TYPE)[12]$" ) - cdmatch = re.compile( "^CD[12]_[12]$" ) - sipmatch = re.compile( "^[AB]P?_(ORDER|(\d+)_(\d+))$" ) - tpvmatch = re.compile( "^P[CV]\d+_\d+$" ) + basematch = re.compile( r"^C(RVAL|RPIX|UNIT|DELT|TYPE)[12]$" ) + cdmatch = re.compile( r"^CD[12]_[12]$" ) + sipmatch = re.compile( r"^[AB]P?_(ORDER|(\d+)_(\d+))$" ) + tpvmatch = re.compile( r"^P[CV]\d+_\d+$" ) tonuke = set() for kw in hdr.keys(): diff --git a/models/base.py b/models/base.py index 4e9e96fc..95ba3a31 100644 --- a/models/base.py +++ b/models/base.py @@ -196,8 +196,9 @@ def get_all_database_objects(display=False, session=None): A dictionary with the object class names as keys and the IDs list as values. """ - from models.provenance import Provenance, CodeVersion, CodeHash + from models.provenance import Provenance, ProvenanceTag, CodeVersion, CodeHash from models.datafile import DataFile + from models.knownexposure import KnownExposure, PipelineWorker from models.exposure import Exposure from models.image import Image from models.source_list import SourceList @@ -214,10 +215,10 @@ def get_all_database_objects(display=False, session=None): from models.user import AuthUser, PasswordLink models = [ - CodeHash, CodeVersion, Provenance, DataFile, Exposure, Image, + CodeHash, CodeVersion, Provenance, ProvenanceTag, DataFile, Exposure, Image, SourceList, PSF, WorldCoordinates, ZeroPoint, Cutouts, Measurements, Object, CalibratorFile, CalibratorFileDownloadLock, CatalogExcerpt, Reference, SensorSection, - AuthUser, PasswordLink + AuthUser, PasswordLink, KnownExposure, PipelineWorker ] output = {} diff --git a/models/image.py b/models/image.py index 94315753..5338a341 100644 --- a/models/image.py +++ b/models/image.py @@ -562,8 +562,40 @@ def merge_all(self, session): Must provide a session to merge into. Need to commit at the end. Returns the merged image with all its products on the same session. + + DEVELOPER NOTE: changing what gets merged in this function + requires a corresponding change in + pipeline/data_store.py::DataStore.save_and_commit + """ new_image = self.safe_merge(session=session) + + # Note -- this next block of code is useful for trying to debug + # sqlalchemy weirdness. However, because it calls the __repr__ + # method of various objects, it actually causes tests to fail.
+ # In particular, there are tests that use 'ZTF' as the instrument, + # but the code has no ZTF instrument defined, so calling + # Image.__repr__ throws an error. As such, comment the + # code out below, but leave it here in case somebody wants + # to temporarily re-enable it for debugging purposes. + # + # import io + # strio = io.StringIO() + # strio.write( "In image.merge_all; objects in session:\n" ) + # if len( session.new ) > 0 : + # strio.write( " NEW:\n" ) + # for obj in session.new: + # strio.write( f" {obj}\n" ) + # if len( session.dirty ) > 0: + # strio.write( " DIRTY:\n" ) + # for obj in session.dirty: + # strio.write( f" {obj}\n" ) + # if len( session.deleted ) > 0: + # strio.write( " DELETED:\n" ) + # for obj in session.deleted: + # strio.write( f" {obj}\n" ) + # SCLogger.debug( strio.getvalue() ) + session.flush() # make sure new_image gets an ID if self.sources is not None: diff --git a/models/provenance.py b/models/provenance.py index 13e24843..bbd536ff 100644 --- a/models/provenance.py +++ b/models/provenance.py @@ -2,16 +2,19 @@ import json import base64 import hashlib +from collections import defaultdict import sqlalchemy as sa +import sqlalchemy.orm as orm from sqlalchemy import event from sqlalchemy.orm import relationship from sqlalchemy.dialects.postgresql import JSONB from sqlalchemy.exc import IntegrityError +from sqlalchemy.schema import UniqueConstraint from util.util import get_git_hash import models.base -from models.base import Base, SeeChangeBase, SmartSession, safe_merge +from models.base import Base, AutoIDMixin, SeeChangeBase, SmartSession, safe_merge class CodeHash(Base): @@ -375,3 +378,144 @@ def insert_new_dataset(mapper, connection, target): foreign_keys="Provenance.code_version_id", passive_deletes=True, ) + + +class ProvenanceTagExistsError(Exception): + pass + +class ProvenanceTag(Base, AutoIDMixin): + """A human-readable tag to associate with provenances. + + A well-defined provenance tag will have a provenance defined for every step, but there will + only be a *single* provenance for each step (except for referencing, where there could be + multiple provenances defined). The class method validate can check this for duplicates. + + """ + + __tablename__ = "provenance_tags" + + __table_args__ = ( UniqueConstraint( 'tag', 'provenance_id', name='_provenancetag_prov_tag_uc' ), ) + + tag = sa.Column( + sa.String, + nullable=False, + index=True, + doc='Human-readable tag name; one tag has many provenances associated with it.' + ) + + provenance_id = sa.Column( + sa.ForeignKey( 'provenances.id', ondelete="CASCADE", name='provenance_tags_provenance_id_fkey' ), + index=True, + doc='Provenance ID. Each tag/process should only have one provenance.' + ) + + provenance = orm.relationship( + 'Provenance', + cascade='save-update, merge, refresh-expire, expunge', + lazy='selectin', + doc=( "Provenance" ) + ) + + def __repr__( self ): + return ( '' ) + + @classmethod + def newtag( cls, tag, provs, session=None ): + """Add a new ProvenanceTag. Will throw an error if it already exists. + + Usually, this is called from pipeline.top_level.make_provenance_tree, not directly. + + Always commits. + + Parameters + ---------- + tag: str + The human-readable provenance tag. For cleanliness, should be ASCII, no spaces. + + provs: list of str or Provenance + The provenances to include in this tag.
Usually, you want to make sure to include + a provenance for every process in the pipeline: exposure, referencing, preprocessing, + extraction, subtraction, detection, cutting, measuring, [TODO MORE: deepscore, alert] + -oo- load_exposure, download, import_image, alignment or aligning, coaddition + + """ + + with SmartSession( session ) as sess: + # Get all the provenance IDs we're going to insert + provids = set() + for prov in provs: + if isinstance( prov, Provenance ): + provids.add( prov.id ) + elif isinstance( prov, str ): + provobj = sess.get( Provenance, prov ) + if provobj is None: + raise ValueError( f"Unknown Provenance ID {prov}" ) + provids.add( provobj.id ) + else: + raise TypeError( f"Everything in the provs list must be Provenance or str, not {type(prov)}" ) + + try: + # Make sure that this tag doesn't already exist. To avoid race + # conditions of two processes creating it at once (which, + # given how we expect the code to be used, should probably + # not happen in practice), lock the table before searching + # and only unlock after inserting. + sess.connection().execute( sa.text( "LOCK TABLE provenance_tags" ) ) + current = sess.query( ProvenanceTag ).filter( ProvenanceTag.tag == tag ) + if current.count() != 0: + sess.rollback() + raise ProvenanceTagExistsError( f"ProvenanceTag {tag} already exists." ) + + for provid in provids: + sess.add( ProvenanceTag( tag=tag, provenance_id=provid ) ) + + sess.commit() + finally: + # Make sure no lock is left behind; exiting the with block + # ought to do this, but be paranoid. + sess.rollback() + + @classmethod + def validate( cls, tag, processes=None, session=None ): + """Verify that a given tag doesn't have multiply defined processes. + + One exception: referencing can have multiply defined processes. + + Raises an exception if things don't work. + + Parameters + ---------- + tag: str + The tag to validate + + processes: list of str + The processes to make sure are present. If None, won't make sure + that any processes are present, will just make sure there are no + duplicates. + + """ + + repeatok = { 'referencing' } + + with SmartSession( session ) as sess: + ptags = ( sess.query( (ProvenanceTag.id,Provenance.process) ) + .filter( ProvenanceTag.provenance_id==Provenance.id ) + .filter( ProvenanceTag.tag==tag ) + ).all() + + count = defaultdict( lambda: 0 ) + for ptagid, process in ptags: + count[ process ] += 1 + + multiples = [ i for i in count.keys() if count[i] > 1 and i not in repeatok ] + if len(multiples) > 0: + raise ValueError( f"Database integrity error: ProvenanceTag {tag} has more than one " + f"provenance for processes {multiples}" ) + + if processes is not None: + missing = [ i for i in processes if i not in count.keys() ] + if len( missing ) > 0: + raise ValueError( f"Some processes missing from ProvenanceTag {tag}: {missing}" ) diff --git a/pipeline/data_store.py b/pipeline/data_store.py index f5f83270..8efddf58 100644 --- a/pipeline/data_store.py +++ b/pipeline/data_store.py @@ -1,3 +1,4 @@ +import io import warnings import datetime import sqlalchemy as sa @@ -1379,6 +1380,9 @@ def save_and_commit(self, exists_ok=False, overwrite=True, no_archive=False, True), as the image headers get "first-look" values, not necessarily the latest and greatest if we tune either process.
+ DEVELOPER NOTE: this code has to stay synced properly with + models/image.py::Image.merge_all + Parameters ---------- exists_ok: bool, default False @@ -1431,8 +1435,15 @@ def save_and_commit(self, exists_ok=False, overwrite=True, no_archive=False, if obj is None: continue - SCLogger.debug( f'save_and_commit considering a {obj.__class__.__name__} with filepath ' - f'{obj.filepath if isinstance(obj,FileOnDiskMixin) else ""}' ) + strio = io.StringIO() + strio.write( f"save_and_commit of {att} considering a {obj.__class__.__name__}" ) + if isinstance( obj, FileOnDiskMixin ): + strio.write( f" with filepath {obj.filepath}" ) + elif isinstance( obj, list ): + strio.write( f" of types {[type(i) for i in obj]}" ) + SCLogger.debug( strio.getvalue() ) + # SCLogger.debug( f'save_and_commit of {att} considering a {obj.__class__.__name__} with filepath ' + # f'{obj.filepath if isinstance(obj,FileOnDiskMixin) else ""}' ) if isinstance(obj, FileOnDiskMixin): mustsave = True diff --git a/pipeline/top_level.py b/pipeline/top_level.py index dfa9ddf6..e20733db 100644 --- a/pipeline/top_level.py +++ b/pipeline/top_level.py @@ -1,3 +1,4 @@ +import io import datetime import time import warnings @@ -16,7 +17,7 @@ from pipeline.measuring import Measurer from models.base import SmartSession, merge_concurrent -from models.provenance import Provenance +from models.provenance import Provenance, ProvenanceTag, ProvenanceTagExistsError from models.refset import RefSet from models.exposure import Exposure from models.report import Report @@ -75,6 +76,16 @@ def __init__(self, **kwargs): critical=False, ) + self.provenance_tag = self.add_par( + 'provenance_tag', + 'current', + ( None, str ), + "The ProvenanceTag that data products should be associated with. Will be " + "created if it doesn't exist; if it does exist, will verify that all the " + "provenances we're running with are properly tagged there.", + critical=False + ) + self._enforce_no_new_attrs = True # lock against new parameters self.override(kwargs) @@ -221,21 +232,14 @@ def setup_datastore(self, *args, **kwargs): try: # must make sure the report is on the DB report = Report(exposure=ds.exposure, section_id=ds.section_id) report.start_time = datetime.datetime.utcnow() - prov = Provenance( - process='report', - code_version=ds.exposure.provenance.code_version, - parameters={}, - upstreams=[provs['measuring']], - is_testing=ds.exposure.provenance.is_testing, - ) - report.provenance = prov + report.provenance = provs['report'] with SmartSession(session) as dbsession: # check how many times this report was generated before prev_rep = dbsession.scalars( sa.select(Report).where( Report.exposure_id == ds.exposure.id, Report.section_id == ds.section_id, - Report.provenance_id == prov.id, + Report.provenance_id == provs['report'].id, ) ).all() report.num_prev_reports = len(prev_rep) @@ -385,7 +389,7 @@ def run_with_session(self): with SmartSession() as session: self.run(session=session) - def make_provenance_tree(self, exposure, overrides=None, session=None, commit=True): + def make_provenance_tree( self, exposure, overrides=None, session=None, no_provtag=False, commit=True ): """Use the current configuration of the pipeline and all the objects it has to generate the provenances for all the processing steps. This will conclude with the reporting step, which simply has an upstreams Parameters ---------- exposure : Exposure The exposure to use to get the initial provenance.
This provenance should be automatically created by the exposure. + overrides: dict, optional - A dictionary of provenances to override any of the steps in the pipeline. - For example, set overrides={'preprocessing': prov} to use a specific provenance - for the basic Image provenance. + A dictionary of provenances to override any of the steps in + the pipeline. For example, set overrides={'preprocessing': + prov} to use a specific provenance for the basic Image + provenance. + session : SmartSession, optional - The function needs to work with the database to merge existing provenances. - If a session is given, it will use that, otherwise it will open a new session, - which will also close automatically at the end of the function. + The function needs to work with the database to merge + existing provenances. If a session is given, it will use + that, otherwise it will open a new session, which will also + close automatically at the end of the function. + + no_provtag: bool, default False + If True, won't create a provenance tag, and won't ensure + that the provenances created match the provenance_tag + parameter to the pipeline. If False, will create the + provenance tag if it doesn't exist. If it does exist, will + verify that all the provenances in the created provenance + tree are what's tagged + commit: bool, optional, default True - By default, the provenances are merged and committed inside this function. - To disable this, set commit=False. This may leave the provenances in a - transient state, and is most likely not what you want. + By default, the provenances are merged and committed inside + this function. To disable this, set commit=False. This may + leave the provenances in a transient state, and is most + likely not what you want. Returns ------- @@ -416,13 +434,17 @@ def make_provenance_tree(self, exposure, overrides=None, session=None, commit=Tr A dictionary of all the provenances that were created in this function, keyed according to the different steps in the pipeline. The provenances are all merged to the session. + """ if overrides is None: overrides = {} - with SmartSession(session) as session: + if ( not no_provtag ) and ( not commit ): + raise RuntimeError( "Commit required when no_provtag is not set" ) + + with SmartSession(session) as sess: # start by getting the exposure and reference - exp_prov = session.merge(exposure.provenance) # also merges the code_version + exp_prov = sess.merge(exposure.provenance) # also merges the code_version provs = {'exposure': exp_prov} code_version = exp_prov.code_version is_testing = exp_prov.is_testing @@ -436,7 +458,7 @@ def make_provenance_tree(self, exposure, overrides=None, session=None, commit=Tr # even though you can use it to make the Report provenance (just so you have something to refer to). if refset_name is not None: - refset = session.scalars(sa.select(RefSet).where(RefSet.name == refset_name)).first() + refset = sess.scalars(sa.select(RefSet).where(RefSet.name == refset_name)).first() if refset is None: raise ValueError(f'No reference set with name {refset_name} found in the database!') @@ -445,6 +467,7 @@ def make_provenance_tree(self, exposure, overrides=None, session=None, commit=Tr raise ValueError(f'No provenances found for reference set {refset_name}!') provs['referencing'] = ref_provs # notice that this is a list, not a single provenance! 
+ for step in PROCESS_OBJECTS: # produce the provenance for this step if step in overrides: # accept override from user input provs[step] = overrides[step] @@ -480,10 +503,54 @@ def make_provenance_tree(self, exposure, overrides=None, session=None, commit=Tr is_testing=is_testing, ) - provs[step] = provs[step].merge_concurrent(session=session, commit=commit) + provs[step] = provs[step].merge_concurrent(session=sess, commit=commit) + + # Make the report provenance + prov = Provenance( + process='report', + code_version=exposure.provenance.code_version, + parameters={}, + upstreams=[provs['measuring']], + is_testing=exposure.provenance.is_testing, + ) + provs['report'] = prov.merge_concurrent( session=sess, commit=commit ) if commit: - session.commit() + sess.commit() + + # Ensure that the provenance tag is right, creating it if it doesn't exist + if not no_provtag: + provtag = self.pars.provenance_tag + try: + provids = [] + for prov in provs.values(): + if isinstance( prov, list ): + provids.extend( [ i.id for i in prov ] ) + else: + provids.append( prov.id ) + ProvenanceTag.newtag( provtag, provids, session=session ) + except ProvenanceTagExistsError as ex: + pass + + # The rest of this could be inside the except block, + # but leaving it outside verifies that the + # ProvenanceTag.newtag worked properly. + missing = [] + with SmartSession( session ) as sess: + ptags = sess.query( ProvenanceTag ).filter( ProvenanceTag.tag==provtag ).all() + ptag_pids = [ pt.provenance_id for pt in ptags ] + for step, prov in provs.items(): + if isinstance( prov, list ): + missing.extend( [ i.id for i in prov if i.id not in ptag_pids ] ) + elif prov.id not in ptag_pids: + missing.append( prov ) + if len( missing ) != 0: + strio = io.StringIO() + strio.write( f"The following provenances are not associated with provenance tag {provtag}:\n " ) + for prov in missing: + strio.write( f" {prov.process}: {prov.id}\n" ) + SCLogger.error( strio.getvalue() ) + raise RuntimeError( strio.getvalue() ) return provs diff --git a/tests/conftest.py b/tests/conftest.py index fceaa414..b55e6375 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,5 @@ import os +import io import warnings import pytest import uuid @@ -87,6 +88,64 @@ def pytest_sessionstart(session): session.delete(catexp) session.commit() +def any_objects_in_database( dbsession ): + """Look in the database, print errors and return False if things are left behind. + + The "allowed" tables (CodeVersion, CodeHash, SensorSection, + CatalogExcerpt, Provenance, Object, PasswordLink) will not cause + False to be returned, but will just print a debug message. + + Parameters + ---------- + dbsession: Session + + Returns + ------- + True if there are only database rows in allowed tables. + False if there are any databse rows in non-allowed tables. + + """ + + objects = get_all_database_objects( session=dbsession ) + any_objects = False + for Class, ids in objects.items(): + # TODO: check that surviving provenances have test_parameter + if Class.__name__ in ['CodeVersion', 'CodeHash', 'SensorSection', 'CatalogExcerpt', + 'Provenance', 'Object', 'PasswordLink']: + SCLogger.debug(f'There are {len(ids)} {Class.__name__} objects in the database. These are OK to stay.') + elif len(ids) > 0: + any_objects = True + strio = io.StringIO() + strio.write( f'There are {len(ids)} {Class.__name__} objects in the database. 
' + f'Please make sure to cleanup!') + for id in ids: + obj = dbsession.scalars(sa.select(Class).where(Class.id == id)).first() + strio.write( f'\n {obj}' ) + SCLogger.error( strio.getvalue() ) + return any_objects + +# Uncomment this fixture to run the "empty database" check after each +# test. This can be useful in figuring out which test is leaving stuff +# behind. Because of session scope fixtures, it will cause nearly every +# (or every) test to fail, but at least you'll have enough debug output +# to (hopefully) find the tests that are leaving behind extra stuff. +# +# NOTE -- for this to work, ironically, you have to set +# verify_archive_database_empty to False at the top of this file. +# Otherwise, at the end of all the tests, the things left over in the +# databse you are looking for will cause everything to fail, and you +# *only* get that message instead of all the error messages from here +# that you wanted to get! (Oh, pytest.) +# +# (This is probably not practical, becasuse there is *so much* module +# and session scope stuff that lots of things are left behind by tests. +# You will have to sift through a lot of output to find what you're +# looking for. We need a better way.) +# @pytest.fixture(autouse=True) +# def check_empty_database_at_end_of_each_test(): +# yield True +# with SmartSession() as dbsession: +# assert not any_objects_in_database( dbsession ) # This will be executed after the last test (session is the pytest session, not the SQLAlchemy session) def pytest_sessionfinish(session, exitstatus): @@ -102,19 +161,7 @@ def pytest_sessionfinish(session, exitstatus): dbsession.delete(prov) dbsession.commit() - objects = get_all_database_objects(session=dbsession) - any_objects = False - for Class, ids in objects.items(): - # TODO: check that surviving provenances have test_parameter - if Class.__name__ in ['CodeVersion', 'CodeHash', 'SensorSection', 'CatalogExcerpt', - 'Provenance', 'Object', 'PasswordLink']: - SCLogger.debug(f'There are {len(ids)} {Class.__name__} objects in the database. These are OK to stay.') - elif len(ids) > 0: - print(f'There are {len(ids)} {Class.__name__} objects in the database. Please make sure to cleanup!') - for id in ids: - obj = dbsession.scalars(sa.select(Class).where(Class.id == id)).first() - print(f' {obj}') - any_objects = True + any_objects = any_objects_in_database( dbsession ) # delete the CodeVersion object (this should remove all provenances as well) dbsession.execute(sa.delete(CodeVersion).where(CodeVersion.id == 'test_v1.0.0')) @@ -193,6 +240,7 @@ def data_dir(): @pytest.fixture(scope="session") def blocking_plots(): + """ Control how and when plots will be generated. There are three options for the environmental variable "INTERACTIVE". 
@@ -399,6 +447,24 @@ def catexp(data_dir, cache_dir, download_url): if os.path.isfile(filepath): os.remove(filepath) + +@pytest.fixture +def browser(): + opts = selenium.webdriver.FirefoxOptions() + opts.add_argument( "--headless" ) + ff = selenium.webdriver.Firefox( options=opts ) + # This next line lets us use self-signed certs on test servers + ff.accept_untrusted_certs = True + yield ff + ff.close() + ff.quit() + + +@pytest.fixture( scope="session" ) +def webap_url(): + return "http://webap:8081/" + + # ====================================================================== # FOR REASONS I DO NOT UNDERSTAND, adding this fixture caused # models/test_image_querying.py::test_image_query to pass diff --git a/tests/fixtures/conductor.py b/tests/fixtures/conductor.py index 2364923b..c03f2f0b 100644 --- a/tests/fixtures/conductor.py +++ b/tests/fixtures/conductor.py @@ -65,17 +65,6 @@ def conductor_user(): session.commit() -@pytest.fixture -def browser(): - opts = selenium.webdriver.FirefoxOptions() - opts.add_argument( "--headless" ) - ff = selenium.webdriver.Firefox( options=opts ) - # This next line lets us use self-signed certs on test servers - ff.accept_untrusted_certs = True - yield ff - ff.close() - ff.quit() - @pytest.fixture def conductor_browser_logged_in( browser, conductor_user ): cfg = Config.get() diff --git a/tests/fixtures/datastore_factory.py b/tests/fixtures/datastore_factory.py index bd91118b..ce3d9eab 100644 --- a/tests/fixtures/datastore_factory.py +++ b/tests/fixtures/datastore_factory.py @@ -58,9 +58,11 @@ def make_datastore( augments={}, bad_pixel_map=None, save_original_image=False, - skip_sub=False + skip_sub=False, + provtag='datastore_factory' ): code_version = args[0].provenance.code_version + SCLogger.debug( f"make_datastore called with args {args}, overrides={overrides}, augments={augments}" ) ds = DataStore(*args) # make a new datastore use_cache = cache_dir is not None and cache_base_name is not None and not env_as_bool( "LIMIT_CACHE_USAGE" ) @@ -73,7 +75,7 @@ def make_datastore( if use_cache: ds.cache_base_name = os.path.join(cache_dir, cache_base_name) # save this for testing purposes - p = pipeline_factory() + p = pipeline_factory( provtag ) # allow calling scope to override/augment parameters for any of the processing steps p.override_parameters(**overrides) @@ -83,6 +85,7 @@ def make_datastore( code_version = session.merge(code_version) if ds.image is not None: # if starting from an externally provided Image, must merge it first + SCLogger.debug( f"make_datastore was provided an external image; merging it" ) ds.image = ds.image.merge_all(session) ############ load the reference set ############ @@ -97,14 +100,14 @@ def make_datastore( refset = session.scalars(sa.select(RefSet).where(RefSet.name == refset_name)).first() if refset is None: - raise ValueError(f'No reference set found with name {refset_name}') + raise ValueError(f'make_datastore found no reference with name {refset_name}') ref_prov = refset.provenances[0] ############ preprocessing to create image ############ if ds.image is None and use_cache: # check if preprocessed image is in cache if os.path.isfile(image_cache_path): - SCLogger.debug('loading image from cache. ') + SCLogger.debug('make_datastore loading image from cache. 
') ds.image = copy_from_cache(Image, cache_dir, cache_name) # assign the correct exposure to the object loaded from cache if ds.exposure_id is not None: @@ -133,6 +136,8 @@ def make_datastore( # if Image already exists on the database, use that instead of this one existing = session.scalars(sa.select(Image).where(Image.filepath == ds.image.filepath)).first() if existing is not None: + SCLogger.debug( f"make_datastore updating existing image {existing.id} " + f"({existing.filepath}) with image loaded from cache" ) # overwrite the existing row data using the JSON cache file for key in sa.inspect(ds.image).mapper.columns.keys(): value = getattr(ds.image, key) @@ -142,6 +147,9 @@ def make_datastore( ): setattr(existing, key, value) ds.image = existing # replace with the existing row + else: + SCLogger.debug( f"make_datastore did not find image with filepath " + f"{ds.image.filepath} in database" ) ds.image.provenance = prov @@ -149,7 +157,7 @@ def make_datastore( ds.image.save(verify_md5=False) if ds.image is None: # make the preprocessed image - SCLogger.debug('making preprocessed image. ') + SCLogger.debug('make_datastore making preprocessed image. ') ds = p.preprocessor.run(ds, session) ds.image.provenance.is_testing = True if bad_pixel_map is not None: @@ -202,15 +210,16 @@ def make_datastore( cache_name = f'{cache_base_name}.sources_{prov.id[:6]}.fits.json' sources_cache_path = os.path.join(cache_dir, cache_name) if os.path.isfile(sources_cache_path): - SCLogger.debug('loading source list from cache. ') + SCLogger.debug('make_datastore loading source list from cache. ') ds.sources = copy_from_cache(SourceList, cache_dir, cache_name) - # if SourceList already exists on the database, use that instead of this one existing = session.scalars( sa.select(SourceList).where(SourceList.filepath == ds.sources.filepath) ).first() if existing is not None: # overwrite the existing row data using the JSON cache file + SCLogger.debug( f"make_datastore updating existing source list {existing.id} " + f"({existing.filepath}) with source list loaded from cache" ) for key in sa.inspect(ds.sources).mapper.columns.keys(): value = getattr(ds.sources, key) if ( @@ -219,6 +228,9 @@ def make_datastore( ): setattr(existing, key, value) ds.sources = existing # replace with the existing row + else: + SCLogger.debug( f"make_datastore did not find source list with filepath " + f"{ds.sources.filepath} in the database" ) ds.sources.provenance = prov ds.sources.image = ds.image @@ -230,15 +242,16 @@ def make_datastore( cache_name = f'{cache_base_name}.psf_{prov.id[:6]}.fits.json' psf_cache_path = os.path.join(cache_dir, cache_name) if os.path.isfile(psf_cache_path): - SCLogger.debug('loading PSF from cache. ') + SCLogger.debug('make_datastore loading PSF from cache. 
') ds.psf = copy_from_cache(PSF, cache_dir, cache_name) - # if PSF already exists on the database, use that instead of this one existing = session.scalars( sa.select(PSF).where(PSF.filepath == ds.psf.filepath) ).first() if existing is not None: # overwrite the existing row data using the JSON cache file + SCLogger.debug( f"make_datastore updating existing psf {existing.id} " + f"({existing.filepath}) with psf loaded from cache" ) for key in sa.inspect(ds.psf).mapper.columns.keys(): value = getattr(ds.psf, key) if ( @@ -247,6 +260,9 @@ def make_datastore( ): setattr(existing, key, value) ds.psf = existing # replace with the existing row + else: + SCLogger.debug( f"make_datastore did not find psf with filepath " + f"{ds.psf.filepath} in the database" ) ds.psf.provenance = prov ds.psf.image = ds.image @@ -258,15 +274,16 @@ def make_datastore( cache_name = f'{cache_base_name}.bg_{prov.id[:6]}.h5.json' bg_cache_path = os.path.join(cache_dir, cache_name) if os.path.isfile(bg_cache_path): - SCLogger.debug('loading background from cache. ') + SCLogger.debug('make_datastore loading background from cache. ') ds.bg = copy_from_cache(Background, cache_dir, cache_name) - # if BG already exists on the database, use that instead of this one existing = session.scalars( sa.select(Background).where(Background.filepath == ds.bg.filepath) ).first() if existing is not None: # overwrite the existing row data using the JSON cache file + SCLogger.debug( f"make_datastore updating existing background {existing.id} " + f"({existing.filepath}) with source list loaded from cache" ) for key in sa.inspect(ds.bg).mapper.columns.keys(): value = getattr(ds.bg, key) if ( @@ -275,6 +292,9 @@ def make_datastore( ): setattr(existing, key, value) ds.bg = existing + else: + SCLogger.debug( f"make_datastore did not find background with filepath " + f"{ds.bg.filepath} in the database" ) ds.bg.provenance = prov ds.bg.image = ds.image @@ -286,7 +306,7 @@ def make_datastore( cache_name = f'{cache_base_name}.wcs_{prov.id[:6]}.txt.json' wcs_cache_path = os.path.join(cache_dir, cache_name) if os.path.isfile(wcs_cache_path): - SCLogger.debug('loading WCS from cache. ') + SCLogger.debug('make_datastore loading WCS from cache. ') ds.wcs = copy_from_cache(WorldCoordinates, cache_dir, cache_name) prov = session.merge(prov) @@ -303,6 +323,8 @@ def make_datastore( if existing is not None: # overwrite the existing row data using the JSON cache file + SCLogger.debug( f"make_datastore updating existing wcs {existing.id} " + f"with wcs loaded from cache" ) for key in sa.inspect(ds.wcs).mapper.columns.keys(): value = getattr(ds.wcs, key) if ( @@ -311,6 +333,8 @@ def make_datastore( ): setattr(existing, key, value) ds.wcs = existing # replace with the existing row + else: + SCLogger.debug( f"make_datastore did not find existing wcs in database" ) ds.wcs.provenance = prov ds.wcs.sources = ds.sources @@ -321,7 +345,7 @@ def make_datastore( cache_name = cache_base_name + '.zp.json' zp_cache_path = os.path.join(cache_dir, cache_name) if os.path.isfile(zp_cache_path): - SCLogger.debug('loading zero point from cache. ') + SCLogger.debug('make_datastore loading zero point from cache. 
') ds.zp = copy_from_cache(ZeroPoint, cache_dir, cache_name) # check if ZP already exists on the database @@ -337,6 +361,8 @@ def make_datastore( if existing is not None: # overwrite the existing row data using the JSON cache file + SCLogger.debug( f"make_datastore updating existing zp {existing.id} " + f"with zp loaded from cache" ) for key in sa.inspect(ds.zp).mapper.columns.keys(): value = getattr(ds.zp, key) if ( @@ -345,13 +371,15 @@ def make_datastore( ): setattr(existing, key, value) ds.zp = existing # replace with the existing row + else: + SCLogger.debug( "make_datastore did not find existing zp in database" ) ds.zp.provenance = prov ds.zp.sources = ds.sources # if any data product is missing, must redo the extraction step if ds.sources is None or ds.psf is None or ds.bg is None or ds.wcs is None or ds.zp is None: - SCLogger.debug('extracting sources. ') + SCLogger.debug('make_datastore extracting sources. ') ds = p.extractor.run(ds, session) ds.sources.save(overwrite=True) @@ -391,6 +419,7 @@ def make_datastore( if output_path != zp_cache_path: warnings.warn(f'cache path {zp_cache_path} does not match output path {output_path}') + SCLogger.debug( "make_datastore running ds.save_and_commit on image (before subtraction)" ) ds.save_and_commit(session=session) # make a new copy of the image to cache, including the estimates for lim_mag, fwhm, etc. @@ -399,14 +428,17 @@ def make_datastore( # If we were told not to try to do a subtraction, then we're done if skip_sub: + SCLogger.debug( "make_datastore : skip_sub is True, returning" ) return ds # must provide the reference provenance explicitly since we didn't build a prov_tree ref = ds.get_reference(ref_prov, session=session) if ref is None: + SCLogger.debug( "make_datastore : could not find a reference, returning" ) return ds # if no reference is found, simply return the datastore without the rest of the products if use_cache: # try to find the subtraction image in the cache + SCLogger.debug( "make_datstore looking for subtraction image in cache..." ) prov = Provenance( code_version=code_version, process='subtraction', @@ -428,7 +460,7 @@ def make_datastore( cache_name = cache_sub_name + '.image.fits.json' sub_cache_path = os.path.join(cache_dir, cache_name) if os.path.isfile(sub_cache_path): - SCLogger.debug('loading subtraction image from cache. 
') + SCLogger.debug(f'make_datastore loading subtraction image from cache: {sub_cache_path}') ds.sub_image = copy_from_cache(Image, cache_dir, cache_name) ds.sub_image.provenance = prov @@ -505,8 +537,11 @@ def make_datastore( ds.sub_image._aligned_images = [image_aligned_ref, image_aligned_new] else: ds.sub_image._aligned_images = [image_aligned_new, image_aligned_ref] + else: + SCLogger.debug( "make_datastore didn't find subtraction image in cache" ) if ds.sub_image is None: # no hit in the cache + SCLogger.debug( "make_datastore running subtractor to create subtraction image" ) ds = p.subtractor.run(ds, session) ds.sub_image.save(verify_md5=False) # make sure it is also saved to archive if use_cache: @@ -515,6 +550,7 @@ def make_datastore( warnings.warn(f'cache path {sub_cache_path} does not match output path {output_path}') if use_cache: # save the aligned images to cache + SCLogger.debug( "make_datastore saving aligned images to cache" ) for im in ds.sub_image.aligned_images: im.save(no_archive=True) copy_to_cache(im, cache_dir) @@ -532,13 +568,14 @@ def make_datastore( cache_name = os.path.join(cache_dir, cache_sub_name + f'.sources_{prov.id[:6]}.npy.json') if use_cache and os.path.isfile(cache_name): - SCLogger.debug('loading detections from cache. ') + SCLogger.debug( "make_datastore loading detections from cache." ) ds.detections = copy_from_cache(SourceList, cache_dir, cache_name) ds.detections.provenance = prov ds.detections.image = ds.sub_image ds.sub_image.sources = ds.detections ds.detections.save(verify_md5=False) else: # cannot find detections on cache + SCLogger.debug( "make_datastore running detector to find detections" ) ds = p.detector.run(ds, session) ds.detections.save(verify_md5=False) if use_cache: @@ -557,13 +594,14 @@ def make_datastore( cache_name = os.path.join(cache_dir, cache_sub_name + f'.cutouts_{prov.id[:6]}.h5') if use_cache and ( os.path.isfile(cache_name) ): - SCLogger.debug('loading cutouts from cache. ') + SCLogger.debug( 'make_datastore loading cutouts from cache.' ) ds.cutouts = copy_from_cache(Cutouts, cache_dir, cache_name) ds.cutouts.provenance = prov ds.cutouts.sources = ds.detections ds.cutouts.load_all_co_data() # sources must be set first ds.cutouts.save() # make sure to save to archive as well else: # cannot find cutouts on cache + SCLogger.debug( "make_datastore running cutter to create cutouts" ) ds = p.cutter.run(ds, session) ds.cutouts.save() if use_cache: @@ -584,7 +622,7 @@ def make_datastore( if use_cache and ( os.path.isfile(cache_name) ): # note that the cache contains ALL the measurements, not only the good ones - SCLogger.debug('loading measurements from cache. ') + SCLogger.debug( 'make_datastore loading measurements from cache.' ) ds.all_measurements = copy_list_from_cache(Measurements, cache_dir, cache_name) [setattr(m, 'provenance', prov) for m in ds.all_measurements] [setattr(m, 'cutouts', ds.cutouts) for m in ds.all_measurements] @@ -599,10 +637,12 @@ def make_datastore( [m.associate_object(session) for m in ds.measurements] # create or find an object for each measurement # no need to save list because Measurements is not a FileOnDiskMixin! else: # cannot find measurements on cache + SCLogger.debug( "make_datastore running measurer to create measurements" ) ds = p.measurer.run(ds, session) if use_cache: copy_list_to_cache(ds.all_measurements, cache_dir, cache_name) # must provide filepath!
+ SCLogger.debug( "make_datastore running ds.save_and_commit after subtraction/etc" ) ds.save_and_commit(session=session) return ds diff --git a/tests/fixtures/decam.py b/tests/fixtures/decam.py index cfeec856..2e739394 100644 --- a/tests/fixtures/decam.py +++ b/tests/fixtures/decam.py @@ -283,7 +283,8 @@ def decam_datastore( cache_dir=decam_cache_dir, cache_base_name='007/c4d_20230702_080904_S3_r_Sci_NBXRIO', overrides={'subtraction': {'refset': 'test_refset_decam'}}, - save_original_image=True + save_original_image=True, + provtag='decam_datastore' ) # This save is redundant, as the datastore_factory calls save_and_commit # However, I leave this here because it is a good test that calling it twice @@ -313,6 +314,11 @@ def decam_datastore( ImageAligner.cleanup_temp_images() + # Clean up the provenance tag potentially created by the pipeline + with SmartSession() as session: + session.execute( sa.text( "DELETE FROM provenance_tags WHERE tag=:tag" ), {'tag': 'decam_datastore' } ) + session.commit() + @pytest.fixture def decam_processed_image(decam_datastore): @@ -365,7 +371,7 @@ def decam_fits_image_filename2(download_url, decam_cache_dir): def decam_elais_e1_two_refs_datastore( code_version, download_url, decam_cache_dir, data_dir, datastore_factory, refmaker_factory ): filebase = 'ELAIS-E1-r-templ' - maker = refmaker_factory( 'test_refset_decam', 'DECam' ) + maker = refmaker_factory( 'test_refset_decam', 'DECam', 'decam_elais_e1_two_refs_datastore' ) with SmartSession() as session: maker.make_refset(session=session) @@ -429,7 +435,8 @@ def decam_elais_e1_two_refs_datastore( code_version, download_url, decam_cache_d ds = datastore_factory( image, cache_dir=decam_cache_dir, cache_base_name=f'007/{filebase}.{chip:02d}', - skip_sub=True ) + skip_sub=True, + provtag='decam_elais_e1_two_refs_datastore_datastore_factory') for filename in image.get_fullpath(as_list=True): assert os.path.isfile(filename) @@ -451,6 +458,13 @@ def decam_elais_e1_two_refs_datastore( code_version, download_url, decam_cache_d if obj is not None and hasattr(obj, 'delete_from_disk_and_database'): obj.delete_from_disk_and_database(archive=True) + # Clean out the provenance tag that may have been created by the refmaker_factory and datastore_factory + with SmartSession() as session: + for tag in [ 'decam_elais_e1_two_refs_datastore', + 'decam_elais_e1_two_refs_datastore_datastore_factory' ]: + session.execute( sa.text( "DELETE FROM provenance_tags WHERE tag=:tag" ), {'tag': tag } ) + session.commit() + ImageAligner.cleanup_temp_images() @pytest.fixture @@ -461,7 +475,7 @@ def decam_ref_datastore( decam_elais_e1_two_refs_datastore ): def decam_elais_e1_two_references( decam_elais_e1_two_refs_datastore, refmaker_factory ): refs = [] with SmartSession() as session: - maker = refmaker_factory('test_refset_decam', 'DECam') + maker = refmaker_factory('test_refset_decam', 'DECam', 'decam_elais_e1_two_references' ) maker.make_refset(session=session) prov = maker.refset.provenances[0] prov = session.merge(prov) @@ -498,6 +512,13 @@ def decam_elais_e1_two_references( decam_elais_e1_two_refs_datastore, refmaker_f session.delete( ref ) session.commit() + # clean out the provenance tag that may have been created by the refmaker_factory + with SmartSession() as session: + session.execute( sa.text( "DELETE FROM provenance_tags WHERE tag=:tag" ), + {'tag': 'decam_elais_e1_two_references' } ) + session.commit() + + @pytest.fixture def decam_reference( decam_elais_e1_two_references ): return decam_elais_e1_two_references[0] @@ -508,7 +529,7 
@@ def decam_ref_datastore( decam_elais_e1_two_refs_datastore ): @pytest.fixture(scope='session') def decam_refset(refmaker_factory): - refmaker = refmaker_factory('test_refset_decam', 'DECam') + refmaker = refmaker_factory('test_refset_decam', 'DECam', 'decam_refset' ) refmaker.pars.save_new_refs = True refmaker.make_refset() @@ -527,6 +548,10 @@ def decam_refset(refmaker_factory): session.commit() + # Clean out the provenance tag that may have been created by the refmaker_factory + with SmartSession() as session: + session.execute( sa.text( "DELETE FROM provenance_tags WHERE tag=:tag" ), {'tag': 'decam_refset' } ) + session.commit() @pytest.fixture def decam_subtraction(decam_datastore): diff --git a/tests/fixtures/pipeline_objects.py b/tests/fixtures/pipeline_objects.py index 461bf150..efe52785 100644 --- a/tests/fixtures/pipeline_objects.py +++ b/tests/fixtures/pipeline_objects.py @@ -1,5 +1,11 @@ import pytest +import sqlalchemy as sa + +from util.logger import SCLogger + +from models.base import SmartSession + from pipeline.preprocessing import Preprocessor from pipeline.detection import Detector from pipeline.backgrounding import Backgrounder @@ -237,8 +243,11 @@ def pipeline_factory( measurer_factory, test_config, ): - def make_pipeline(): - p = Pipeline(**test_config.value('pipeline')) + def make_pipeline( provtag=None ): + kwargs = {} + if provtag is not None: + kwargs['pipeline'] = { 'provenance_tag': provtag } + p = Pipeline(**kwargs) p.pars.save_before_subtraction = False p.pars.save_at_finish = False p.preprocessor = preprocessor_factory() @@ -271,7 +280,14 @@ def make_pipeline(): @pytest.fixture def pipeline_for_tests(pipeline_factory): - return pipeline_factory() + p = pipeline_factory( 'pipeline_for_tests' ) + yield p + + # Clean up the provenance tag potentially created by the pipeline + with SmartSession() as session: + session.execute( sa.text( "DELETE FROM provenance_tags WHERE tag=:tag" ), {'tag': 'pipeline_for_tests' } ) + session.commit() + @pytest.fixture(scope='session') @@ -316,14 +332,14 @@ def coadd_pipeline_for_tests(coadd_pipeline_factory): @pytest.fixture(scope='session') def refmaker_factory(test_config, pipeline_factory, coadd_pipeline_factory): - def make_refmaker(name, instrument): + def make_refmaker(name, instrument, provtag='refmaker_factory'): maker = RefMaker(maker={'name': name, 'instruments': [instrument]}) maker.pars._enforce_no_new_attrs = False maker.pars.test_parameter = maker.pars.add_par( 'test_parameter', 'test_value', str, 'parameter to define unique tests', critical=True ) maker.pars._enforce_no_new_attrs = True - maker.pipeline = pipeline_factory() + maker.pipeline = pipeline_factory( provtag ) maker.pipeline.override_parameters(**test_config.value('referencing.pipeline')) maker.coadd_pipeline = coadd_pipeline_factory() maker.coadd_pipeline.override_parameters(**test_config.value('referencing.coaddition')) diff --git a/tests/fixtures/ptf.py b/tests/fixtures/ptf.py index b4d03b84..c6b9470a 100644 --- a/tests/fixtures/ptf.py +++ b/tests/fixtures/ptf.py @@ -29,7 +29,7 @@ from util.retrydownload import retry_download from util.logger import SCLogger -from util.cache import copy_to_cache, copy_list_to_cache, copy_from_cache, copy_list_from_cache +from util.cache import copy_to_cache, copy_from_cache from util.util import env_as_bool @@ -175,12 +175,18 @@ def ptf_datastore(datastore_factory, ptf_exposure, ptf_ref, ptf_cache_dir, ptf_b cache_base_name='187/PTF_20110429_040004_11_R_Sci_BNKEKA', overrides={'extraction': {'threshold': 5}, 
'subtraction': {'refset': 'test_refset_ptf'}}, bad_pixel_map=ptf_bad_pixel_map, + provtag='ptf_datastore' ) yield ds ds.delete_everything() ImageAligner.cleanup_temp_images() + # Clean out the provenance tag that may have been created by the datastore_factory + with SmartSession() as session: + session.execute( sa.text( "DELETE FROM provenance_tags WHERE tag=:tag" ), {'tag': 'ptf_datastore' } ) + session.commit() + @pytest.fixture(scope='session') def ptf_urls(download_url): @@ -207,7 +213,7 @@ def ptf_urls(download_url): @pytest.fixture(scope='session') def ptf_images_factory(ptf_urls, ptf_downloader, datastore_factory, ptf_cache_dir, ptf_bad_pixel_map): - def factory(start_date='2009-04-04', end_date='2013-03-03', max_images=None): + def factory(start_date='2009-04-04', end_date='2013-03-03', max_images=None, provtag='ptf_images_factory'): # see if any of the cache names were saved to a manifest file cache_names = {} if ( ( not env_as_bool( "LIMIT_CACHE_USAGE" ) ) and @@ -247,6 +253,7 @@ def factory(start_date='2009-04-04', end_date='2013-03-03', max_images=None): cache_base_name=cache_names.get(url, None), overrides={'extraction': {'threshold': 5}}, bad_pixel_map=ptf_bad_pixel_map, + provtag=provtag ) if ( @@ -283,7 +290,7 @@ def factory(start_date='2009-04-04', end_date='2013-03-03', max_images=None): @pytest.fixture(scope='session') def ptf_reference_images(ptf_images_factory): - images = ptf_images_factory('2009-04-05', '2009-05-01', max_images=5) + images = ptf_images_factory('2009-04-05', '2009-05-01', max_images=5, provtag='ptf_reference_images') yield images @@ -312,20 +319,15 @@ def ptf_reference_images(ptf_images_factory): for image in imgs: image.delete_from_disk_and_database( commit=True, remove_downstreams=True ) - # ROB REMOVE THIS COMMENT - # with SmartSession() as session: - # session.autoflush = False - - # for image in images: - # image = session.merge(image) - # image.exposure.delete_from_disk_and_database(session=session, commit=False) - # image.delete_from_disk_and_database(session=session, commit=False, remove_downstreams=True) - # session.commit() + # Clean out the provenance tag that may have been created by the datastore_factory + with SmartSession() as session: + session.execute( sa.text( "DELETE FROM provenance_tags WHERE tag=:tag" ), {'tag': 'ptf_reference_images' } ) + session.commit() @pytest.fixture(scope='session') def ptf_supernova_images(ptf_images_factory): - images = ptf_images_factory('2010-02-01', '2013-12-31', max_images=2) + images = ptf_images_factory('2010-02-01', '2013-12-31', max_images=2, provtag='ptf_supernova_images') yield images @@ -340,19 +342,10 @@ def ptf_supernova_images(ptf_images_factory): for image in imgs: image.delete_from_disk_and_database( commit=True, remove_downstreams=True ) - # ROB REMOVE THIS COMMENT - # with SmartSession() as session: - # session.autoflush = False - - # for image in images: - # image = session.merge(image) - # # first delete the image and all it's products and the associated data (locally and on archive) - # image.delete_from_disk_and_database(session=session, commit=False, remove_downstreams=True) - # # only then delete the exposure, so it doesn't cascade delete the image and prevent deleting products - # image.exposure.delete_from_disk_and_database(session=session, commit=False) - - # session.commit() - + # Clean out the provenance tag that may have been created by the datastore_factory + with SmartSession() as session: + session.execute( sa.text( "DELETE FROM provenance_tags WHERE tag=:tag" ), {'tag': 
'ptf_supernova_images' } ) + session.commit() # conditionally call the ptf_reference_images fixture if cache is not there: # ref: https://stackoverflow.com/a/75337251 @@ -360,6 +353,14 @@ def ptf_supernova_images(ptf_images_factory): def ptf_aligned_images(request, ptf_cache_dir, data_dir, code_version): cache_dir = os.path.join(ptf_cache_dir, 'aligned_images') + prov = Provenance( + code_version=code_version, + parameters={'alignment': {'method': 'swarp', 'to_index': 'last'}, 'test_parameter': 'test_value'}, + upstreams=[], + process='coaddition', + is_testing=True, + ) + # try to load from cache if ( ( not env_as_bool( "LIMIT_CACHE_USAGE" ) ) and ( os.path.isfile(os.path.join(cache_dir, 'manifest.txt')) ) @@ -370,20 +371,26 @@ def ptf_aligned_images(request, ptf_cache_dir, data_dir, code_version): for filename in filenames: imfile, psffile, bgfile = filename.split() output_images.append(copy_from_cache(Image, cache_dir, imfile + '.image.fits')) + output_images[-1].provenance = prov + # Associate other objects + # BROKEN -- we don't set the provenance properly below! + # Set the provenance_id to None to explicitly indicate + # that we're not depending on the proper provenance + # to happen to have the same id this time around as it + # did when the cache was written. output_images[-1].psf = copy_from_cache(PSF, cache_dir, psffile + '.fits') + output_images[-1].psf.image = output_images[-1] + output_images[-1].psf.provenance_id = None output_images[-1].bg = copy_from_cache(Background, cache_dir, bgfile) + output_images[-1].bg.image = output_images[-1] + output_images[-1].bg.provenance_id = None output_images[-1].zp = copy_from_cache(ZeroPoint, cache_dir, imfile + '.zp') + output_images[-1].zp.sources_id = None # This isn't right, but we dont' have what we need + output_images[-1].zp.provenance_id = None else: # no cache available ptf_reference_images = request.getfixturevalue('ptf_reference_images') images_to_align = ptf_reference_images - prov = Provenance( - code_version=code_version, - parameters={'alignment': {'method': 'swarp', 'to_index': 'last'}, 'test_parameter': 'test_value'}, - upstreams=[], - process='coaddition', - is_testing=True, - ) coadd_image = Image.from_images(images_to_align, index=-1) coadd_image.provenance = prov coadd_image.provenance_id = prov.id @@ -449,18 +456,6 @@ def ptf_aligned_images(request, ptf_cache_dir, data_dir, code_version): # for image in ptf_reference_images: # image.exposure.delete_from_disk_and_database( commit=True, remove_downstreams=True ) - # ROB REMOVE THIS COMMENT - # with SmartSession() as session, warnings.catch_warnings(): - # warnings.filterwarnings( - # action='ignore', - # message=r'.*DELETE statement on table .* expected to delete \d* row\(s\).*', - # ) - # for image in ptf_reference_images: - # image = merge( session, image ) - # image.exposure.delete_from_disk_and_database(commit=False, session=session, remove_downstreams=True) - # # image.delete_from_disk_and_database(commit=False, session=session, remove_downstreams=True) - # session.commit() - @pytest.fixture def ptf_ref( @@ -471,7 +466,7 @@ def ptf_ref( data_dir, code_version ): - refmaker = refmaker_factory('test_ref_ptf', 'PTF') + refmaker = refmaker_factory('test_ref_ptf', 'PTF', provtag='ptf_ref') pipe = refmaker.coadd_pipeline # build up the provenance tree @@ -522,16 +517,21 @@ def ptf_ref( coadd_image.sources = copy_from_cache( SourceList, ptf_cache_dir, cache_base_name + f'.sources_{sources_prov.id[:6]}.fits' ) + # Make sure that any automated fields set in the database 
don't have + # the values they happened to have when the cache was created + coadd_image.sources.image = coadd_image coadd_image.sources.provenance = sources_prov assert coadd_image.sources.provenance_id == coadd_image.sources.provenance.id # get the PSF: coadd_image.psf = copy_from_cache(PSF, ptf_cache_dir, cache_base_name + f'.psf_{sources_prov.id[:6]}.fits') + coadd_image.psf.image = coadd_image coadd_image.psf.provenance = sources_prov assert coadd_image.psf.provenance_id == coadd_image.psf.provenance.id # get the background: coadd_image.bg = copy_from_cache(Background, ptf_cache_dir, cache_base_name + f'.bg_{sources_prov.id[:6]}.h5') + coadd_image.bg.image = coadd_image coadd_image.bg.provenance = sources_prov assert coadd_image.bg.provenance_id == coadd_image.bg.provenance.id @@ -539,12 +539,14 @@ def ptf_ref( coadd_image.wcs = copy_from_cache( WorldCoordinates, ptf_cache_dir, cache_base_name + f'.wcs_{sources_prov.id[:6]}.txt' ) + coadd_image.wcs.sources = coadd_image.sources coadd_image.wcs.provenance = sources_prov coadd_image.sources.wcs = coadd_image.wcs assert coadd_image.wcs.provenance_id == coadd_image.wcs.provenance.id # get the zero point: coadd_image.zp = copy_from_cache(ZeroPoint, ptf_cache_dir, cache_base_name + '.zp') + coadd_image.zp.sources = coadd_image.sources coadd_image.zp.provenance = sources_prov coadd_image.sources.zp = coadd_image.zp assert coadd_image.zp.provenance_id == coadd_image.zp.provenance.id @@ -585,6 +587,10 @@ def ptf_ref( ref_in_db = session.scalars(sa.select(Reference).where(Reference.id == ref.id)).first() assert ref_in_db is None # should have been deleted by cascade when image is deleted + # Clean out the provenance tag that may have been created by the refmaker_factory + with SmartSession() as session: + session.execute( sa.text( "DELETE FROM provenance_tags WHERE tag=:tag" ), {'tag': 'ptf_ref' } ) + session.commit() @pytest.fixture def ptf_ref_offset(ptf_ref): @@ -620,7 +626,7 @@ def ptf_ref_offset(ptf_ref): @pytest.fixture(scope='session') def ptf_refset(refmaker_factory): - refmaker = refmaker_factory('test_refset_ptf', 'PTF') + refmaker = refmaker_factory('test_refset_ptf', 'PTF', 'ptf_refset') refmaker.pars.save_new_refs = True refmaker.make_refset() # this makes a refset without making any references @@ -638,6 +644,11 @@ def ptf_refset(refmaker_factory): session.commit() + # Clean out the provenance tag that may have been created by the refmaker_factory + with SmartSession() as session: + session.execute( sa.text( "DELETE FROM provenance_tags WHERE tag=:tag" ), {'tag': 'ptf_refset' } ) + session.commit() + @pytest.fixture def ptf_subtraction1(ptf_ref, ptf_supernova_images, subtractor, ptf_cache_dir): @@ -664,7 +675,6 @@ def ptf_subtraction1(ptf_ref, ptf_supernova_images, subtractor, ptf_cache_dir): im = copy_from_cache(Image, ptf_cache_dir, cache_path) im.upstream_images = [ptf_ref.image, ptf_supernova_images[0]] im.ref_image_id = ptf_ref.image.id - im.provenance = prov else: # cannot find it on cache, need to produce it, using other fixtures diff --git a/tests/models/test_provenance.py b/tests/models/test_provenance.py index f9c82042..a0444745 100644 --- a/tests/models/test_provenance.py +++ b/tests/models/test_provenance.py @@ -7,6 +7,7 @@ from models.base import SmartSession from models.provenance import CodeHash, CodeVersion, Provenance +# Note: ProvenanceTag.newtag is tested as part of pipeline/test_pipeline.py::test_provenance_tree @pytest.mark.xfail def test_code_versions(): @@ -379,4 +380,4 @@ def test_eager_load_upstreams( 
provenance_base ): if 'p4' in locals(): session.execute(sa.delete(Provenance).where(Provenance.id == p4.id)) - session.commit() \ No newline at end of file + session.commit() diff --git a/tests/pipeline/test_pipeline.py b/tests/pipeline/test_pipeline.py index 177289e4..6a66d94d 100644 --- a/tests/pipeline/test_pipeline.py +++ b/tests/pipeline/test_pipeline.py @@ -7,7 +7,7 @@ import numpy as np from models.base import SmartSession, FileOnDiskMixin -from models.provenance import Provenance +from models.provenance import Provenance, ProvenanceTag from models.image import Image, image_upstreams_association_table from models.calibratorfile import CalibratorFile from models.source_list import SourceList @@ -311,7 +311,7 @@ def test_bitflag_propagation(decam_exposure, decam_reference, decam_default_cali sec_id = ref.section_id try: # cleanup the file at the end - p = Pipeline() + p = Pipeline( pipeline={'provenance_tag': 'test_bitflag_propagation'} ) p.subtractor.pars.refset = 'test_refset_decam' p.pars.save_before_subtraction = False exposure.badness = 'banding' # add a bitflag to check for propagation @@ -415,6 +415,9 @@ def test_bitflag_propagation(decam_exposure, decam_reference, decam_default_cali ds.exposure.bitflag = 0 session.merge(ds.exposure) session.commit() + # Remove the ProvenanceTag that will have been created + session.execute( sa.text( "DELETE FROM provenance_tags WHERE tag='test_bitflag_propagation'" ) ) + session.commit() def test_get_upstreams_and_downstreams(decam_exposure, decam_reference, decam_default_calibrators, archive): @@ -426,7 +429,7 @@ def test_get_upstreams_and_downstreams(decam_exposure, decam_reference, decam_de sec_id = ref.section_id try: # cleanup the file at the end - p = Pipeline() + p = Pipeline( pipeline={'provenance_tag': 'test_get_upstreams_and_downstreams'} ) p.subtractor.pars.refset = 'test_refset_decam' ds = p.run(exposure, sec_id) @@ -502,6 +505,11 @@ def test_get_upstreams_and_downstreams(decam_exposure, decam_reference, decam_de finally: if 'ds' in locals(): ds.delete_everything() + # Clean up the provenance tag created by the pipeline + with SmartSession() as session: + session.execute( sa.text( "DELETE FROM provenance_tags WHERE tag=:tag" ), + { 'tag': 'test_get_upstreams_and_downstreams' } ) + session.commit() # added this cleanup to make sure the temp data folder is cleaned up # this should be removed after we add datastore failure modes (issue #150) shutil.rmtree(os.path.join(os.path.dirname(exposure.get_fullpath()), '115'), ignore_errors=True) @@ -560,9 +568,26 @@ def test_provenance_tree(pipeline_for_tests, decam_refset, decam_exposure, decam p = pipeline_for_tests p.subtractor.pars.refset = 'test_refset_decam' - provs = p.make_provenance_tree(decam_exposure) + def check_prov_tag( provs, ptagname ): + with SmartSession() as session: + ptags = session.query( ProvenanceTag ).filter( ProvenanceTag.tag==ptagname ).all() + provids = [] + for prov in provs: + if isinstance( prov, list ): + provids.extend( [ i.id for i in prov ] ) + else: + provids.append( prov.id ) + ptagprovids = [ ptag.provenance_id for ptag in ptags ] + assert all( [ pid in provids for pid in ptagprovids ] ) + assert all( [ pid in ptagprovids for pid in provids ] ) + return ptags + + provs = p.make_provenance_tree( decam_exposure ) assert isinstance(provs, dict) + # Make sure the ProvenanceTag got created properly + ptags = check_prov_tag( provs.values(), 'pipeline_for_tests' ) + t_start = datetime.datetime.utcnow() ds = p.run(decam_exposure, 'S3') # the data should all be 
there so this should be quick t_end = datetime.datetime.utcnow() @@ -586,6 +611,33 @@ def test_provenance_tree(pipeline_for_tests, decam_refset, decam_exposure, decam assert abs(report.start_time - t_start) < datetime.timedelta(seconds=1) assert abs(report.finish_time - t_end) < datetime.timedelta(seconds=1) + # Make sure that the provenance tags are reused if we ask for the same thing + newprovs = p.make_provenance_tree( decam_exposure ) + provids = [] + for prov in provs.values(): + if isinstance( prov, list ): + provids.extend( [ i.id for i in prov ] ) + else: + provids.append( prov.id ) + newprovids = [] + for prov in newprovs.values(): + if isinstance( prov, list ): + newprovids.extend( [ i.id for i in prov ] ) + else: + newprovids.append( prov.id ) + assert set( newprovids ) == set( provids ) + newptags = check_prov_tag( newprovs.values(), 'pipeline_for_tests' ) + assert set( [ i.id for i in newptags ] ) == set( [ i.id for i in ptags ] ) + + # Make sure that we get an exception if we ask for a mismatched provenance tree + # Do this by creating a new pipeline with inconsistent parameters but asking + # for the same provenance tag. + newp = Pipeline( pipeline={'provenance_tag': 'pipeline_for_tests'}, + extraction={'sources': { 'threshold': 42. } } ) + with pytest.raises( RuntimeError, + match='The following provenances are not associated with provenance tag pipeline_for_tests' ): + newp.make_provenance_tree( decam_exposure ) + def test_inject_warnings_errors(decam_datastore, decam_reference, pipeline_for_tests): from pipeline.top_level import PROCESS_OBJECTS diff --git a/tests/webap/test_webap.py b/tests/webap/test_webap.py new file mode 100644 index 00000000..2a376047 --- /dev/null +++ b/tests/webap/test_webap.py @@ -0,0 +1,157 @@ +import re +import time +import pytest + +import sqlalchemy as sa + +import selenium +import selenium.webdriver +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions +from selenium.webdriver.support.wait import WebDriverWait +from selenium.webdriver.support.ui import Select +from selenium.webdriver.remote.webelement import WebElement + +from models.base import SmartSession +from models.provenance import CodeVersion, Provenance, ProvenanceTag + +from util.logger import SCLogger + +def test_webap( browser, webap_url, decam_datastore ): + ds = decam_datastore + junkprov = None + + try: + # Create a new provenance tag, tagging the provenances that are in decam_datastore + ProvenanceTag.newtag( 'test_webap', + [ ds.exposure.provenance, + ds.image.provenance, + ds.sources.provenance, + ds.reference.provenance, + ds.sub_image.provenance, + ds.detections.provenance, + ds.cutouts.provenance, + ds.measurements[0].provenance ] ) + + # Create a throwaway provenance and provenance tag so we can test + # things *not* being found + with SmartSession() as session: + cv = session.query( CodeVersion ).first() + junkprov = Provenance( process='no_process', code_version=cv, is_testing=True ) + session.add( junkprov ) + session.commit() + ProvenanceTag.newtag( 'no_such_tag', [ junkprov ] ) + + browser.get( webap_url ) + WebDriverWait( browser, timeout=10 ).until( + lambda d: d.find_element(By.ID, 'seechange_context_render_page_complete' ) ) + + # The "test_webap" option in the provtag_wid select widget won't necessarily + # be there immediately, because it's filled in with a callback from a web request + tries = 5 + while ( tries > 0 ): + provtag_wid = browser.find_element( By.ID, "provtag_wid" ) + options = 
provtag_wid.find_elements( By.TAG_NAME, "option" ) + if any( [ o.text == 'test_webap' for o in options ] ): + break + tries -= 1 + if tries < 0: + assert False, "Failed to find the test_webap option in the provenances select widget" + else: + SCLogger.debug( "Didn't find test_webap in the provtag_wid select, sleeping 1s and retrying" ) + time.sleep( 1 ) + + buttons = browser.find_elements( By.XPATH, "//input[@type='button']" ) + buttons = { b.get_attribute("value") : b for b in buttons } + + # Make sure we get no exposures if we ask for the junk tag + + select = Select( provtag_wid ) + select.select_by_visible_text( 'no_such_tag' ) + buttons['Show Exposures'].click() + + WebDriverWait( browser, timeout=10 ).until( + lambda d: d.find_element( By.XPATH, "//h2[contains(.,'Exposures from')]" ) ) + + # Make sure that the "Exposure List" div is what's shown + # WARNING -- this absolute xpath might change if the page layout is changed! + tabcontentdiv = browser.find_element( By.XPATH, "html/body/div/div/div/div/div/div/div[2]" ) + assert tabcontentdiv.text[:15] == 'Exposures from ' + explist = browser.find_element( By.ID, "exposure_list_table" ) + rows = explist.find_elements( By.TAG_NAME, "tr" ) + assert len(rows) == 1 # Just the header row + + # Now ask for the test_webap tag, see if we get the one exposure we expect + + select.select_by_visible_text( 'test_webap' ) + buttons['Show Exposures'].click() + # Give it half a second to go at least get to the "loading" screen; that's + # all javascript with no server communcation, so should be fast. + time.sleep( 0.5 ) + WebDriverWait( browser, timeout=10 ).until( + lambda d: d.find_element( By.XPATH, "//h2[contains(.,'Exposures from')]" ) ) + + tabcontentdiv = browser.find_element( By.XPATH, "html/body/div/div/div/div/div/div/div[2]" ) + assert tabcontentdiv.text[:15] == 'Exposures from ' + explist = browser.find_element( By.ID, "exposure_list_table" ) + rows = explist.find_elements( By.TAG_NAME, "tr" ) + assert len(rows) == 2 # Just the header row + + cols = rows[1].find_elements( By.XPATH, "./*" ) + assert cols[0].text == 'c4d_230702_080904_ori.fits.fz' + assert cols[2].text == 'ELAIS-E1' + assert cols[5].text == '1' # n_images + assert cols[6].text == '172' # detections + assert cols[7].text == '6' # sources + + # Try to click on the exposure name, make sure we get the exposure details + expnamelink = cols[0].find_element( By.TAG_NAME, 'a' ) + expnamelink.click() + WebDriverWait( browser, timeout=10 ).until( + lambda d: d.find_element( By.XPATH, "//h2[contains(.,'Exposure c4d_230702_080904_ori.fits.fz')]" ) ) + + # OMG I nest a lot of divs + tabcontentdiv = browser.find_element( By.XPATH, "html/body/div/div/div/div/div/div/div[2]" ) + imagesdiv = tabcontentdiv.find_element( By.XPATH, "./div/div/div/div[2]/div" ) + assert re.search( r"^Exposure has 1 images and 1 completed subtractions.*" + r"6 out of 172 detections pass preliminary cuts", + imagesdiv.text, re.DOTALL ) is not None + + + imagestab = imagesdiv.find_element( By.TAG_NAME, 'table' ) + rows = imagestab.find_elements( By.TAG_NAME, 'tr' ) + assert len(rows) == 2 + cols = rows[1].find_elements( By.XPATH, "./*" ) + assert re.search( r'^c4d_20230702_080904_S3_r_Sci', cols[1].text ) is not None + + # Find the sources tab and click on that + tabbuttonsdiv = tabcontentdiv.find_element( By.XPATH, "./div/div/div/div[1]" ) + sourcestab = tabbuttonsdiv.find_element( By.XPATH, "//.[.='Sources']" ) + sourcestab.click() + # Give it half a second to go at least get to the "loading" screen; that's + # 
all javascript with no server communcation, so should be fast. + time.sleep( 0.5 ) + WebDriverWait( browser, timeout=10 ).until( + lambda d: d.find_element( By.XPATH, "//p[contains(.,'Sources for all successfully completed chips')]" ) ) + + # Now imagesdiv should have information about the sources + tabcontentdiv = browser.find_element( By.XPATH, "html/body/div/div/div/div/div/div/div[2]" ) + imagesdiv = tabcontentdiv.find_element( By.XPATH, "./div/div/div/div[2]/div" ) + + sourcestab = imagesdiv.find_element( By.TAG_NAME, 'table' ) + rows = sourcestab.find_elements( By.TAG_NAME, 'tr' ) + assert len(rows) == 7 + # check stuff about the rows? + + # There is probably more we should be testing here. Definitely. + + finally: + # Clean up the junk Provenance, and the ProvenanceTags we created + with SmartSession() as session: + session.execute( sa.text( "DELETE FROM provenance_tags " + "WHERE tag IN ('test_webap', 'no_such_tag')" ) ) + if junkprov is not None: + session.delete( junkprov ) + session.commit() + + diff --git a/util/cache.py b/util/cache.py index d93de5be..5209080e 100644 --- a/util/cache.py +++ b/util/cache.py @@ -1,6 +1,20 @@ +# DO NOT USE THESE OUTSIDE OF TESTS IN tests/ +# +# (The cache has some scariness to it, and we don't want +# it built into the mainstream pipeline.) +# +# What's more, because of how it functions, tests will probably fail if +# you don't empty the cache every time you reinitialize the database. +# See Issue #339/ + +# (The cache is still not useless, because if you run multiple tests, +# the cache will be used internally to avoid recalculating stuff for +# different tests.) + import os import shutil import json +import datetime from models.base import FileOnDiskMixin from util.logger import SCLogger @@ -153,6 +167,14 @@ def copy_from_cache(cls, cache_dir, filepath): with its files but will not necessarily have the correct relationships to other objects. + WARNING : this caches database records to json files, including + values of things that are determined automatically (e.g. ids set + from database sequences) or that are foreign ids whose values depend + on the history of what was built into the database, and which may + well not be the same when the object is restored from the cache. As + such, anything that uses this cache needs to very carefully go + thorugh all fields like that and make sure that they're wiped! + Parameters ---------- cls : Class that derives from FileOnDiskMixin, or that implements from_dict(dict) @@ -181,6 +203,20 @@ def copy_from_cache(cls, cache_dir, filepath): output = cls.from_dict(json_dict) + # COMMENTED THE NEXT OUT. + # It's the right thing to do -- automatically assigned + # database attributes should *not* be restored + # from whatever they happened to be when the cache + # was written -- but it was leading to mysterious + # sqlalchemy errors elsewhere. + # if hasattr( output, 'id' ): + # output.id = None + # now = datetime.datetime.now( tz=datetime.timezone.utc ) + # if hasattr( output, 'created_at' ): + # output.created_at = now + # if hasattr( output, 'modified' ): + # output.modified = now + # copy any associated files if isinstance(output, FileOnDiskMixin): # if fullpath ends in filepath_extensions[0] @@ -212,6 +248,8 @@ def copy_list_from_cache(cls, cache_dir, filepath): it will be able to figure out where all the associated files are saved based on the filepath and extensions in the JSON file. + See the WARNING in copy_from_cache docstring. 
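
To make the WARNING above concrete, the test fixtures that restore objects from the cache follow a pattern like the sketch below: re-link the restored object to the parents built in the current test run, and wipe any provenance or foreign ids that were only valid when the cache was written. This is a minimal illustration, not code from this patch; the PSF import path and the helper name are assumptions.

    from util.cache import copy_from_cache
    from models.psf import PSF   # import path assumed, for illustration only

    def load_cached_psf( cache_dir, psffile, image, prov=None ):
        """Illustrative helper: restore a PSF from the test cache, clearing stale database-derived fields."""
        psf = copy_from_cache( PSF, cache_dir, psffile + '.fits' )
        psf.image = image            # re-link to the parent image built in *this* test run
        psf.provenance_id = None     # don't trust an id cached from an earlier database
        if prov is not None:
            psf.provenance = prov    # attach the provenance constructed for this run
        return psf
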
+ Parameters ---------- cls: Class that derives from FileOnDiskMixin, or that implements from_dict(dict) @@ -225,6 +263,7 @@ def copy_list_from_cache(cls, cache_dir, filepath): ------- output: list The list of reconstructed objects, of the same type as the class. + """ # allow user to give an absolute path, so long as it is in the cache dir if filepath.startswith(cache_dir): @@ -239,8 +278,18 @@ def copy_list_from_cache(cls, cache_dir, filepath): json_list = json.load(fp) output = [] + now = datetime.datetime.now( tz=datetime.timezone.utc ) for obj_dict in json_list: - output.append(cls.from_dict(obj_dict)) + newobj = cls.from_dict( obj_dict ) + # COMMENTED THE NEXT OUT. + # Search above for "COMMENTED THE NEXT OUT" for reason. + # if hasattr( newobj, 'id' ): + # newobj.id = None + # if hasattr( newobj, 'created_at' ): + # newobj.created_at = now + # if hasattr( newobj, 'modified' ): + # newobj.modified = now + output.append( newobj ) if len(output) == 0: return [] diff --git a/util/radec.py b/util/radec.py index 9740e352..11f07b69 100644 --- a/util/radec.py +++ b/util/radec.py @@ -5,8 +5,8 @@ from astropy.coordinates import SkyCoord, BarycentricTrueEcliptic import astropy.units as u -_radecparse = re.compile( '^ *(?P[\-\+])? *(?P[0-9]{1,2}): *(?P[0-9]{1,2}):' - ' *(?P[0-9]{1,2}(\.[0-9]*)?) *$' ) +_radecparse = re.compile( r'^ *(?P[\-\+])? *(?P[0-9]{1,2}): *(?P[0-9]{1,2}):' + r' *(?P[0-9]{1,2}(\.[0-9]*)?) *$' ) def parse_sexigesimal_degrees( strval, hours=False, positive=None ): diff --git a/util/util.py b/util/util.py index dcaeb24d..979eb6c1 100644 --- a/util/util.py +++ b/util/util.py @@ -453,7 +453,7 @@ def as_datetime( string ): --------- string : str or datetime.datetime The string to convert. If a datetime.datetime, the return - value is just this. If none or an empty string ("^\s*$"), will + value is just this. If none or an empty string ("^\\s*$"), will return None. Otherwise, must be a string that dateutil.parser.parse can handle. diff --git a/webap/seechange_webap.py b/webap/seechange_webap.py index 0d10e4b8..cce670eb 100644 --- a/webap/seechange_webap.py +++ b/webap/seechange_webap.py @@ -68,11 +68,29 @@ def mainpage(): # ********************************************************************** +@app.route( "/provtags", methods=['POST'], strict_slashes=False ) +def provtags(): + try: + conn = next( dbconn() ) + cursor = conn.cursor() + cursor.execute( 'SELECT DISTINCT ON(tag) tag FROM provenance_tags ORDER BY tag' ) + return { 'status': 'ok', + 'provenance_tags': [ row[0] for row in cursor.fetchall() ] + } + except Exception as ex: + app.logger.exception( ex ) + return { 'status': 'error', + 'error': f'Exception: {ex}' } + + +# ********************************************************************** + @app.route( "/exposures", methods=['POST'], strict_slashes=False ) def exposures(): try: data = { 'startdate': None, - 'enddate': None + 'enddate': None, + 'provenancetag': None, } if flask.request.is_json: data.update( flask.request.json ) @@ -84,30 +102,67 @@ def exposures(): conn = next( dbconn() ) cursor = conn.cursor() - # TODO : deal with provenance! - # (We need some kind of provenance tagging table, so that the user can specify - # a user-readable name (e.g. "default", "latest", "dr1", whatever) that specifies - # a set of provenances to search. One of these names must be all the provenances - # we're using "right now" in the active pipeline; that will be the one that - # (by default) the webap uses. 
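
The deleted TODO above is what the rest of this patch provides: provenance_tags is the user-facing mapping from a readable tag name to a set of provenances, the new /provtags endpoint lists the available tags, and /exposures now accepts a provenancetag filter. As a hedged usage sketch (the payload keys come from the handlers in this patch, but the requests dependency, base_url, and the helper itself are illustrative assumptions), a client could drive the two endpoints like this:

    import requests

    def list_exposures( base_url, provtag=None, startdate=None, enddate=None ):
        """Illustrative client: list exposures from the webap, filtered by provenance tag."""
        tags = requests.post( f"{base_url}/provtags" ).json()
        if tags['status'] != 'ok':
            raise RuntimeError( tags.get( 'error', 'failed to list provenance tags' ) )
        if ( provtag is not None ) and ( provtag not in tags['provenance_tags'] ):
            raise ValueError( f"Unknown provenance tag {provtag}" )
        resp = requests.post( f"{base_url}/exposures",
                              json={ 'startdate': startdate,
                                     'enddate': enddate,
                                     'provenancetag': provtag } ).json()
        if resp['status'] != 'ok':
            raise RuntimeError( resp.get( 'error', 'failed to list exposures' ) )
        # resp['exposures'] is a dict of parallel lists (id, name, mjd, target, filter, ...)
        return resp['exposures']
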
- q = ( 'SELECT m.id, m.filepath, m.mjd, m.target, m.filter, m.filter_array, m.exp_time, ' - ' m.n_images, m.n_cutouts, m.n_measurements, ' - ' SUM( CASE WHEN r.success THEN 1 ELSE 0 END ) AS n_successim, ' - ' SUM( CASE WHEN r.error_message IS NOT NULL THEN 1 ELSE 0 END ) as n_errors ' - 'FROM ( ' - ' SELECT e.id, e.filepath, e.mjd, e.target, e.filter, e.filter_array, e.exp_time, ' - ' COUNT(DISTINCT(i.id)) AS n_images, COUNT(c.id) AS n_cutouts, COUNT(m.id) AS n_measurements ' - ' FROM exposures e ' - ' LEFT JOIN images i ON i.exposure_id=e.id ' - ' LEFT JOIN image_upstreams_association ias ON ias.upstream_id=i.id ' - ' LEFT JOIN images s ON s.id = ias.downstream_id AND s.is_sub ' - ' LEFT JOIN source_lists sl ON sl.image_id=s.id ' - ' LEFT JOIN cutouts c ON c.sources_id=sl.id ' - ' LEFT JOIN measurements m ON m.cutouts_id=c.id ' - ' LEFT JOIN reports r ON r.exposure_id=e.id ' ) + + # Gonna do this in three steps. First, get all the images with + # counts of source lists and counts of measurements in a temp + # table, then do the sums and things on that temp table. + # Filtering on provenance tags makes this more complicated, so + # we'll do a different query if we're doing that. Truthfully, + # asking for all provenance tags is going to be a mess for the + # user.... perhaps we should disable it? + haveputinwhere = False subdict = {} + if data['provenancetag'] is None: + q = ( 'SELECT e.id, e.filepath, e.mjd, e.target, e.filter, e.filter_array, e.exp_time, ' + ' i.id AS imgid, s.id AS subid, sl.id AS slid, sl.num_sources, ' + ' COUNT(m.id) AS num_measurements ' + 'INTO TEMP TABLE temp_imgs ' + 'FROM exposures e ' + 'LEFT JOIN images i ON i.exposure_id=e.id ' + 'LEFT JOIN ( ' + ' SELECT su.id, ias.upstream_id ' + ' FROM images su ' + ' INNER JOIN image_upstreams_association ias ON ias.downstream_id=su.id ' + ' WHERE su.is_sub ' + ') s ON s.upstream_id=i.id ' + 'LEFT JOIN source_lists sl ON sl.image_id=s.id ' + 'LEFT JOIN cutouts cu ON cu.sources_id=sl.id ' + 'LEFT JOIN measurements m ON m.cutouts_id=cu.id ' + 'GROUP BY e.id, i.id, s.id, sl.id ' + ) + else: + q = ( 'SELECT e.id, e.filepath, e.mjd, e.target, e.filter, e.filter_array, e.exp_time, ' + ' i.id AS imgid, s.id AS subid, sl.id AS slid, sl.num_sources, ' + ' COUNT(m.id) AS num_measurements ' + 'INTO TEMP TABLE temp_imgs ' + 'FROM exposures e ' + 'LEFT JOIN ( ' + ' SELECT im.id, im.exposure_id FROM images im ' + ' INNER JOIN provenance_tags impt ON impt.provenance_id=im.provenance_id AND impt.tag=%(provtag)s ' + ') i ON i.exposure_id=e.id ' + 'LEFT JOIN ( ' + ' SELECT su.id, ias.upstream_id FROM images su ' + ' INNER JOIN image_upstreams_association ias ON ias.downstream_id=su.id AND su.is_sub ' + ' INNER JOIN provenance_tags supt ON supt.provenance_id=su.provenance_id AND supt.tag=%(provtag)s ' + ') s ON s.upstream_id=i.id ' + 'LEFT JOIN ( ' + ' SELECT sli.id, sli.image_id, sli.num_sources FROM source_lists sli ' + ' INNER JOIN provenance_tags slpt ON slpt.provenance_id=sli.provenance_id AND slpt.tag=%(provtag)s ' + ') sl ON sl.image_id=s.id ' + 'LEFT JOIN ( ' + ' SELECT cu.id, cu.sources_id FROM cutouts cu ' + ' INNER JOIN provenance_tags cupt ON cu.provenance_id=cupt.provenance_id AND cupt.tag=%(provtag)s ' + ') c ON c.sources_id=sl.id ' + 'LEFT JOIN ( ' + ' SELECT meas.id, meas.cutouts_id FROM measurements meas ' + ' INNER JOIN provenance_tags mept ON mept.provenance_id=meas.provenance_id AND mept.tag=%(provtag)s ' + ') m ON m.cutouts_id=c.id ' + 'INNER JOIN provenance_tags ept ON ept.provenance_id=e.provenance_id AND 
ept.tag=%(provtag)s ' + 'GROUP BY e.id, i.id, s.id, sl.id, sl.num_sources ' + ) + subdict['provtag'] = data['provenancetag'] if ( t0 is not None ) or ( t1 is not None ): - q += " WHERE " + q += 'WHERE ' if t0 is not None: q += 'e.mjd >= %(t0)s' subdict['t0'] = t0 @@ -115,15 +170,46 @@ def exposures(): if t0 is not None: q += ' AND ' q += 'e.mjd <= %(t1)s' subdict['t1'] = t1 - q += ( ' GROUP BY e.id ' # ,e.filepath,e.mjd,e.target,e.filter,e.filter_array,e.exp_time ' - ' ORDER BY e.mjd, e.filter, e.filter_array ' ) - - q += ( ') m ' - 'LEFT JOIN reports r ON m.id=r.exposure_id ' - 'GROUP BY m.id, m.filepath, m.mjd, m.target, m.filter, m.filter_array, m.exp_time, ' - ' m.n_images, m.n_cutouts, m.n_measurements ' ) cursor.execute( q, subdict ) + + # Now run a second query to count and sum those things + # These numbers will be wrong (double-counts) if not filtering on a provenance tag, or if the + # provenance tag includes multiple provenances for a given step! + q = ( 'SELECT t.id, t.filepath, t.mjd, t.target, t.filter, t.filter_array, t.exp_time, ' + ' COUNT(t.subid) AS num_subs, SUM(t.num_sources) AS num_sources, ' + ' SUM(t.num_measurements) AS num_measurements ' + 'INTO TEMP TABLE temp_imgs_2 ' + 'FROM temp_imgs t ' + 'GROUP BY t.id, t.filepath, t.mjd, t.target, t.filter, t.filter_array, t.exp_time ' + ) + + cursor.execute( q ) + + # Run a third query count reports + subdict = {} + q = ( 'SELECT t.id, t.filepath, t.mjd, t.target, t.filter, t.filter_array, t.exp_time, ' + ' t.num_subs, t.num_sources, t.num_measurements, ' + ' SUM( CASE WHEN r.success THEN 1 ELSE 0 END ) as n_successim, ' + ' SUM( CASE WHEN r.error_message IS NOT NULL THEN 1 ELSE 0 END ) AS n_errors ' + 'FROM temp_imgs_2 t ' + ) + if data['provenancetag'] is None: + q += 'LEFT JOIN reports r ON r.exposure_id=t.id ' + else: + q += ( 'LEFT JOIN ( ' + ' SELECT re.exposure_id, re.success, re.error_message ' + ' FROM reports re ' + ' INNER JOIN provenance_tags rept ON rept.provenance_id=re.provenance_id AND rept.tag=%(provtag)s ' + ') r ON r.exposure_id=t.id ' + ) + subdict['provtag'] = data['provenancetag'] + # I wonder if making a primary key on the temp table would be more efficient than + # all these columns in GROUP BY? Investigate this. 
+ q += ( 'GROUP BY t.id, t.filepath, t.mjd, t.target, t.filter, t.filter_array, t.exp_time, ' + ' t.num_subs, t.num_sources, t.num_measurements ' ) + + cursor.execute( q, subdict ) columns = { cursor.description[i][0]: i for i in range(len(cursor.description)) } ids = [] @@ -132,9 +218,9 @@ def exposures(): target = [] filtername = [] exp_time = [] - n_images = [] - n_cutouts = [] + n_subs = [] n_sources = [] + n_measurements = [] n_successim = [] n_errors = [] @@ -152,15 +238,16 @@ def exposures(): f"filter_array={row[columns['filter_array']]} type {row[columns['filter_array']]}" ) filtername.append( row[columns['filter']] ) exp_time.append( row[columns['exp_time']] ) - n_images.append( row[columns['n_images']] ) - n_cutouts.append( row[columns['n_cutouts']] ) - n_sources.append( row[columns['n_measurements']] ) + n_subs.append( row[columns['num_subs']] ) + n_sources.append( row[columns['num_sources']] ) + n_measurements.append( row[columns['num_measurements']] ) n_successim.append( row[columns['n_successim']] ) n_errors.append( row[columns['n_errors']] ) return { 'status': 'ok', 'startdate': t0, 'enddate': t1, + 'provenance_tag': data['provenancetag'], 'exposures': { 'id': ids, 'name': name, @@ -168,9 +255,9 @@ def exposures(): 'target': target, 'filter': filtername, 'exp_time': exp_time, - 'n_images': n_images, - 'n_cutouts': n_cutouts, + 'n_subs': n_subs, 'n_sources': n_sources, + 'n_measurements': n_measurements, 'n_successim': n_successim, 'n_errors': n_errors, } @@ -186,47 +273,117 @@ def exposures(): # ********************************************************************** -@app.route( "/exposure_images/", methods=['GET', 'POST'], strict_slashes=False ) -def exposure_images( expid ): +@app.route( "/exposure_images//", methods=['GET', 'POST'], strict_slashes=False ) +def exposure_images( expid, provtag ): try: conn = next( dbconn() ) cursor = conn.cursor() - # TODO : deal with provenance! - q = ( 'SELECT i.id, i.filepath, i.ra, i.dec, i.gallat, i.section_id, i.fwhm_estimate, ' - ' i.zero_point_estimate, i.lim_mag_estimate, i.bkg_mean_estimate, i.bkg_rms_estimate, ' - ' s.id AS subid, COUNT(c.id) AS numcutouts, COUNT(m.id) AS nummeasurements, ' - ' r.error_step, r.error_type, r.error_message, r.warnings, ' - ' r.process_memory, r.process_runtime, r.progress_steps_bitflag, r.products_exist_bitflag ' + + # Going to do this in a few steps again. Might be able to write one + # bigass query, but it's probably more efficient to use temp tables. + # Easier to build the queries that way too. 
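
The step-by-step temp-table approach described in the comment above also doubles as a consistency check: each temp table gets a primary key on the image id, so if a provenance tag is not well-defined (e.g. two source lists for the same image under one tag) the ALTER TABLE fails instead of silently double counting. A stripped-down, hypothetical illustration of that pattern, with stand-in table and column names rather than the actual webap queries:

    def count_children_per_parent( cursor, provtag ):
        """Hypothetical example of the temp-table pattern; `cursor` is a psycopg2 cursor."""
        # Step 1: pull the parents of interest (filtered by provenance tag) into a temp table.
        cursor.execute( "SELECT p.id INTO TEMP TABLE temp_parents "
                        "FROM parents p "
                        "INNER JOIN provenance_tags pt "
                        "  ON pt.provenance_id=p.provenance_id AND pt.tag=%(provtag)s",
                        { 'provtag': provtag } )
        # The primary key speeds up the later join, and errors out if the query above
        # unexpectedly produced more than one row per parent.
        cursor.execute( "ALTER TABLE temp_parents ADD PRIMARY KEY (id)" )
        # Step 2: aggregate against the small temp table instead of one giant query.
        cursor.execute( "SELECT t.id, COUNT(c.id) AS n_children "
                        "FROM temp_parents t "
                        "LEFT JOIN children c ON c.parent_id=t.id "
                        "GROUP BY t.id" )
        return cursor.fetchall()
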
+ + subdict = { 'expid': int(expid), 'provtag': provtag } + + # Step 1: collect image info into temp_exposure_images + q = ( 'SELECT i.id, i.filepath, i.ra, i.dec, i.gallat, i.exposure_id, i.section_id, i.fwhm_estimate, ' + ' i.zero_point_estimate, i.lim_mag_estimate, i.bkg_mean_estimate, i.bkg_rms_estimate ' + 'INTO TEMP TABLE temp_exposure_images ' 'FROM images i ' - 'LEFT JOIN image_upstreams_association ias ON ias.upstream_id=i.id ' - 'LEFT JOIN images s ON s.id = ias.downstream_id AND s.is_sub ' - 'LEFT JOIN source_lists sl ON sl.image_id=s.id ' - 'LEFT JOIN cutouts c ON c.sources_id=sl.id ' - 'LEFT JOIN measurements m ON c.id=m.cutouts_id ' - 'LEFT JOIN reports r ON r.exposure_id=i.exposure_id AND r.section_id=i.section_id ' - 'WHERE i.is_sub=false AND i.exposure_id=%(expid)s ' - 'GROUP BY i.id,s.id,r.id ' - 'ORDER BY i.section_id,s.id ' ) - app.logger.debug( f"Getting images for exposure {expid}; query = {cursor.mogrify(q, {'expid': int(expid)})}" ) - cursor.execute( q, { 'expid': int(expid) } ) + 'INNER JOIN provenance_tags ipt ON ipt.provenance_id=i.provenance_id ' + 'WHERE i.exposure_id=%(expid)s ' + ' AND ipt.tag=%(provtag)s ' + ) + # app.logger.debug( f"exposure_images finding images; query: {cursor.mogrify(q,subdict)}" ) + cursor.execute( q, subdict ) + cursor.execute( "ALTER TABLE temp_exposure_images ADD PRIMARY KEY(id)" ) + # **** + # cursor.execute( "SELECT COUNT(*) FROM temp_exposure_images" ) + # app.logger.debug( f"Got {cursor.fetchone()[0]} images" ) + # **** + + # Step 2: count measurements by joining temp_exposure_images to many things. + q = ( 'SELECT i.id, s.id AS subid, sl.num_sources AS numsources, COUNT(m.id) AS nummeasurements ' + 'INTO TEMP TABLE temp_exposure_images_counts ' + 'FROM temp_exposure_images i ' + 'INNER JOIN image_upstreams_association ias ON ias.upstream_id=i.id ' + 'INNER JOIN images s ON s.is_sub AND s.id=ias.downstream_id ' + 'INNER JOIN provenance_tags spt ON spt.provenance_id=s.provenance_id AND spt.tag=%(provtag)s ' + 'LEFT JOIN ( ' + ' SELECT sli.id, sli.image_id, sli.num_sources FROM source_lists sli ' + ' INNER JOIN provenance_tags slpt ON slpt.provenance_id=sli.provenance_id AND slpt.tag=%(provtag)s ' + ') sl ON sl.image_id=s.id ' + 'LEFT JOIN (' + ' SELECT cu.id, cu.sources_id FROM cutouts cu ' + ' INNER JOIN provenance_tags cupt ON cupt.provenance_id=cu.provenance_id AND cupt.tag=%(provtag)s ' + ') c ON c.sources_id=sl.id ' + 'LEFT JOIN (' + ' SELECT me.id, me.cutouts_id FROM measurements me ' + ' INNER JOIN provenance_tags mept ON mept.provenance_id=me.provenance_id AND mept.tag=%(provtag)s ' + ') m ON m.cutouts_id=c.id ' + 'GROUP BY i.id, s.id, sl.num_sources ' + ) + # app.logger.debug( f"exposure_images counting sources: query {cursor.mogrify(q,subdict)}" ) + cursor.execute( q, subdict ) + # We will get an error here if there are multiple rows for a given image. + # (Which is good; there shouldn't be multiple rows! There should only be + # one (e.g.) source list child of the image for a given provenance tag, etc.) + cursor.execute( "ALTER TABLE temp_exposure_images_counts ADD PRIMARY KEY(id)" ) + # **** + # cursor.execute( "SELECT COUNT(*) FROM temp_exposure_images_counts" ) + # app.logger.debug( f"Got {cursor.fetchone()[0]} rows with counts" ) + # **** + + # Step 3: join to the report table. This one is probably mergeable with step 1. 
+ q = ( 'SELECT i.id, r.error_step, r.error_type, r.error_message, r.warnings, ' + ' r.process_memory, r.process_runtime, r.progress_steps_bitflag, r.products_exist_bitflag ' + 'INTO TEMP TABLE temp_exposure_images_reports ' + 'FROM temp_exposure_images i ' + 'INNER JOIN ( ' + ' SELECT re.exposure_id, re.section_id, ' + ' re.error_step, re.error_type, re.error_message, re.warnings, ' + ' re.process_memory, re.process_runtime, re.progress_steps_bitflag, re.products_exist_bitflag ' + ' FROM reports re ' + ' INNER JOIN provenance_tags rept ON rept.provenance_id=re.provenance_id AND rept.tag=%(provtag)s ' + ') r ON r.exposure_id=i.exposure_id AND r.section_id=i.section_id ' + ) + # app.logger.debug( f"exposure_images getting reports; query {cursor.mogrify(q,subdict)}" ) + cursor.execute( q, subdict ) + # Again, we will get an error here if there are multiple rows for a given image + cursor.execute( "ALTER TABLE temp_exposure_images_reports ADD PRIMARY KEY(id)" ) + # **** + # cursor.execute( "SELECT COUNT(*) FROM temp_exposure_images_reports" ) + # app.logger.debug( f"Got {cursor.fetchone()[0]} rows with reports" ) + # **** + + cursor.execute( "SELECT t1.*, t2.*, t3.* " + "FROM temp_exposure_images t1 " + "LEFT JOIN temp_exposure_images_counts t2 ON t1.id=t2.id " + "LEFT JOIN temp_exposure_images_reports t3 ON t1.id=t3.id " + "ORDER BY t1.section_id" ) columns = { cursor.description[i][0]: i for i in range(len(cursor.description)) } - app.logger.debug( f"Got {len(columns)} columns, {cursor.rowcount} rows" ) + rows = cursor.fetchall() + # app.logger.debug( f"exposure_images got {len(rows)} rows from the final query." ) fields = ( 'id', 'ra', 'dec', 'gallat', 'section_id', 'fwhm_estimate', 'zero_point_estimate', 'lim_mag_estimate', 'bkg_mean_estimate', 'bkg_rms_estimate', - 'numcutouts', 'nummeasurements', 'subid', + 'numsources', 'nummeasurements', 'subid', 'error_step', 'error_type', 'error_message', 'warnings', 'process_memory', 'process_runtime', 'progress_steps_bitflag', 'products_exist_bitflag' ) - retval = { 'status': 'ok', 'name': [] } + retval = { 'status': 'ok', + 'provenancetag': provtag, + 'name': [] } + for field in fields : retval[ field ] = [] lastimg = -1 + multiples = set() slashre = re.compile( '^.*/([^/]+)$' ) - for row in cursor.fetchall(): + for row in rows: if row[columns['id']] == lastimg: - app.logger.warning( f'Multiple subtractions for image {lastimg}, need to deal with provenance!' ) + multiples.add( row[columns['id']] ) continue lastimg = row[columns['id']] @@ -235,6 +392,13 @@ def exposure_images( expid ): for field in fields: retval[field].append( row[columns[field]] ) + if len(multiples) != 0: + return { 'status': 'error', + 'error': ( f'Some images had multiple rows in the query; this probably indicates ' + f'that the reports table is not well-formed. Or maybe something else. 
' + f'offending images: {multiples}' ) } + + app.logger.debug( f"exposure_images returning {retval}" ) return retval except Exception as ex: @@ -244,20 +408,21 @@ def exposure_images( expid ): # ********************************************************************** -@app.route( "/png_cutouts_for_sub_image///", +@app.route( "/png_cutouts_for_sub_image////", methods=['GET', 'POST'], strict_slashes=False ) -@app.route( "/png_cutouts_for_sub_image////", +@app.route( "/png_cutouts_for_sub_image/////", methods=['GET', 'POST'], strict_slashes=False ) -@app.route( "/png_cutouts_for_sub_image/////", +@app.route( "/png_cutouts_for_sub_image/////" + "/", methods=['GET', 'POST'], strict_slashes=False ) -def png_cutouts_for_sub_image( exporsubid, issubid, nomeas, limit=None, offset=0 ): +def png_cutouts_for_sub_image( exporsubid, provtag, issubid, nomeas, limit=None, offset=0 ): try: data = { 'sortby': 'fluxdesc_chip_index' } if flask.request.is_json: data.update( flask.request.json ) app.logger.debug( f"Processing {flask.request.url}" ) - if nomeas: + if issubid: app.logger.debug( f"Looking for cutouts from subid {exporsubid} ({'with' if nomeas else 'without'} " f"missing-measurements)" ) else: @@ -266,7 +431,6 @@ def png_cutouts_for_sub_image( exporsubid, issubid, nomeas, limit=None, offset=0 conn = next( dbconn() ) cursor = conn.cursor() - # TODO : deal with provenance! # TODO : r/b and sorting # Figure out the subids, zeropoints, backgrounds, and apertures we need @@ -282,23 +446,36 @@ def png_cutouts_for_sub_image( exporsubid, issubid, nomeas, limit=None, offset=0 q = ( 'SELECT s.id AS subid, z.zp, z.dzp, z.aper_cor_radii, z.aper_cors, ' ' i.id AS imageid, i.bkg_mean_estimate ' 'FROM images s ' - 'INNER JOIN image_upstreams_association ias ON ias.downstream_id=s.id ' - ' AND s.ref_image_id != ias.upstream_id ' - 'INNER JOIN images i ON ias.upstream_id=i.id ' - 'INNER JOIN source_lists sl ON sl.image_id=i.id ' - 'INNER JOIN zero_points z ON sl.id=z.sources_id ' ) + ) + if not issubid: + # If we got an exposure id, make sure only to get subtractions of the requested provenance + q += 'INNER JOIN provenance_tags spt ON s.provenance_id=spt.provenance_id AND spt.tag=%(provtag)s ' + q += ( 'INNER JOIN image_upstreams_association ias ON ias.downstream_id=s.id ' + ' AND s.ref_image_id != ias.upstream_id ' + 'INNER JOIN images i ON ias.upstream_id=i.id ' + 'INNER JOIN source_lists sl ON sl.image_id=i.id ' + 'INNER JOIN provenance_tags slpt ON sl.provenance_id=slpt.provenance_id AND slpt.tag=%(provtag)s ' + 'INNER JOIN zero_points z ON sl.id=z.sources_id ' ) + # (Don't need to check provenance tag of zeropoint since we have a + # 1:1 relationship between zeropoints and source lists. Don't need + # to check image provenance, because there will be a single image id + # upstream of each sub id. + if issubid: q += 'WHERE s.id=%(subid)s ' - cursor.execute( q, { 'subid': exporsubid } ) + cursor.execute( q, { 'subid': exporsubid, 'provtag': provtag } ) cols = { cursor.description[i][0]: i for i in range(len(cursor.description)) } rows = cursor.fetchall() if len(rows) > 1: - app.logger.warning( f"Multiple zeropoints for subid {exporsubid}, deal with provenance" ) + app.logger.error( f"Multiple rows for subid {exporsubid}, provenance tag {provtag} " + f"is not well-defined, or something else is wrong." ) + return { 'status': 'error', + 'error': ( f"Multiple rows for subid {exporsubid}, provenance tag {provtag} " + f"is not well-defined, or something else is wrong." 
) } if len(rows) == 0: app.logger.error( f"Couldn't find a zeropoint for subid {exporsubid}" ) - zp = -99 - dzp = -99 - imageid = -99 + return { 'status': 'error', + 'error': f"Coudn't find zeropoint for subid {exporsubid}" } subids.append( exporsubid ) zps[exporsubid] = rows[0][cols['zp']] dzps[exporsubid] = rows[0][cols['dzp']] @@ -310,13 +487,16 @@ def png_cutouts_for_sub_image( exporsubid, issubid, nomeas, limit=None, offset=0 else: q += ( 'INNER JOIN exposures e ON i.exposure_id=e.id ' 'WHERE e.id=%(expid)s ORDER BY i.section_id ' ) - cursor.execute( q, { 'expid': exporsubid } ) + # Don't need to verify provenance here, because there's just going to be one expid! + cursor.execute( q, { 'expid': exporsubid, 'provtag': provtag } ) cols = { cursor.description[i][0]: i for i in range(len(cursor.description)) } rows = cursor.fetchall() for row in rows: subid = row[cols['subid']] if ( subid in subids ): - app.logger.warning( f"subid {subid} showed up more than once in zp qury, deal with provenance" ) + app.logger.error( f"subid {subid} showed up more than once in zp query" ) + return { 'status': 'error', + 'error': f"subid {subid} showed up more than once in zp query" } subids.append( subid ) zps[subid] = row[cols['zp']] dzps[subid] = row[cols['dzp']] @@ -324,45 +504,64 @@ def png_cutouts_for_sub_image( exporsubid, issubid, nomeas, limit=None, offset=0 newbkgs[subid] = row[cols['bkg_mean_estimate']] aperradses[subid] = row[cols['aper_cor_radii']] apercorses[subid] = row[cols['aper_cors']] + app.logger.debug( f'Got {len(subids)} subtractions.' ) - app.logger.debug( f"Getting cutouts for sub images {subids}" ) - q = ( 'SELECT c.id AS id, c.filepath, c.ra, c.dec, c.x, c.y, c.index_in_sources, m.best_aperture, ' - ' m.flux, m.dflux, m.name, m.is_test, m.is_fake, ' - ' m.ra AS measra, m.dec AS measdec, s.id AS subid, s.section_id ' + app.logger.debug( f"Getting cutouts files for sub images {subids}" ) + q = ( 'SELECT c.filepath,s.id AS subimageid,sl.filepath AS sources_path ' 'FROM cutouts c ' + 'INNER JOIN provenance_tags cpt ON cpt.provenance_id=c.provenance_id AND cpt.tag=%(provtag)s ' + 'INNER JOIN source_lists sl ON c.sources_id=sl.id ' + 'INNER JOIN images s ON sl.image_id=s.id ' + 'WHERE s.id IN %(subids)s ' ) + # Don't have to check the source_lists provenance tag because the cutouts provenance + # tag cut will limit us to a single source_list for each cutouts + cursor.execute( q, { 'subids': tuple(subids), 'provtag': provtag } ) + cols = { cursor.description[i][0]: i for i in range(len(cursor.description)) } + rows = cursor.fetchall() + cutoutsfiles = { c[cols['subimageid']]: c[cols['filepath']] for c in rows } + sourcesfiles = { c[cols['subimageid']]: c[cols['sources_path']] for c in rows } + app.logger.debug( f"Got: {cutoutsfiles}" ) + + app.logger.debug( f"Getting measurements for sub images {subids}" ) + q = ( 'SELECT m.ra AS measra, m.dec AS measdec, m.index_in_sources, m.best_aperture, ' + ' m.flux, m.dflux, m.psfflux, m.dpsfflux, m.is_bad, m.name, m.is_test, m.is_fake, ' + ' s.id AS subid, s.section_id ' + 'FROM cutouts c ' + 'INNER JOIN provenance_tags cpt ON cpt.provenance_id=c.provenance_id AND cpt.tag=%(provtag)s ' 'INNER JOIN source_lists sl ON c.sources_id=sl.id ' 'INNER JOIN images s ON sl.image_id=s.id ' 'LEFT JOIN ' - ' ( SELECT meas.cutouts_id AS meascutid, meas.ra, meas.dec, meas.best_aperture, ' - ' meas.flux_apertures[meas.best_aperture+1] AS flux, ' - ' meas.flux_apertures_err[meas.best_aperture+1] AS dflux, obj.name, obj.is_test, obj.is_fake ' + ' ( SELECT 
meas.cutouts_id AS meascutid, meas.index_in_sources, meas.ra, meas.dec, meas.is_bad, ' + ' meas.best_aperture, meas.flux_apertures[meas.best_aperture+1] AS flux, ' + ' meas.flux_apertures_err[meas.best_aperture+1] AS dflux, ' + ' meas.flux_psf AS psfflux, meas.flux_psf_err AS dpsfflux, ' + ' obj.name, obj.is_test, obj.is_fake ' ' FROM measurements meas ' - ' INNER JOIN objects obj ON meas.object_id=obj.id ' - ' ) AS m ON m.meascutid=c.id ' - 'WHERE s.id IN %(subids)s ' ) + ' INNER JOIN provenance_tags mpt ON meas.provenance_id=mpt.provenance_id AND mpt.tag=%(provtag)s ' + ' INNER JOIN objects obj ON meas.object_id=obj.id ' ) if not nomeas: - q += "AND m.best_aperture IS NOT NULL " + q += ' WHERE NOT meas.is_bad ' + q += ( ' ) AS m ON m.meascutid=c.id ' + 'WHERE s.id IN %(subids)s ' ) if data['sortby'] == 'fluxdesc_chip_index': - q += 'ORDER BY flux DESC NULLS LAST,s.section_id,c.index_in_sources ' + q += 'ORDER BY flux DESC NULLS LAST,s.section_id,m.index_in_sources ' else: raise RuntimeError( f"Unknown sort criterion {data['sortby']}" ) if limit is not None: q += 'LIMIT %(limit)s OFFSET %(offset)s' - subdict = { 'subids': tuple(subids), 'limit': limit, 'offset': offset } + subdict = { 'subids': tuple(subids), 'provtag': provtag, 'limit': limit, 'offset': offset } + app.logger.debug( f"Sending query to get measurements: {cursor.mogrify(q,subdict)}" ) cursor.execute( q, subdict ); cols = { cursor.description[i][0]: i for i in range(len(cursor.description)) } rows = cursor.fetchall() app.logger.debug( f"Got {len(cols)} columns, {len(rows)} rows" ) - hdf5files = {} retval = { 'status': 'ok', 'cutouts': { 'sub_id': [], 'image_id': [], 'section_id': [], - 'id': [], - 'ra': [], - 'dec': [], + 'source_index': [], 'measra': [], 'measdec': [], 'flux': [], @@ -370,6 +569,7 @@ def png_cutouts_for_sub_image( exporsubid, issubid, nomeas, limit=None, offset=0 'aperrad': [], 'mag': [], 'dmag': [], + 'is_bad': [], 'objname': [], 'is_test': [], 'is_fake': [], @@ -385,11 +585,14 @@ def png_cutouts_for_sub_image( exporsubid, issubid, nomeas, limit=None, offset=0 scaler = astropy.visualization.ZScaleInterval() - for row in rows: - subid = row[cols['subid']] - if row[cols['filepath']] not in hdf5files: - hdf5files[row[cols['filepath']]] = h5py.File( ARCHIVE_DIR / row[cols['filepath']], 'r' ) - grp = hdf5files[row[cols['filepath']]][f'source_{row[cols["index_in_sources"]]}'] + # Open all the hdf5 files + + hdf5files = {} + for subid in cutoutsfiles.keys(): + hdf5files[ subid ] = h5py.File( ARCHIVE_DIR / cutoutsfiles[subid], 'r' ) + + def append_to_retval( subid, index_in_sources, row ): + grp = hdf5files[ subid ][f'source_index_{row[cols["index_in_sources"]]}'] vmin, vmax = scaler.get_limits( grp['new_data'] ) scalednew = ( grp['new_data'] - vmin ) * 255. 
/ ( vmax - vmin ) # TODO : there's an assumption here that the ref is background @@ -429,25 +632,28 @@ def png_cutouts_for_sub_image( exporsubid, issubid, nomeas, limit=None, offset=0 retval['cutouts']['new_png'].append( base64.b64encode( newim.getvalue() ).decode('ascii') ) retval['cutouts']['ref_png'].append( base64.b64encode( refim.getvalue() ).decode('ascii') ) retval['cutouts']['sub_png'].append( base64.b64encode( subim.getvalue() ).decode('ascii') ) - retval['cutouts']['id'].append( row[cols['id']] ) - retval['cutouts']['ra'].append( row[cols['ra']] ) - retval['cutouts']['dec'].append( row[cols['dec']] ) - retval['cutouts']['x'].append( row[cols['x']] ) - retval['cutouts']['y'].append( row[cols['y']] ) + # TODO : if we want to return x and y, we also have + # to read the source list file... + # We could also copy them to the cutouts file as attributes + # retval['cutouts']['x'].append( row[cols['x']] ) + # retval['cutouts']['y'].append( row[cols['y']] ) retval['cutouts']['w'].append( scalednew.shape[0] ) retval['cutouts']['h'].append( scalednew.shape[1] ) + + retval['cutouts']['is_bad'].append( row[cols['is_bad']] ) retval['cutouts']['objname'].append( row[cols['name']] ) retval['cutouts']['is_test'].append( row[cols['is_test']] ) retval['cutouts']['is_fake'].append( row[cols['is_fake']] ) - # Measurements columns - - # WARNING : assumption here that the aper cor radii list in the - # zero point is the same as was used in the measurements. - # (I think that's a good assumption, but still.) + if row[cols['psfflux']] is None: + flux = row[cols['flux']] + dflux = row[cols['dflux']] + aperrad = aperradses[subid][ row[cols['best_aperture']] ] + else: + flux = row[cols['psfflux']] + dflux = row[cols['dpsfflux']] + aperrad = 0. - flux = row[cols['flux']] - dflux = row[cols['dflux']] if flux is None: for field in [ 'flux', 'dflux', 'aperrad', 'mag', 'dmag', 'measra', 'measdec' ]: retval['cutouts'][field].append( None ) @@ -462,10 +668,20 @@ def png_cutouts_for_sub_image( exporsubid, issubid, nomeas, limit=None, offset=0 retval['cutouts']['measdec'].append( row[cols['measdec']] ) retval['cutouts']['flux'].append( flux ) retval['cutouts']['dflux'].append( dflux ) - retval['cutouts']['aperrad'].append( aperradses[subid][ row[cols['best_aperture']] ] ) + retval['cutouts']['aperrad'].append( aperrad ) retval['cutouts']['mag'].append( mag ) retval['cutouts']['dmag'].append( dmag ) + # First: put in all the measurements, in the order we got them + + alredy_done = set() + for row in rows: + subid = row[cols['subid']] + index_in_sources = row[ cols['index_in_sources'] ] + append_to_retval( subid, index_in_sources, row ) + + # TODO : things that we don't have measurements of + for f in hdf5files.values(): f.close() diff --git a/webap/static/seechange.js b/webap/static/seechange.js index 57c2675f..62fa745a 100644 --- a/webap/static/seechange.js +++ b/webap/static/seechange.js @@ -27,24 +27,36 @@ seechange.Context = function() seechange.Context.prototype.render_page = function() { var self = this; + let p, button; if ( this.frontpagediv == null ) { // TODO : users, login this.frontpagediv = rkWebUtil.elemaker( "div", this.maindiv ); - let p = rkWebUtil.elemaker( "p", this.frontpagediv ); - let button = rkWebUtil.button( p, "Show Exposures", function() { self.show_exposures(); } ); + + p = rkWebUtil.elemaker( "p", this.frontpagediv, { "text": "Search provenance tag: " } ); + this.provtag_wid = rkWebUtil.elemaker( "select", p, { "attributes": { "id": "provtag_wid" } } ); + // rkWebUtil.elemaker( "option", 
this.provtag_wid, + // { "text": "", + // "attributes": { "value": "", + // "selected": 1 } } ); + this.connector.sendHttpRequest( "provtags", {}, (data) => { self.populate_provtag_wid(data) } ); + + p = rkWebUtil.elemaker( "p", this.frontpagediv ); + button = rkWebUtil.button( p, "Show Exposures", function() { self.show_exposures(); } ); p.appendChild( document.createTextNode( " from " ) ); this.startdatewid = rkWebUtil.elemaker( "input", p, - { "attributes": { "type": "text", + { "attributes": { "id": "show_exposures_from_wid", + "type": "text", "size": 20 } } ); this.startdatewid.addEventListener( "blur", function(e) { rkWebUtil.validateWidgetDate( self.startdatewid ); } ); p.appendChild( document.createTextNode( " to " ) ); this.enddatewid = rkWebUtil.elemaker( "input", p, - { "attributes": { "type": "text", + { "attributes": { "id": "show_exposures_to_wid", + "type": "text", "size": 20 } } ); this.enddatewid.addEventListener( "blur", function(e) { rkWebUtil.validateWidgetDate( self.enddatewid ); @@ -54,6 +66,9 @@ seechange.Context.prototype.render_page = function() rkWebUtil.elemaker( "hr", this.frontpagediv ); this.subdiv = rkWebUtil.elemaker( "div", this.frontpagediv ); + rkWebUtil.elemaker( "input", this.frontpagediv, + { "attributes": { "id": "seechange_context_render_page_complete", + "type": "hidden" } } ); } else { rkWebUtil.wipeDiv( this.maindiv ); @@ -61,10 +76,17 @@ seechange.Context.prototype.render_page = function() } } +seechange.Context.prototype.populate_provtag_wid = function( data ) +{ + for ( let provtag of data.provenance_tags ) { + rkWebUtil.elemaker( "option", this.provtag_wid, { "text": provtag, "attributes": { "value": provtag } } ); + } +} + seechange.Context.prototype.show_exposures = function() { var self = this; - var startdate, enddate; + var startdate, enddate, provtag; try { startdate = this.startdatewid.value.trim(); if ( startdate.length > 0 ) @@ -81,12 +103,15 @@ seechange.Context.prototype.show_exposures = function() console.log( "Exception parsing dates: " + ex.toString() ); return; } + provtag = this.provtag_wid.value; + if ( provtag == '' ) provtag = null; rkWebUtil.wipeDiv( this.subdiv ); rkWebUtil.elemaker( "p", this.subdiv, { "text": "Loading exposures...", "classes": [ "warning", "bold", "italic" ] } ); - this.connector.sendHttpRequest( "exposures", { "startdate": startdate, "enddate": enddate }, + this.connector.sendHttpRequest( "exposures", + { "startdate": startdate, "enddate": enddate, "provenancetag": provtag }, function( data ) { self.actually_show_exposures( data ); } ); } @@ -97,7 +122,11 @@ seechange.Context.prototype.actually_show_exposures = function( data ) window.alert( "Unexpected response from server when looking for exposures." 
); return } - let exps = new seechange.ExposureList( this, this.subdiv, data["exposures"], data["startdate"], data["enddate"] ); + let exps = new seechange.ExposureList( this, this.subdiv, + data["exposures"], + data["startdate"], + data["enddate"], + data["provenance_tag"] ); exps.render_page(); } @@ -105,13 +134,14 @@ seechange.Context.prototype.actually_show_exposures = function( data ) // ********************************************************************** // ********************************************************************** -seechange.ExposureList = function( context, parentdiv, exposures, fromtime, totime ) +seechange.ExposureList = function( context, parentdiv, exposures, fromtime, totime, provtag ) { this.context = context; this.parentdiv = parentdiv; this.exposures = exposures; this.fromtime = fromtime; this.totime = totime; + this.provtag = provtag; this.masterdiv = null; this.listdiv = null; this.exposurediv = null; @@ -162,16 +192,27 @@ seechange.ExposureList.prototype.render_page = function() h2.appendChild( document.createTextNode( " from " + this.fromtime + " to " + this.totime ) ); } - table = rkWebUtil.elemaker( "table", this.listdiv, { "classes": [ "exposurelist" ] } ); + if ( this.provtag == null ) { + h2.appendChild( document.createTextNode( " including all provenances" ) ); + } else { + h2.appendChild( document.createTextNode( " with provenance tag " + this.provtag ) ); + } + + rkWebUtil.elemaker( "p", this.listdiv, + { "text": '"Detections" are everything found on subtratcions; ' + + '"Sources" are things that passed preliminary cuts.' } ) + + table = rkWebUtil.elemaker( "table", this.listdiv, { "classes": [ "exposurelist" ], + "attributes": { "id": "exposure_list_table" } } ); tr = rkWebUtil.elemaker( "tr", table ); th = rkWebUtil.elemaker( "th", tr, { "text": "Exposure" } ); th = rkWebUtil.elemaker( "th", tr, { "text": "MJD" } ); th = rkWebUtil.elemaker( "th", tr, { "text": "target" } ); th = rkWebUtil.elemaker( "th", tr, { "text": "filter" } ); th = rkWebUtil.elemaker( "th", tr, { "text": "t_exp (s)" } ); - th = rkWebUtil.elemaker( "th", tr, { "text": "n_images" } ); - th = rkWebUtil.elemaker( "th", tr, { "text": "n_cutouts" } ); - th = rkWebUtil.elemaker( "th", tr, { "text": "n_sources" } ); + th = rkWebUtil.elemaker( "th", tr, { "text": "subs" } ); + th = rkWebUtil.elemaker( "th", tr, { "text": "detections" } ); + th = rkWebUtil.elemaker( "th", tr, { "text": "sources" } ); th = rkWebUtil.elemaker( "th", tr, { "text": "n_successim" } ); th = rkWebUtil.elemaker( "th", tr, { "text": "n_errors" } ); @@ -199,9 +240,9 @@ seechange.ExposureList.prototype.render_page = function() td = rkWebUtil.elemaker( "td", row, { "text": exps["target"][i] } ); td = rkWebUtil.elemaker( "td", row, { "text": exps["filter"][i] } ); td = rkWebUtil.elemaker( "td", row, { "text": exps["exp_time"][i] } ); - td = rkWebUtil.elemaker( "td", row, { "text": exps["n_images"][i] } ); - td = rkWebUtil.elemaker( "td", row, { "text": exps["n_cutouts"][i] } ); + td = rkWebUtil.elemaker( "td", row, { "text": exps["n_subs"][i] } ); td = rkWebUtil.elemaker( "td", row, { "text": exps["n_sources"][i] } ); + td = rkWebUtil.elemaker( "td", row, { "text": exps["n_measurements"][i] } ); td = rkWebUtil.elemaker( "td", row, { "text": exps["n_successim"][i] } ); td = rkWebUtil.elemaker( "td", row, { "text": exps["n_errors"][i] } ); countdown -= 1; @@ -225,7 +266,8 @@ seechange.ExposureList.prototype.show_exposure = function( id, name, mjd, filter rkWebUtil.wipeDiv( this.exposurediv ); rkWebUtil.elemaker( 
"p", this.exposurediv, { "text": "Loading...", "classes": [ "warning", "bold", "italic" ] } ); - this.context.connector.sendHttpRequest( "exposure_images/" + id, null, + this.context.connector.sendHttpRequest( "exposure_images/" + id + "/" + this.provtag, + null, (data) => { self.actually_show_exposure( id, name, mjd, filter, target, exp_time, data ); @@ -316,6 +358,8 @@ seechange.Exposure.prototype.render_page = function() h2 = rkWebUtil.elemaker( "h2", this.div, { "text": "Exposure " + this.name } ); ul = rkWebUtil.elemaker( "ul", this.div ); li = rkWebUtil.elemaker( "li", ul ); + li.innerHTML = "provenance tag: " + this.data.provenancetag; + li = rkWebUtil.elemaker( "li", ul ); li.innerHTML = "target: " + this.target; li = rkWebUtil.elemaker( "li", ul ); li.innerHTML = "mjd: " + this.mjd @@ -332,15 +376,19 @@ seechange.Exposure.prototype.render_page = function() let totncutouts = 0; let totnsources = 0; for ( let i in this.data['id'] ) { - totncutouts += this.data['numcutouts'][i]; + totncutouts += this.data['numsources'][i]; totnsources += this.data['nummeasurements'][i]; } + let numsubs = 0; + for ( let sid of this.data.subid ) if ( sid != null ) numsubs += 1; p = rkWebUtil.elemaker( "p", this.imagesdiv, - { "text": "Exposure has " + this.data.id.length + " completed subtractions." } ) + { "text": ( "Exposure has " + this.data.id.length + " images and " + numsubs + + " completed subtractions" ) } ) p = rkWebUtil.elemaker( "p", this.imagesdiv, { "text": ( totnsources.toString() + " out of " + - totncutouts.toString() + " sources pass preliminary cuts." ) } ); + totncutouts.toString() + " detections pass preliminary cuts " + + "(i.e. are \"sources\")." ) } ); p = rkWebUtil.elemaker( "p", this.imagesdiv ); @@ -358,7 +406,7 @@ seechange.Exposure.prototype.render_page = function() { "type": "checkbox", "id": "cutouts_sans_measurements", "name": "cutouts_sans_measurements_checkbox" } } ); - rkWebUtil.elemaker( "label", p, { "text": "Show cutouts that failed the preliminary cuts", + rkWebUtil.elemaker( "label", p, { "text": "Show detections that failed the preliminary cuts (i.e. aren't sources)", "attributes": { "for": "cutouts_sans_measurements_checkbox" } } ); @@ -373,8 +421,8 @@ seechange.Exposure.prototype.render_page = function() th = rkWebUtil.elemaker( "th", tr, { "text": "fwhm" } ); th = rkWebUtil.elemaker( "th", tr, { "text": "zp" } ); th = rkWebUtil.elemaker( "th", tr, { "text": "mag_lim" } ); - th = rkWebUtil.elemaker( "th", tr, { "text": "n_cutouts" } ); - th = rkWebUtil.elemaker( "th", tr, { "text": "n_sources" } ); + th = rkWebUtil.elemaker( "th", tr, { "text": "detections" } ); + th = rkWebUtil.elemaker( "th", tr, { "text": "sources" } ); th = rkWebUtil.elemaker( "th", tr, { "text": "compl. 
step" } ); th = rkWebUtil.elemaker( "th", tr, {} ); // products exist th = rkWebUtil.elemaker( "th", tr, {} ); // error @@ -404,7 +452,7 @@ seechange.Exposure.prototype.render_page = function() td = rkWebUtil.elemaker( "td", tr, { "text": seechange.nullorfixed( this.data["zero_point_estimate"][i], 2 ) } ); td = rkWebUtil.elemaker( "td", tr, { "text": seechange.nullorfixed( this.data["lim_mag_estimate"][i], 1 ) } ); - td = rkWebUtil.elemaker( "td", tr, { "text": this.data["numcutouts"][i] } ); + td = rkWebUtil.elemaker( "td", tr, { "text": this.data["numsources"][i] } ); td = rkWebUtil.elemaker( "td", tr, { "text": this.data["nummeasurements"][i] } ); td = rkWebUtil.elemaker( "td", tr ); @@ -476,7 +524,7 @@ seechange.Exposure.prototype.update_cutouts = function() if ( this.cutoutsallimages_checkbox.checked ) { rkWebUtil.elemaker( "p", this.cutoutsdiv, - { "text": "Sources for all succesfully completed chips" } ); + { "text": "Sources for all successfully completed chips" } ); let div = rkWebUtil.elemaker( "div", this.cutoutsdiv ); rkWebUtil.elemaker( "p", div, { "text": "...updating cutouts...", @@ -490,7 +538,7 @@ seechange.Exposure.prototype.update_cutouts = function() } else { this.context.connector.sendHttpRequest( - "png_cutouts_for_sub_image/" + this.id + "/0/" + withnomeas, + "png_cutouts_for_sub_image/" + this.id + "/" + this.data.provenancetag + "/0/" + withnomeas, {}, (data) => { self.show_cutouts_for_image( div, prop, data ); } ); @@ -517,7 +565,8 @@ seechange.Exposure.prototype.update_cutouts = function() } else { this.context.connector.sendHttpRequest( - "png_cutouts_for_sub_image/" + this.data['subid'][i] + "/1/" + withnomeas, + "png_cutouts_for_sub_image/" + this.data['subid'][i] + "/" + this.data.provenancetag + + "/1/" + withnomeas, {}, (data) => { self.show_cutouts_for_image( div, prop, data ); } ); @@ -598,19 +647,25 @@ seechange.Exposure.prototype.show_cutouts_for_image = function( div, dex, indata // TODO: use "warning" color for low r/b if ( data.cutouts['flux'][i] == null ) td.classList.add( 'bad' ); else td.classList.add( 'good' ); - subdiv.innerHTML = ( "chip: " + data.cutouts.section_id[i] + "
" + - // "cutout (α, δ): (" + data.cutouts['ra'][i].toFixed(5) + " , " - // + data.cutouts['dec'][i].toFixed(5) + ")
" + - "(α, δ): (" + seechange.nullorfixed( data.cutouts['measra'][i], 5 ) + " , " - + seechange.nullorfixed( data.cutouts['measdec'][i],5 ) + ")
" + - "(x, y): (" + data.cutouts['x'][i].toFixed(2) + " , " - + data.cutouts['y'][i].toFixed(2) + ")
" + - "Flux: " + seechange.nullorfixed( data.cutouts['flux'][i], 0 ) - + " ± " + seechange.nullorfixed( data.cutouts['dflux'][i], 0 ) - + "  (aper r=" + seechange.nullorfixed( data.cutouts['aperrad'][i], 2) + " px)" - + "
" + "Mag: " + seechange.nullorfixed( data.cutouts['mag'][i], 2 ) - + " ± " + seechange.nullorfixed( data.cutouts['dmag'][i], 2 ) - ); + let textblob = ( "chip: " + data.cutouts.section_id[i] + "
" + + // "cutout (α, δ): (" + data.cutouts['ra'][i].toFixed(5) + " , " + // + data.cutouts['dec'][i].toFixed(5) + ")
" + + "(α, δ): (" + seechange.nullorfixed( data.cutouts['measra'][i], 5 ) + " , " + + seechange.nullorfixed( data.cutouts['measdec'][i],5 ) + ")
" + + // TODO : put x, y back if the server ever starts returning it again! -- Issue #340 + // "(x, y): (" + data.cutouts['x'][i].toFixed(2) + " , " + // + data.cutouts['y'][i].toFixed(2) + ")
" + + "Flux: " + seechange.nullorfixed( data.cutouts['flux'][i], 0 ) + + " ± " + seechange.nullorfixed( data.cutouts['dflux'][i], 0 ) + ); + if ( ( data.cutouts['aperrad'][i] == null ) || ( data.cutouts['aperrad'][i] <= 0 ) ) + textblob += "  (psf)"; + else + textblob += + "  (aper r=" + seechange.nullorfixed( data.cutouts['aperrad'][i], 2) + " px)"; + textblob += ("
" + "Mag: " + seechange.nullorfixed( data.cutouts['mag'][i], 2 ) + + " ± " + seechange.nullorfixed( data.cutouts['dmag'][i], 2 ) + ); + subdiv.innerHTML = textblob; } }