Start the process of adding caching to MDC store creation

Start adding infrastructure for caching quantities used in MDC store creation (e.g. per-circuit outcome counts) and for plumbing through the circuit caches built during layout creation.
sandialabs · Jun 1, 2024 · 91d5ebb · 91d5ebb
1 parent 1d4e5a0
commit 91d5ebb
Show file tree

Hide file tree

Showing 4 changed files with 122 additions and 51 deletions.
diff --git a/pygsti/algorithms/core.py b/pygsti/algorithms/core.py
@@ -897,12 +897,25 @@ def _max_array_types(artypes_list):  # get the maximum number of each array type
         precomp_layout_circuit_cache = _layouts.matrixlayout.create_matrix_copa_layout_circuit_cache(unique_circuits, mdl)
     else:
         precomp_layout_circuit_cache = None
-    #print(completed_circuit_cache)
+
     for i, circuit_list in enumerate(circuit_lists):
         printer.log(f'Layout for iteration {i}', 2)
         precomp_layouts.append(mdl.sim.create_layout(circuit_list, dataset, resource_alloc, array_types, verbosity= printer - 1,
                                                      layout_creation_circuit_cache = precomp_layout_circuit_cache))
 
+    #precompute a cache of possible outcome counts for each circuit to accelerate MDC store creation
+    if isinstance(mdl, _models.model.OpModel):
+        if precomp_layout_circuit_cache is not None: #then grab the split circuits from there.
+            expanded_circuit_outcome_list = mdl.bulk_expand_instruments_and_separate_povm(unique_circuits, 
+                                                                                        split_circuits = precomp_layout_circuit_cache['split_circuits'])
+            outcome_count_by_circuit_cache = {ckt: len(outcome_tup) for ckt,outcome_tup in zip(unique_circuits, expanded_circuit_outcome_list)}
+        else:
+            expanded_circuit_outcome_list = mdl.bulk_expand_instruments_and_separate_povm(unique_circuits)    
+            outcome_count_by_circuit_cache = {ckt: len(outcome_tup) for ckt,outcome_tup in zip(unique_circuits, expanded_circuit_outcome_list)}
+    else:
+        outcome_count_by_circuit_cache = {ckt: mdl.compute_num_outcomes(ckt) for ckt in unique_circuits}
+
+
     with printer.progress_logging(1):
         for i in range(starting_index, len(circuit_lists)):
             circuitsToEstimate = circuit_lists[i]
@@ -919,7 +932,8 @@ def _max_array_types(artypes_list):  # get the maximum number of each array type
             mdl.basis = start_model.basis  # set basis in case of CPTP constraints (needed?)
             initial_mdc_store = _objfns.ModelDatasetCircuitsStore(mdl, dataset, circuitsToEstimate, resource_alloc,
                                                                   array_types=array_types, verbosity=printer - 1, 
-                                                                  precomp_layout = precomp_layouts[i])
+                                                                  precomp_layout = precomp_layouts[i],
+                                                                  outcome_count_by_circuit=outcome_count_by_circuit_cache)
             mdc_store = initial_mdc_store
 
             for j, obj_fn_builder in enumerate(iteration_objfn_builders):

diff --git a/pygsti/layouts/matrixlayout.py b/pygsti/layouts/matrixlayout.py
@@ -61,14 +61,17 @@ class _MatrixCOPALayoutAtom(_DistributableAtom):
     model : Model
         The model being used to construct this layout.  Used for expanding instruments
         within the circuits.
+    
+    unique_circuits : list of Circuits
+        A list of the unique :class:`Circuit` objects representing the circuits this layout will include.
 
     dataset : DataSet
         The dataset, used to include only observed circuit outcomes in this atom
         and therefore the parent layout.
     """
 
     def __init__(self, unique_complete_circuits, unique_nospam_circuits, circuits_by_unique_nospam_circuits,
-                 ds_circuits, group, helpful_scratch, model, dataset=None, expanded_and_separated_circuit_cache=None,
+                 ds_circuits, group, helpful_scratch, model, unique_circuits, dataset=None, expanded_and_separated_circuit_cache=None,
                  double_expanded_nospam_circuits_cache = None):
 
         #Note: group gives unique_nospam_circuits indices, which circuits_by_unique_nospam_circuits
@@ -84,11 +87,13 @@ def add_expanded_circuits(indices, add_to_this_dict):
                         expc_outcomes = model.expand_instruments_and_separate_povm(unique_complete_circuits[unique_i], observed_outcomes)
                         #Note: unique_complete_circuits may have duplicates (they're only unique *pre*-completion)
                     else:
-                        expc_outcomes = expanded_and_separated_circuit_cache.get(unique_complete_circuits[unique_i], None)
+                        #the cache is indexed into using the (potentially) incomplete circuits
+                        expc_outcomes = expanded_and_separated_circuit_cache.get(unique_circuits[unique_i], None)
                         if expc_outcomes is None: #fall back on original non-cache behavior.
                             observed_outcomes = None if (dataset is None) else dataset[ds_circuits[unique_i]].unique_outcomes
                             expc_outcomes = model.expand_instruments_and_separate_povm(unique_complete_circuits[unique_i], observed_outcomes)
-
+                            #and add this new value to the cache.
+                            expanded_and_separated_circuit_cache[unique_circuits[unique_i]] = expc_outcomes 
                     for sep_povm_c, outcomes in expc_outcomes.items():  # for each expanded cir from unique_i-th circuit
                         prep_lbl = sep_povm_c.circuit_without_povm[0]
                         exp_nospam_c = sep_povm_c.circuit_without_povm[1:]  # sep_povm_c *always* has prep lbl
@@ -242,7 +247,7 @@ class MatrixCOPALayout(_DistributableCOPALayout):
     Parameters
     ----------
     circuits : list
-        A list of:class:`Circuit` objects representing the circuits this layout will include.
+        A list of :class:`Circuit` objects representing the circuits this layout will include.
 
     model : Model
         The model that will be used to compute circuit outcome probabilities using this layout.
@@ -309,23 +314,23 @@ def __init__(self, circuits, model, dataset=None, num_sub_trees=None, num_tree_p
 
         #extract subcaches from layout_creation_circuit_cache:
         if layout_creation_circuit_cache is not None:
-            completed_circuit_cache = layout_creation_circuit_cache.get('completed_circuits', None)
-            split_circuit_cache = layout_creation_circuit_cache.get('split_circuits', None)
-            expanded_and_separated_circuits_cache = layout_creation_circuit_cache.get('expanded_and_separated_circuits', None)
-            expanded_subcircuits_no_spam_cache = layout_creation_circuit_cache.get('expanded_subcircuits_no_spam', None)
+            self.completed_circuit_cache = layout_creation_circuit_cache.get('completed_circuits', None)
+            self.split_circuit_cache = layout_creation_circuit_cache.get('split_circuits', None)
+            self.expanded_and_separated_circuits_cache = layout_creation_circuit_cache.get('expanded_and_separated_circuits', None)
+            self.expanded_subcircuits_no_spam_cache = layout_creation_circuit_cache.get('expanded_subcircuits_no_spam', None)
         else:
-            completed_circuit_cache = None
-            split_circuit_cache = None
-            expanded_and_separated_circuits_cache = None
-            expanded_subcircuits_no_spam_cache = None
+            self.completed_circuit_cache = None
+            self.split_circuit_cache = None
+            self.expanded_and_separated_circuits_cache = None
+            self.expanded_subcircuits_no_spam_cache = None
 
-        if completed_circuit_cache is None:
+        if self.completed_circuit_cache is None:
             unique_complete_circuits, split_unique_circuits = model.complete_circuits(unique_circuits, return_split=True)
         else:
             unique_complete_circuits = []
             for c in unique_circuits:
-                comp_ckt = completed_circuit_cache.get(c, None)
-                if completed_circuit_cache is not None:
+                comp_ckt = self.completed_circuit_cache.get(c, None)
+                if comp_ckt is not None:
                     unique_complete_circuits.append(comp_ckt)
                 else:
                     unique_complete_circuits.append(model.complete_circuit(c))
@@ -334,17 +339,24 @@ def __init__(self, circuits, model, dataset=None, num_sub_trees=None, num_tree_p
         # "unique circuits" after completion, e.g. "rho0Gx" and "Gx" could both complete to "rho0GxMdefault_0".
 
         circuits_by_unique_nospam_circuits = _collections.OrderedDict()
-        if completed_circuit_cache is None:
+        if self.completed_circuit_cache is None:
             for i, (_, nospam_c, _) in enumerate(split_unique_circuits):
                 if nospam_c in circuits_by_unique_nospam_circuits:
                     circuits_by_unique_nospam_circuits[nospam_c].append(i)
                 else:
                     circuits_by_unique_nospam_circuits[nospam_c] = [i]
+            #also create the split circuit cache at this point for future use.
+            self.split_circuit_cache = {unique_ckt:split_ckt for unique_ckt, split_ckt in zip(unique_circuits, split_unique_circuits)}
+
         else:
-            for i, c in enumerate(unique_complete_circuits):
-                _, nospam_c, _ = split_circuit_cache.get(c, None)
+            for i, (c_unique_complete, c_unique) in enumerate(zip(unique_complete_circuits, unique_circuits)):
+                split_ckt_tup = self.split_circuit_cache.get(c_unique, None)
+                nospam_c= split_ckt_tup[1] 
                 if nospam_c is None:
-                    _, nospam_c, _ = model.split_circuit(c)
+                    split_ckt_tup = model.split_circuit(c_unique_complete)
+                    nospam_c= split_ckt_tup[1]
+                    #also add this missing circuit to the cache for future use.
+                    self.split_circuit_cache[c_unique] = split_ckt_tup
                 if nospam_c in circuits_by_unique_nospam_circuits:
                     circuits_by_unique_nospam_circuits[nospam_c].append(i)
                 else:
@@ -367,9 +379,10 @@ def _create_atom(args):
             group, helpful_scratch_group = args
             return _MatrixCOPALayoutAtom(unique_complete_circuits, unique_nospam_circuits,
                                          circuits_by_unique_nospam_circuits, ds_circuits,
-                                         group, helpful_scratch_group, model, dataset,
-                                         expanded_and_separated_circuits_cache,
-                                         expanded_subcircuits_no_spam_cache)
+                                         group, helpful_scratch_group, model, 
+                                         unique_circuits, dataset,
+                                         self.expanded_and_separated_circuits_cache,
+                                         self.expanded_subcircuits_no_spam_cache)
 
         super().__init__(circuits, unique_circuits, to_unique, unique_complete_circuits,
                          _create_atom, list(zip(groups, helpful_scratch)), num_tree_processors,
@@ -385,7 +398,7 @@ def create_matrix_copa_layout_circuit_cache(circuits, model, dataset=None):
     completed_circuits, split_circuits = model.complete_circuits(circuits, return_split=True)
 
     cache['completed_circuits'] = {ckt: comp_ckt for ckt, comp_ckt in zip(circuits, completed_circuits)}
-    cache['split_circuits'] = {ckt: split_ckt for ckt, split_ckt in zip(cache['completed_circuits'].values(), split_circuits)}
+    cache['split_circuits'] = {ckt: split_ckt for ckt, split_ckt in zip(circuits, split_circuits)}
 
     #There is some potential aliasing that happens in the init that I am not
     #doing here, but I think 90+% of the time this ought to be fine.
@@ -401,7 +414,7 @@ def create_matrix_copa_layout_circuit_cache(circuits, model, dataset=None):
                                                                                     observed_outcomes_list = unique_outcomes_list, 
                                                                                     split_circuits = split_circuits)
 
-    expanded_circuit_cache = {ckt: expanded_ckt for ckt,expanded_ckt in zip(cache['completed_circuits'].values(), expanded_circuit_outcome_list)}
+    expanded_circuit_cache = {ckt: expanded_ckt for ckt,expanded_ckt in zip(circuits, expanded_circuit_outcome_list)}
 
     cache['expanded_and_separated_circuits'] = expanded_circuit_cache
 

diff --git a/pygsti/models/model.py b/pygsti/models/model.py
@@ -1141,10 +1141,45 @@ def circuit_outcomes(self, circuit):
 
         Returns
         -------
-        tuple
+        tuple corresponding to the possible outcomes for circuit.
         """
-        outcomes = circuit.expand_instruments_and_separate_povm(self)  # dict w/keys=sep-povm-circuits, vals=outcomes
+        outcomes = self.expand_instruments_and_separate_povm(circuit)  # dict w/keys=sep-povm-circuits, vals=outcomes
         return tuple(_itertools.chain(*outcomes.values()))  # concatenate outputs from all sep-povm-circuits
+
+    def bulk_circuit_outcomes(self, circuits, split_circuits=None, completed_circuits=None):
+        """
+        Get all the possible outcome labels produced by simulating each of the circuits
+        in this list of circuits.
+
+        Parameters
+        ----------
+        circuits : list of Circuits
+            list of Circuits to get outcomes of.
+        
+        split_circuits : list of tuples, optional (default None)
+            If specified, this is a list of tuples for each circuit corresponding to the splitting of
+            the circuit into the prep label, spam-free circuit, and povm label. This is the same format
+            produced by the :meth:split_circuit(s) method, and so this option can allow for accelerating this
+            method when that has previously been run. Only one of this kwarg and the
+            `completed_circuits` kwarg should be used.
+
+        completed_circuits : list of Circuits, optional (default None)
+            If specified, this is a list of completed circuits with prep and povm labels included.
+            This is the format produced by the :meth:complete_circuit(s) method, and this can
+            be used to accelerate this method call when that has been previously run. Should not
+            be used in conjunction with `split_circuits`.
+
+        Returns
+        -------
+        list of tuples corresponding to the possible outcomes for each circuit.
+        """
+
+        # list of dict w/keys=sep-povm-circuits, vals=outcomes
+        outcomes_list = self.bulk_expand_instruments_and_separate_povm(circuits, 
+                                                                       split_circuits=split_circuits,
+                                                                       completed_circuits=completed_circuits)  
+
+        return [tuple(_itertools.chain(*outcomes.values())) for outcomes in outcomes_list]  # concatenate outputs from all sep-povm-circuits
 
     def split_circuit(self, circuit, erroron=('prep', 'povm'), split_prep=True, split_povm=True):
         """
@@ -1516,17 +1551,17 @@ def bulk_expand_instruments_and_separate_povm(self, circuits, observed_outcomes_
             method when that has previously been run. When using this kwarg only one of this or 
             the `complete_circuits` kwargs should be used.
 
-        complete_circuits : list of Circuits, optional (default None)
+        completed_circuits : list of Circuits, optional (default None)
             If specified, this is a list of compeleted circuits with prep and povm labels included.
             This is the format produced by the :meth:complete_circuit(s) method, and this can
             be used to accelerate this method call when that has been previously run. Should not
             be used in conjunction with `split_circuits`.
 
         Returns
         -------
-        OrderedDict
-            A dict whose keys are :class:`SeparatePOVMCircuit` objects and whose
-            values are tuples of the outcome labels corresponding to this circuit,
+        list of OrderedDicts
+            A list of dictionaries whose keys are :class:`SeparatePOVMCircuit` objects and whose
+            values are tuples of the outcome labels corresponding to each circuit,
             one per POVM effect held in the key.
         """