From 91d5ebb4644a123fc7dcdbf117b49781e562a63f Mon Sep 17 00:00:00 2001
From: Corey Ostrove
Date: Fri, 31 May 2024 18:32:55 -0600
Subject: [PATCH] Start the process of adding caching to MDC store creation

Start adding infrastructure for caching quantities used in MDC store creation
and for plumbing in information from layout creation.
---
 pygsti/algorithms/core.py           | 18 ++++++++-
 pygsti/layouts/matrixlayout.py      | 61 +++++++++++++++++------------
 pygsti/models/model.py              | 47 +++++++++++++++++++---
 pygsti/objectivefns/objectivefns.py | 47 +++++++++++++---------
 4 files changed, 122 insertions(+), 51 deletions(-)

diff --git a/pygsti/algorithms/core.py b/pygsti/algorithms/core.py
index b4f67c286..59a696d85 100644
--- a/pygsti/algorithms/core.py
+++ b/pygsti/algorithms/core.py
@@ -897,12 +897,25 @@ def _max_array_types(artypes_list):  # get the maximum number of each array type
         precomp_layout_circuit_cache = _layouts.matrixlayout.create_matrix_copa_layout_circuit_cache(unique_circuits, mdl)
     else:
         precomp_layout_circuit_cache = None
-    #print(completed_circuit_cache)
+
     for i, circuit_list in enumerate(circuit_lists):
         printer.log(f'Layout for iteration {i}', 2)
         precomp_layouts.append(mdl.sim.create_layout(circuit_list, dataset, resource_alloc, array_types, verbosity= printer - 1,
                                                      layout_creation_circuit_cache = precomp_layout_circuit_cache))
 
+    #precompute a cache of possible outcome counts for each circuit to accelerate MDC store creation
+    if isinstance(mdl, _models.model.OpModel):
+        if precomp_layout_circuit_cache is not None: #then grab the split circuits from there.
+            circuit_outcome_list = mdl.bulk_circuit_outcomes(unique_circuits,
+                                                             split_circuits = precomp_layout_circuit_cache['split_circuits'])
+            outcome_count_by_circuit_cache = {ckt: len(outcome_tup) for ckt, outcome_tup in zip(unique_circuits, circuit_outcome_list)}
+        else:
+            circuit_outcome_list = mdl.bulk_circuit_outcomes(unique_circuits)
+            outcome_count_by_circuit_cache = {ckt: len(outcome_tup) for ckt, outcome_tup in zip(unique_circuits, circuit_outcome_list)}
+    else:
+        outcome_count_by_circuit_cache = {ckt: mdl.compute_num_outcomes(ckt) for ckt in unique_circuits}
+
+
     with printer.progress_logging(1):
         for i in range(starting_index, len(circuit_lists)):
             circuitsToEstimate = circuit_lists[i]
@@ -919,7 +932,8 @@ def _max_array_types(artypes_list):  # get the maximum number of each array type
             mdl.basis = start_model.basis  # set basis in case of CPTP constraints (needed?)
             initial_mdc_store = _objfns.ModelDatasetCircuitsStore(mdl, dataset, circuitsToEstimate, resource_alloc,
                                                                   array_types=array_types, verbosity=printer - 1,
-                                                                  precomp_layout = precomp_layouts[i])
+                                                                  precomp_layout = precomp_layouts[i],
+                                                                  outcome_count_by_circuit=outcome_count_by_circuit_cache)
             mdc_store = initial_mdc_store
 
             for j, obj_fn_builder in enumerate(iteration_objfn_builders):
diff --git a/pygsti/layouts/matrixlayout.py b/pygsti/layouts/matrixlayout.py
index 2825eaa51..654f32c86 100644
--- a/pygsti/layouts/matrixlayout.py
+++ b/pygsti/layouts/matrixlayout.py
@@ -61,6 +61,9 @@ class _MatrixCOPALayoutAtom(_DistributableAtom):
     model : Model
         The model being used to construct this layout.  Used for expanding instruments
         within the circuits.
+
+    unique_circuits : list of Circuits
+        A list of the unique :class:`Circuit` objects representing the circuits this layout will include.
 
    dataset : DataSet
        The dataset, used to include only observed circuit outcomes in this atom
@@ -68,7 +71,7 @@
    """

    def __init__(self, unique_complete_circuits, unique_nospam_circuits, circuits_by_unique_nospam_circuits,
-                 ds_circuits, group, helpful_scratch, model, dataset=None, expanded_and_separated_circuit_cache=None,
+                 ds_circuits, group, helpful_scratch, model, unique_circuits, dataset=None, expanded_and_separated_circuit_cache=None,
                  double_expanded_nospam_circuits_cache = None):
 
        #Note: group gives unique_nospam_circuits indices, which circuits_by_unique_nospam_circuits
@@ -84,11 +87,13 @@ def add_expanded_circuits(indices, add_to_this_dict):
                 expc_outcomes = model.expand_instruments_and_separate_povm(unique_complete_circuits[unique_i], observed_outcomes)
                 #Note: unique_complete_circuits may have duplicates (they're only unique *pre*-completion)
             else:
-                expc_outcomes = expanded_and_separated_circuit_cache.get(unique_complete_circuits[unique_i], None)
+                #the cache is indexed by the (potentially) incomplete circuits
+                expc_outcomes = expanded_and_separated_circuit_cache.get(unique_circuits[unique_i], None)
                 if expc_outcomes is None: #fall back on original non-cache behavior.
                     observed_outcomes = None if (dataset is None) else dataset[ds_circuits[unique_i]].unique_outcomes
                     expc_outcomes = model.expand_instruments_and_separate_povm(unique_complete_circuits[unique_i], observed_outcomes)
-
+                    #and add this new value to the cache.
+                    expanded_and_separated_circuit_cache[unique_circuits[unique_i]] = expc_outcomes
             for sep_povm_c, outcomes in expc_outcomes.items():  # for each expanded cir from unique_i-th circuit
                 prep_lbl = sep_povm_c.circuit_without_povm[0]
                 exp_nospam_c = sep_povm_c.circuit_without_povm[1:]  # sep_povm_c *always* has prep lbl
@@ -242,7 +247,7 @@ class MatrixCOPALayout(_DistributableCOPALayout):
     Parameters
     ----------
     circuits : list
-        A list of:class:`Circuit` objects representing the circuits this layout will include.
+        A list of :class:`Circuit` objects representing the circuits this layout will include.
 
     model : Model
         The model that will be used to compute circuit outcome probabilities using this layout.
@@ -309,23 +314,23 @@ def __init__(self, circuits, model, dataset=None, num_sub_trees=None, num_tree_p
 
         #extract subcaches from layout_creation_circuit_cache:
         if layout_creation_circuit_cache is not None:
-            completed_circuit_cache = layout_creation_circuit_cache.get('completed_circuits', None)
-            split_circuit_cache = layout_creation_circuit_cache.get('split_circuits', None)
-            expanded_and_separated_circuits_cache = layout_creation_circuit_cache.get('expanded_and_separated_circuits', None)
-            expanded_subcircuits_no_spam_cache = layout_creation_circuit_cache.get('expanded_subcircuits_no_spam', None)
+            self.completed_circuit_cache = layout_creation_circuit_cache.get('completed_circuits', None)
+            self.split_circuit_cache = layout_creation_circuit_cache.get('split_circuits', None)
+            self.expanded_and_separated_circuits_cache = layout_creation_circuit_cache.get('expanded_and_separated_circuits', None)
+            self.expanded_subcircuits_no_spam_cache = layout_creation_circuit_cache.get('expanded_subcircuits_no_spam', None)
         else:
-            completed_circuit_cache = None
-            split_circuit_cache = None
-            expanded_and_separated_circuits_cache = None
-            expanded_subcircuits_no_spam_cache = None
+            self.completed_circuit_cache = None
+            self.split_circuit_cache = None
+            self.expanded_and_separated_circuits_cache = None
+            self.expanded_subcircuits_no_spam_cache = None
 
-        if completed_circuit_cache is None:
+        if self.completed_circuit_cache is None:
             unique_complete_circuits, split_unique_circuits = model.complete_circuits(unique_circuits, return_split=True)
         else:
             unique_complete_circuits = []
             for c in unique_circuits:
-                comp_ckt = completed_circuit_cache.get(c, None)
-                if completed_circuit_cache is not None:
+                comp_ckt = self.completed_circuit_cache.get(c, None)
+                if comp_ckt is not None:
                     unique_complete_circuits.append(comp_ckt)
                 else:
                     unique_complete_circuits.append(model.complete_circuit(c))
@@ -334,17 +339,24 @@ def __init__(self, circuits, model, dataset=None, num_sub_trees=None, num_tree_p
         # "unique circuits" after completion, e.g. "rho0Gx" and "Gx" could both complete to "rho0GxMdefault_0".
         circuits_by_unique_nospam_circuits = _collections.OrderedDict()
-        if completed_circuit_cache is None:
+        if self.completed_circuit_cache is None:
             for i, (_, nospam_c, _) in enumerate(split_unique_circuits):
                 if nospam_c in circuits_by_unique_nospam_circuits:
                     circuits_by_unique_nospam_circuits[nospam_c].append(i)
                 else:
                     circuits_by_unique_nospam_circuits[nospam_c] = [i]
+            #also create the split circuit cache at this point for future use.
+            self.split_circuit_cache = {unique_ckt: split_ckt for unique_ckt, split_ckt in zip(unique_circuits, split_unique_circuits)}
+
         else:
-            for i, c in enumerate(unique_complete_circuits):
-                _, nospam_c, _ = split_circuit_cache.get(c, None)
+            for i, (c_unique_complete, c_unique) in enumerate(zip(unique_complete_circuits, unique_circuits)):
+                split_ckt_tup = self.split_circuit_cache.get(c_unique, None)
+                nospam_c = split_ckt_tup[1] if split_ckt_tup is not None else None
                 if nospam_c is None:
-                    _, nospam_c, _ = model.split_circuit(c)
+                    split_ckt_tup = model.split_circuit(c_unique_complete)
+                    nospam_c = split_ckt_tup[1]
+                    #also add this missing circuit to the cache for future use.
+                    self.split_circuit_cache[c_unique] = split_ckt_tup
                 if nospam_c in circuits_by_unique_nospam_circuits:
                     circuits_by_unique_nospam_circuits[nospam_c].append(i)
                 else:
@@ -367,9 +379,10 @@ def _create_atom(args):
             group, helpful_scratch_group = args
             return _MatrixCOPALayoutAtom(unique_complete_circuits, unique_nospam_circuits,
                                          circuits_by_unique_nospam_circuits, ds_circuits,
-                                         group, helpful_scratch_group, model, dataset,
-                                         expanded_and_separated_circuits_cache,
-                                         expanded_subcircuits_no_spam_cache)
+                                         group, helpful_scratch_group, model,
+                                         unique_circuits, dataset,
+                                         self.expanded_and_separated_circuits_cache,
+                                         self.expanded_subcircuits_no_spam_cache)
 
         super().__init__(circuits, unique_circuits, to_unique, unique_complete_circuits, _create_atom,
                          list(zip(groups, helpful_scratch)), num_tree_processors,
@@ -385,7 +398,7 @@ def create_matrix_copa_layout_circuit_cache(circuits, model, dataset=None):
     completed_circuits, split_circuits = model.complete_circuits(circuits, return_split=True)
 
     cache['completed_circuits'] = {ckt: comp_ckt for ckt, comp_ckt in zip(circuits, completed_circuits)}
-    cache['split_circuits'] = {ckt: split_ckt for ckt, split_ckt in zip(cache['completed_circuits'].values(), split_circuits)}
+    cache['split_circuits'] = {ckt: split_ckt for ckt, split_ckt in zip(circuits, split_circuits)}
 
     #There is some potential aliasing that happens in the init that I am not
     #doing here, but I think 90+% of the time this ought to be fine.
@@ -401,7 +414,7 @@
                                                                                        observed_outcomes_list = unique_outcomes_list,
                                                                                        split_circuits = split_circuits)
 
-    expanded_circuit_cache = {ckt: expanded_ckt for ckt,expanded_ckt in zip(cache['completed_circuits'].values(), expanded_circuit_outcome_list)}
+    expanded_circuit_cache = {ckt: expanded_ckt for ckt, expanded_ckt in zip(circuits, expanded_circuit_outcome_list)}
 
     cache['expanded_and_separated_circuits'] = expanded_circuit_cache
 
diff --git a/pygsti/models/model.py b/pygsti/models/model.py
index 641bb8d05..940e31303 100644
--- a/pygsti/models/model.py
+++ b/pygsti/models/model.py
@@ -1141,10 +1141,45 @@ def circuit_outcomes(self, circuit):
         Returns
         -------
-        tuple
+        tuple corresponding to the possible outcomes for the circuit.
         """
-        outcomes = circuit.expand_instruments_and_separate_povm(self)  # dict w/keys=sep-povm-circuits, vals=outcomes
+        outcomes = self.expand_instruments_and_separate_povm(circuit)  # dict w/keys=sep-povm-circuits, vals=outcomes
         return tuple(_itertools.chain(*outcomes.values()))  # concatenate outputs from all sep-povm-circuits
+
+    def bulk_circuit_outcomes(self, circuits, split_circuits=None, completed_circuits=None):
+        """
+        Get all the possible outcome labels produced by simulating each of the circuits
+        in the given list.
+
+        Parameters
+        ----------
+        circuits : list of Circuits
+            List of Circuits to get the outcomes of.
+
+        split_circuits : list of tuples, optional (default None)
+            If specified, this is a list of tuples for each circuit corresponding to the splitting of
+            the circuit into the prep label, spam-free circuit, and povm label. This is the same format
+            produced by the :meth:split_circuit(s) method, and so this option can allow for accelerating this
+            method when that has previously been run. When using this kwarg only one of this or
+            the `completed_circuits` kwarg should be used.
+
+        completed_circuits : list of Circuits, optional (default None)
+            If specified, this is a list of completed circuits with prep and povm labels included.
+ This is the format produced by the :meth:complete_circuit(s) method, and this can + be used to accelerate this method call when that has been previously run. Should not + be used in conjunction with `split_circuits`. + + Returns + ------- + list of tuples corresponding to the possible outcomes for each circuit. + """ + + # list of dict w/keys=sep-povm-circuits, vals=outcomes + outcomes_list = self.bulk_expand_instruments_and_separate_povm(circuits, + split_circuits=split_circuits, + completed_circuits=completed_circuits) + + return [tuple(_itertools.chain(*outcomes.values())) for outcomes in outcomes_list] # concatenate outputs from all sep-povm-circuits def split_circuit(self, circuit, erroron=('prep', 'povm'), split_prep=True, split_povm=True): """ @@ -1516,7 +1551,7 @@ def bulk_expand_instruments_and_separate_povm(self, circuits, observed_outcomes_ method when that has previously been run. When using this kwarg only one of this or the `complete_circuits` kwargs should be used. - complete_circuits : list of Circuits, optional (default None) + completed_circuits : list of Circuits, optional (default None) If specified, this is a list of compeleted circuits with prep and povm labels included. This is the format produced by the :meth:complete_circuit(s) method, and this can be used to accelerate this method call when that has been previously run. Should not @@ -1524,9 +1559,9 @@ def bulk_expand_instruments_and_separate_povm(self, circuits, observed_outcomes_ Returns ------- - OrderedDict - A dict whose keys are :class:`SeparatePOVMCircuit` objects and whose - values are tuples of the outcome labels corresponding to this circuit, + list of OrderedDicts + A list of dictionaries whose keys are :class:`SeparatePOVMCircuit` objects and whose + values are tuples of the outcome labels corresponding to each circuit, one per POVM effect held in the key. """ diff --git a/pygsti/objectivefns/objectivefns.py b/pygsti/objectivefns/objectivefns.py index 191fd736b..9476f1c1c 100644 --- a/pygsti/objectivefns/objectivefns.py +++ b/pygsti/objectivefns/objectivefns.py @@ -19,11 +19,13 @@ from pygsti import tools as _tools from pygsti.layouts.distlayout import DistributableCOPALayout as _DistributableCOPALayout +from pygsti.layouts.matrixlayout import MatrixCOPALayout as _MatrixCOPALayout from pygsti.tools import slicetools as _slct, mpitools as _mpit, sharedmemtools as _smt from pygsti.circuits.circuitlist import CircuitList as _CircuitList from pygsti.baseobjs.resourceallocation import ResourceAllocation as _ResourceAllocation from pygsti.baseobjs.nicelyserializable import NicelySerializable as _NicelySerializable from pygsti.baseobjs.verbosityprinter import VerbosityPrinter as _VerbosityPrinter +from pygsti.models.model import OpModel as _OpModel def _objfn(objfn_cls, model, dataset, circuits=None, @@ -843,12 +845,10 @@ class ModelDatasetCircuitsStore(object): point. """ def __init__(self, model, dataset, circuits=None, resource_alloc=None, array_types=(), - precomp_layout=None, verbosity=0): + precomp_layout=None, outcome_count_by_circuit=None, verbosity=0): self.dataset = dataset self.model = model - #self.opBasis = mdl.basis self.resource_alloc = _ResourceAllocation.cast(resource_alloc) - # expand = ??? get from model based on fwdsim type? 
        circuit_list = circuits if (circuits is not None) else list(dataset.keys())
         bulk_circuit_list = circuit_list if isinstance(
@@ -872,8 +872,21 @@ def __init__(self, model, dataset, circuits=None, resource_alloc=None, array_typ
         else:
             self.global_circuits = self.circuits
 
-        #self.circuits = bulk_circuit_list[:]
-        #self.circuit_weights = bulk_circuit_list.circuit_weights
+        #If this is a matrix layout we have some precached circuit structures we can
+        #grab to speed up store generation.
+        if isinstance(self.layout, _MatrixCOPALayout):
+            #Grab the split_circuit_cache and down-select to those in
+            #self.circuits
+            self.split_circuit_cache = self.layout.split_circuit_cache
+            self.split_circuits = [self.split_circuit_cache[ckt] for ckt in self.circuits]
+
+        #currently only implemented for matrix, will eventually add map support.
+        else:
+            self.split_circuits = self.split_circuit_cache = None
+
+        #set the value of the circuit outcome count cache (can be None)
+        self.outcome_count_by_circuit_cache = outcome_count_by_circuit
+
         self.ds_circuits = self.circuits.apply_aliases()
 
         # computed by add_count_vectors
@@ -888,18 +901,6 @@
         self.time_dependent = False  # indicates whether the data should be treated as time-resolved
 
-        #if not self.cache.has_evaltree():
-        #    subcalls = self.get_evaltree_subcalls()
-        #    evt_resource_alloc = _ResourceAllocation(self.raw_objfn.comm, evt_mlim,
-        #                                             self.raw_objfn.profiler, self.raw_objfn.distribute_method)
-        #    self.cache.add_evaltree(self.mdl, self.dataset, bulk_circuit_list, evt_resource_alloc,
-        #                            subcalls, self.raw_objfn.printer - 1)
-        #self.eval_tree = self.cache.eval_tree
-        #self.lookup = self.cache.lookup
-        #self.outcomes_lookup = self.cache.outcomes_lookup
-        #self.wrt_block_size = self.cache.wrt_block_size
-        #self.wrt_block_size2 = self.cache.wrt_block_size2
-
         #convenience attributes (could make properties?)
         if isinstance(self.layout, _DistributableCOPALayout):
             self.global_nelements = self.layout.global_num_elements
@@ -941,10 +942,18 @@ def add_omitted_freqs(self, printer=None, force=False):
         if self.firsts is None or force:
             # FUTURE: add any tracked memory? self.resource_alloc.add_tracked_memory(...)
             self.firsts = []; self.indicesOfCircuitsWithOmittedData = []
-            for i, c in enumerate(self.circuits):
+
+            #bulk compute the number of outcomes.
+            if isinstance(self.model, _OpModel):
+                bulk_outcomes_list = self.model.bulk_circuit_outcomes(self.circuits, split_circuits=self.split_circuits)
+                num_outcomes_list = [len(outcome_tup) for outcome_tup in bulk_outcomes_list]
+            else:
+                num_outcomes_list = [self.model.compute_num_outcomes(c) for c in self.circuits]
+
+            for i in range(len(self.circuits)):
                 indices = _slct.to_array(self.layout.indices_for_index(i))
                 lklen = _slct.length(self.layout.indices_for_index(i))
-                if 0 < lklen < self.model.compute_num_outcomes(c):
+                if 0 < lklen < num_outcomes_list[i]:
                     self.firsts.append(indices[0])
                     self.indicesOfCircuitsWithOmittedData.append(i)
             if len(self.firsts) > 0:
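
Usage sketch (not part of the patch itself): the snippet below shows, under the assumption that `model` is an OpModel using a matrix forward simulator and that `circuits` and `dataset` already exist, how the pieces added above are intended to fit together: precompute the layout circuit cache once, reuse it when creating the layout, derive per-circuit outcome counts from the cached split circuits, and pass both to the MDC store. The variable names and array types are illustrative, not prescribed by the patch.

# Hypothetical usage sketch (not part of this patch).  `model`, `circuits` and
# `dataset` are assumed to already exist; the array_types value is illustrative.
from pygsti.layouts.matrixlayout import create_matrix_copa_layout_circuit_cache
from pygsti.objectivefns.objectivefns import ModelDatasetCircuitsStore

# 1) Precompute completed/split/expanded circuit structures once, up front.
circuit_cache = create_matrix_copa_layout_circuit_cache(circuits, model, dataset=dataset)

# 2) Reuse the cache during layout creation so the completion/splitting/expansion
#    work is not redone inside create_layout.
layout = model.sim.create_layout(circuits, dataset, array_types=('e', 'ep'),
                                 layout_creation_circuit_cache=circuit_cache)

# 3) Derive per-circuit outcome counts from the cached split circuits,
#    mirroring the core.py changes above.
outcome_tuples = model.bulk_circuit_outcomes(circuits,
                                             split_circuits=circuit_cache['split_circuits'])
outcome_counts = {ckt: len(outs) for ckt, outs in zip(circuits, outcome_tuples)}

# 4) Hand both precomputed objects to the MDC store so it can skip recomputing them.
mdc_store = ModelDatasetCircuitsStore(model, dataset, circuits,
                                      precomp_layout=layout,
                                      outcome_count_by_circuit=outcome_counts)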