Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve handling of XGM data recording the wrong number of pulses #161

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,12 @@ Fixed:

- Sometimes an XGM will record the wrong number of pulses in its slow data
property, which would cause the pulse energy to not be retrieved properly. Now
the XGM will rely only on the fast data to find the number of pulses in
`.pulse_energy()` (!153).
the [XGM][extra.components.XGM] component will rely only on the fast data to
find the number of pulses in `.pulse_energy()` and will emit warnings when it
detects the wrong number of pulses have been saved in the slow data
property. There is also a new `force_slow_data` argument to
[XGM.pulse_counts()][extra.XGM.pulse_counts] to always return whatever was
saved in the slow data (!153, !161).

## [2024.1.1]

Expand Down
69 changes: 42 additions & 27 deletions src/extra/components/xgm.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from enum import Enum
from warnings import warn
from textwrap import dedent

import numpy as np
Expand Down Expand Up @@ -192,7 +193,8 @@ def __init__(self, run, device=None, default_sase=None):
self._pulse_energy = { }
self._slow_train_energy = { }
self._npulses = { }
self._npulses_by_train = { }
self._pulse_counts = { }
self._slow_pulse_counts = { }
self._max_pulses = { }

self._proposal = None
Expand Down Expand Up @@ -410,15 +412,7 @@ def _get_main_nbunches_key(self):
def npulses(self, sase=None) -> int:
"""The nominal number of pulses.

This calls
[KeyData.as_single_value()][extra_data.KeyData.as_single_value]
internally, which means it will throw an exception if the number of
pulses is not constant.

Note that this returns the number of pulses recorded by the XGM, which
can be unreliable. Use something like the
[XrayPulses][extra.components.XrayPulses] component to find the real
number of pulses from the bunch pattern table.
This will throw an exception if the number of pulses is not constant.

Args:
sase (int): Same meaning as in
Expand All @@ -429,40 +423,64 @@ def npulses(self, sase=None) -> int:
"""
pg = self._check_sase_arg(sase)
if pg not in self._npulses:
if pg == PropertyGroup.MAIN:
key = self._get_main_nbunches_key()
else:
key = f"pulseEnergy.numberOfSa{pg.value}BunchesActual"
self._npulses[pg] = int(self.control_source[key].as_single_value())
pulse_counts = self.pulse_counts(sase=sase)
if not np.allclose(pulse_counts[0], pulse_counts):
raise ValueError("Number of pulses is changing, there is no nominal number.")

self._npulses[pg] = int(pulse_counts[0])

return self._npulses[pg]

def pulse_counts(self, sase=None):
def pulse_counts(self, sase=None, force_slow_data=False):
"""Return a 1D [DataArray][xarray.DataArray] of the number of pulses in each train.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm sorry I didn't notice that earlier, but it is unfortunate we're using xarray.DataArray here and pd.Series for PulsePattern-derived components. We should avoid mixing these types arbitrarily between components in the future.

Out of curiosity, any particular reason or just preference?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No reason, it's just a preference since I tend to use DataArrays/Datasets a lot.


See the docs for [XGM.npulses()][extra.components.XGM.npulses]
for information on retrieving the true number of pulses.
Because the slow data `pulseEnergy.numberOf[SAx]BunchesActual` property
can be unreliable this will always check the slow data counts against
the counts in the fast data from
[XGM.pulse_energy()][extra.components.XGM.pulse_energy] and return the
fast data counts if there is a difference. This can be overridden by
passing `force_slow_data=True`.

Warning:
Using `force_slow_data=True` can give unreliable results, only use
it if you specifically want to find what numbers the XGM
recorded. In general, prefer using something like the
[XrayPulses][extra.components.XrayPulses] component to find the real
number of pulses from the bunch pattern table.

Args:
sase (int): Same meaning as in
[XGM.pulse_energy()][extra.components.XGM.pulse_energy].
"""
import xarray as xr

pg = self._check_sase_arg(sase)
if pg not in self._npulses_by_train:
if pg not in self._pulse_counts:
if pg == PropertyGroup.MAIN:
key = self._get_main_nbunches_key()
else:
key = f"pulseEnergy.numberOfSa{pg.value}BunchesActual"
self._npulses_by_train[pg] = self._control_source[key].xarray()
slow_counts = self._control_source[key].xarray()
self._slow_pulse_counts[pg] = slow_counts

pulse_energy = self.pulse_energy(sase=sase)
common_tids = np.intersect1d(pulse_energy.trainId, slow_counts.trainId)
fast_counts = np.count_nonzero(pulse_energy.sel(trainId=common_tids), axis=1)
fast_counts = xr.DataArray(fast_counts, dims=("trainId",),
coords={"trainId": pulse_energy.trainId})

counts_match = np.allclose(fast_counts, slow_counts.sel(trainId=common_tids))
if not counts_match:
warn(f"Slow data pulse counts ({key}) don't match the counts from fast data (data.intensityTD), data may be invalid!")
Comment on lines +473 to +474
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we know the slow data counts aren't always reliable and the difference from the fast data is a useful signal of that, does that imply the fast data are more reliable? Should we be using that instead?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, in my experience the fast data is more reliable and in some ways more useful than what we get from the pulses components because it will show the real number of pulses that are getting delivered. I don't know why but sometimes the number of pulses will change for... reasons... and for whatever reason that doesn't seem to be reflected in the bunch pattern table.

As for whether we should be using that instead of the slow data, I'm not sure. I am a little hesitant to make it do something other than the obvious 'just return whatever was saved', but as you say that's probably not very useful.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it will show the real number of pulses that are getting delivered. I don't know why but sometimes the number of pulses will change for... reasons... and for whatever reason that doesn't seem to be reflected in the bunch pattern table.

Hu, do you have a concrete example? That should not be possible, as they both are looking at exactly the same data - the XGM DOOCS server takes the bunch pattern to slice out the correct values from the train signal for data.intensityTD. That's the entire reason I don't trust it, it's an interpretation of the same signal through a 3rd party I don't control.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, so weirdly enough the XrayPulses component does report the right number but not PumpProbePulses 🐙 Example from p6156, r185 with the XGM:
image

And the pulses components:
image

My code was using PumpProbePulses so it was giving the wrong number of pulses. Is that a bug in PumpProbePulses?

Copy link
Collaborator

@philsmt philsmt Apr 19, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, it's doing exactly what it is supposed to do:

image

PumpProbePulses creates a unified pattern from both, i.e. it looks at every possible pulse the machine could have, and counts it as a filled pulse if either FEL, PPL or both are present.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah ok, makes sense.

self._pulse_counts[pg] = fast_counts
else:
self._pulse_counts[pg] = slow_counts

return self._npulses_by_train[pg]
return self._pulse_counts[pg] if not force_slow_data else self._slow_pulse_counts[pg]

def max_npulses(self, sase=None) -> int:
"""The maximum number of pulses.

See the docs for [XGM.npulses()][extra.components.XGM.npulses]
for information on retrieving the true number of pulses.

Args:
sase (int): Same meaning as in
[XGM.pulse_energy()][extra.components.XGM.pulse_energy].
Expand All @@ -476,9 +494,6 @@ def max_npulses(self, sase=None) -> int:
def is_constant_pulse_count(self, sase=None) -> bool:
"""Return whether or not the number of pulses is constant.

See the docs for [XGM.npulses()][extra.components.XGM.npulses]
for information on retrieving the true number of pulses.

Args:
sase (int): Same meaning as in
[XGM.pulse_energy()][extra.components.XGM.pulse_energy].
Expand Down
31 changes: 31 additions & 0 deletions tests/test_components_xgm.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,3 +147,34 @@ def test_xgm_pulse_energy_series(mock_spb_aux_run):
run.train_ids[:10])
assert np.array_equal(energy_series.index.get_level_values(1).unique(),
np.arange(10))

def test_wrong_pulse_counts(mock_spb_aux_run):
run = mock_spb_aux_run
xgm = XGM(mock_spb_aux_run)

# Create a mock pulse_counts array
n_trains = len(run.train_ids)
mock_pulse_counts = xr.DataArray(np.ones(n_trains),
dims=("trainId",),
coords={"trainId": run.train_ids},
name="slow_counts")

# And a mock pulse_energy array with a different number of pulses
mock_pulse_energy = xr.DataArray(np.ones((n_trains, 100)),
dims=("trainId", "pulseIndex"),
coords={"trainId": run.train_ids})

with patch("extra.components.xgm.KeyData.xarray", return_value=mock_pulse_counts), \
patch.object(xgm, "pulse_energy", return_value=mock_pulse_energy):
with pytest.warns():
# npulses() will call pulse_counts() internally which should emit a
# warning when the number of pulses differ.
assert xgm.npulses() == 100

# pulse_counts() should use the fast data array by default, which
# will not have a name.
assert xgm.pulse_counts().name == None

# Otherwise it should return the slow data counts which should have
# a name.
assert xgm.pulse_counts(force_slow_data=True).name == mock_pulse_counts.name