diff --git a/vegafusion-core/src/planning/stitch.rs b/vegafusion-core/src/planning/stitch.rs index ded556c6..6219eb21 100644 --- a/vegafusion-core/src/planning/stitch.rs +++ b/vegafusion-core/src/planning/stitch.rs @@ -6,6 +6,7 @@ use crate::spec::signal::SignalSpec; use crate::spec::values::MissingNullOrValue; use crate::task_graph::graph::ScopedVariable; use crate::task_graph::scope::TaskScope; +use itertools::Itertools; use serde_json::Value; use std::collections::HashSet; @@ -76,8 +77,8 @@ pub fn stitch_specs( // Return plan which specifies which signals/data need to be communicated between client and server Ok(CommPlan { - server_to_client: server_to_client.into_iter().collect(), - client_to_server: client_to_server.into_iter().collect(), + server_to_client: server_to_client.into_iter().sorted().collect(), + client_to_server: client_to_server.into_iter().sorted().collect(), }) } diff --git a/vegafusion-python/tests/test_pretransform.py b/vegafusion-python/tests/test_pretransform.py index da8072fd..df326685 100644 --- a/vegafusion-python/tests/test_pretransform.py +++ b/vegafusion-python/tests/test_pretransform.py @@ -1761,9 +1761,9 @@ def test_pre_transform_spec_encoded_datasets(): # Pre-transform with supported aggregate function should result in no warnings vega_spec = movies_histogram_spec() - # default list of dict format - tx_spec, _warnings = vf.runtime.pre_transform_spec( - vega_spec, data_encoding_threshold=10, data_encoding_format="pyarrow" + # Inline when threshold is larger than transformed data + tx_spec, datasets, _warnings = vf.runtime.pre_transform_extract( + vega_spec, extract_threshold=10, extracted_format="pyarrow" ) values = tx_spec["data"][0]["values"] @@ -1771,33 +1771,33 @@ def test_pre_transform_spec_encoded_datasets(): assert len(values) == 9 # pyarrow format - tx_spec, _warnings = vf.runtime.pre_transform_spec( - vega_spec, data_encoding_threshold=0, data_encoding_format="pyarrow" + tx_spec, datasets, _warnings = vf.runtime.pre_transform_extract( + vega_spec, extract_threshold=0, extracted_format="pyarrow" ) - values = tx_spec["data"][0]["values"] + name, scope, values = datasets[0] + assert name == "source_0" assert isinstance(values, pa.Table) values_df = values.to_pandas() assert len(values_df) == 9 assert values_df.columns[0] == "bin_maxbins_10_IMDB Rating" # arrow-ipc format - tx_spec, _warnings = vf.runtime.pre_transform_spec( - vega_spec, data_encoding_threshold=0, data_encoding_format="arrow-ipc" + tx_spec, datasets, _warnings = vf.runtime.pre_transform_extract( + vega_spec, extract_threshold=0, extracted_format="arrow-ipc" ) - - values = tx_spec["data"][0]["values"] + name, scope, values = datasets[0] assert isinstance(values, bytes) values_df = pa.ipc.deserialize_pandas(values) assert len(values_df) == 9 assert values_df.columns[0] == "bin_maxbins_10_IMDB Rating" # arrow-ipc-base64 format - tx_spec, _warnings = vf.runtime.pre_transform_spec( - vega_spec, data_encoding_threshold=0, data_encoding_format="arrow-ipc-base64" + tx_spec, datasets, _warnings = vf.runtime.pre_transform_extract( + vega_spec, extract_threshold=0, extracted_format="arrow-ipc-base64" ) - values = tx_spec["data"][0]["values"] + name, scope, values = datasets[0] assert isinstance(values, str) values_df = pa.ipc.deserialize_pandas(base64.standard_b64decode(values)) assert len(values_df) == 9