From 156c27d55321f869fa9827c5e512f08477728c1d Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Fri, 8 Nov 2024 08:30:18 -0500 Subject: [PATCH] v2-docs: Add pre transform and chart state docs (#534) * Document pre_transform_spec * Document pre_transform_extract * Support maxbins property as expression * Add chart state documentation * fmt * fix example * fix test --- docs/source/chart_state.md | 15 + docs/source/index.md | 5 +- ...{transformed_data.md => transform_data.md} | 0 docs/source/transform_extract.md | 15 + docs/source/transform_spec.md | 21 + examples/python-examples/chart_state.py | 441 ++++++++++++++++++ ...nsformed_data.py => pre_transform_data.py} | 0 .../python-examples/pre_transform_extract.py | 384 +++++++++++++++ .../python-examples/pre_transform_spec.py | 420 +++++++++++++++++ .../rust-examples/examples/chart_state.rs | 196 ++++++++ .../examples/pre_transform_extract.rs | 180 +++++++ .../examples/pre_transform_spec.rs | 172 +++++++ vegafusion-core/src/chart_state.rs | 25 +- vegafusion-core/src/planning/stitch.rs | 5 +- .../src/proto/prost_gen/transforms.rs | 4 +- .../src/proto/tonic_gen/transforms.rs | 4 +- vegafusion-core/src/proto/transforms.proto | 2 +- vegafusion-core/src/runtime/runtime.rs | 2 +- vegafusion-core/src/spec/transform/bin.rs | 9 +- vegafusion-core/src/transform/bin.rs | 21 +- vegafusion-python/src/lib.rs | 8 +- vegafusion-python/tests/test_pretransform.py | 26 +- vegafusion-python/vegafusion/runtime.py | 169 +++---- vegafusion-runtime/src/transform/bin.rs | 12 +- vegafusion-runtime/tests/test_chart_state.rs | 12 +- vegafusion-wasm/src/lib.rs | 17 +- 26 files changed, 2020 insertions(+), 145 deletions(-) create mode 100644 docs/source/chart_state.md rename docs/source/{transformed_data.md => transform_data.md} (100%) create mode 100644 docs/source/transform_extract.md create mode 100644 docs/source/transform_spec.md create mode 100644 examples/python-examples/chart_state.py rename examples/python-examples/{pre_transformed_data.py => 
pre_transform_data.py} (100%) create mode 100644 examples/python-examples/pre_transform_extract.py create mode 100644 examples/python-examples/pre_transform_spec.py create mode 100644 examples/rust-examples/examples/chart_state.rs create mode 100644 examples/rust-examples/examples/pre_transform_extract.rs create mode 100644 examples/rust-examples/examples/pre_transform_spec.rs diff --git a/docs/source/chart_state.md b/docs/source/chart_state.md new file mode 100644 index 000000000..953d5b108 --- /dev/null +++ b/docs/source/chart_state.md @@ -0,0 +1,15 @@ +# Chart State + +The Chart State workflow can be used to support interactive charts with transforms that are updated interactively. For example, for a chart that implements crossfiltering the `filter` transform must be re-evaluated repeatedly against the input dataset. + +This is the foundation of Vega-Altair's [JupyterChart](https://altair-viz.github.io/user_guide/jupyter_chart.html) when combined with the [``"vegafusion"`` data transformer](https://altair-viz.github.io/user_guide/large_datasets.html#vegafusion-data-transformer). + +## Python +```{eval-rst} +.. automethod:: vegafusion.runtime.VegaFusionRuntime.new_chart_state +``` + +**Example**: See [chart_state.py](https://github.com/vega/vegafusion/tree/v2/examples/python-examples/chart_state.py) for a complete example. + +## Rust +See [chart_state.rs](https://github.com/vega/vegafusion/tree/v2/examples/rust-examples/examples/chart_state.rs) for a complete example. 
diff --git a/docs/source/index.md b/docs/source/index.md index dd7a6b861..0b4386dae 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -18,5 +18,8 @@ If you've arrived here looking for information on how to scale Vega-Altair visua :caption: Contents column_usage -transformed_data +transform_data +transform_spec +transform_extract +chart_state ``` diff --git a/docs/source/transformed_data.md b/docs/source/transform_data.md similarity index 100% rename from docs/source/transformed_data.md rename to docs/source/transform_data.md diff --git a/docs/source/transform_extract.md b/docs/source/transform_extract.md new file mode 100644 index 000000000..6cea26735 --- /dev/null +++ b/docs/source/transform_extract.md @@ -0,0 +1,15 @@ +# Transform Extract + +The `pre_transform_extract` method generates a transformed spec like the [`pre_transform_spec`](./transform_spec.md) method, but instead of inlining the transformed datasets in the spec, these datasets are returned separately in arrow table format. This can be useful in contexts where the inline datasets are large, and it's possible to transmit them more efficiently in arrow format. + +## Python + +```{eval-rst} +.. automethod:: vegafusion.runtime.VegaFusionRuntime.pre_transform_extract +``` + +**Example**: See [pre_transform_extract.py](https://github.com/vega/vegafusion/tree/v2/examples/python-examples/pre_transform_extract.py) for a complete example. + +## Rust + +See [pre_transform_extract.rs](https://github.com/vega/vegafusion/tree/v2/examples/rust-examples/examples/pre_transform_extract.rs) for a complete example. diff --git a/docs/source/transform_spec.md b/docs/source/transform_spec.md new file mode 100644 index 000000000..727815e46 --- /dev/null +++ b/docs/source/transform_spec.md @@ -0,0 +1,21 @@ +# Transformed Spec + +VegaFusion can be used to evaluate datasets in a Vega spec, remove unused columns, and inline the results in a transformed Vega spec. 
This transformed Vega spec is self-contained and may be displayed with the standard Vega JavaScript library. + +This is the foundation of Vega-Altair's [``"vegafusion"`` data transformer](https://altair-viz.github.io/user_guide/large_datasets.html#vegafusion-data-transformer) when used with the default HTML or static image renderers. + +:::{warning} +The pre-transform process will, by default, preserve the interactive behavior of the input Vega specification. For interactive charts that perform filtering, this may result in the generation of a spec containing the full input dataset. If interactivity does not need to be preserved (e.g. if the resulting chart is used in a static context) then the ``preserve_interactivity`` option should be set to False. If interactivity is needed, then the Chart State workflow may be more appropriate. +::: + +## Python + +```{eval-rst} +.. automethod:: vegafusion.runtime.VegaFusionRuntime.pre_transform_spec +``` + +**Example**: See [pre_transform_spec.py](https://github.com/vega/vegafusion/tree/v2/examples/python-examples/pre_transform_spec.py) for a complete example. + +## Rust + +See [pre_transform_spec.rs](https://github.com/vega/vegafusion/tree/v2/examples/rust-examples/examples/pre_transform_spec.rs) for a complete example. 
diff --git a/examples/python-examples/chart_state.py b/examples/python-examples/chart_state.py new file mode 100644 index 000000000..fc150d3eb --- /dev/null +++ b/examples/python-examples/chart_state.py @@ -0,0 +1,441 @@ +from typing import Any +import json +import vegafusion as vf + + +# This example demonstrates how to use a chart state, and update it in response to simulated interactive +# updates to the chart +def main(): + spec = get_spec() + + # Build chart state + chart_state = vf.runtime.new_chart_state(spec) + + # Get the initial pre-transformed spec that can be rendered + _init_spec = chart_state.get_client_spec() + + # Get the watch plan, which specifies the signals and data variables that should be listened to + # and relayed from the displayed vega chart back to the chart state. + watch_plan = chart_state.get_watch_plan() + print("Watch Plan:\n" + json.dumps(watch_plan, indent=2), end="\n\n") + + # Report an update to the maxbins signal. Update will return the signal and dataset updates that should + # be applied to the displayed chart. 
+ updates = chart_state.update( + [ + { + "name": "maxbins", + "namespace": "signal", + "scope": [], + "value": 4, + } + ] + ) + + print("Server to Client Updates:\n" + json.dumps(updates, indent=2), end="\n\n") + + +def get_spec() -> dict[str, Any]: + """ + Based on https://vega.github.io/editor/#/examples/vega/histogram-null-values + """ + spec_str = """ +{ + "$schema": "https://vega.github.io/schema/vega/v5.json", + "description": "A histogram of film ratings, modified to include null values.", + "width": 400, + "height": 200, + "padding": 5, + "autosize": {"type": "fit", "resize": true}, + + "signals": [ + { + "name": "maxbins", "value": 10, + "bind": {"input": "select", "options": [5, 10, 20]} + }, + { + "name": "binCount", + "update": "(bins.stop - bins.start) / bins.step" + }, + { + "name": "nullGap", "value": 10 + }, + { + "name": "barStep", + "update": "(width - nullGap) / (1 + binCount)" + } + ], + "data": [ + { + "name": "table", + "url": "data/movies.json", + "transform": [ + { + "type": "extent", "field": "IMDB Rating", + "signal": "extent" + }, + { + "type": "bin", "signal": "bins", + "field": "IMDB Rating", "extent": {"signal": "extent"}, + "maxbins": {"signal": "maxbins"} + } + ] + }, + { + "name": "counts", + "source": "table", + "transform": [ + { + "type": "filter", + "expr": "datum['IMDB Rating'] != null" + }, + { + "type": "aggregate", + "groupby": ["bin0", "bin1"] + } + ] + }, + { + "name": "nulls", + "source": "table", + "transform": [ + { + "type": "filter", + "expr": "datum['IMDB Rating'] == null" + }, + { + "type": "aggregate", + "groupby": [] + } + ] + } + ], + + "scales": [ + { + "name": "yscale", + "type": "linear", + "range": "height", + "round": true, "nice": true, + "domain": { + "fields": [ + {"data": "counts", "field": "count"}, + {"data": "nulls", "field": "count"} + ] + } + }, + { + "name": "xscale", + "type": "linear", + "range": [{"signal": "barStep + nullGap"}, {"signal": "width"}], + "round": true, + "domain": {"signal": 
"[bins.start, bins.stop]"}, + "bins": {"signal": "bins"} + }, + { + "name": "xscale-null", + "type": "band", + "range": [0, {"signal": "barStep"}], + "round": true, + "domain": [null] + } + ], + + "axes": [ + {"orient": "bottom", "scale": "xscale", "tickMinStep": 0.5}, + {"orient": "bottom", "scale": "xscale-null"}, + {"orient": "left", "scale": "yscale", "tickCount": 5, "offset": 5} + ], + + "marks": [ + { + "type": "rect", + "from": {"data": "counts"}, + "encode": { + "update": { + "x": {"scale": "xscale", "field": "bin0", "offset": 1}, + "x2": {"scale": "xscale", "field": "bin1"}, + "y": {"scale": "yscale", "field": "count"}, + "y2": {"scale": "yscale", "value": 0}, + "fill": {"value": "steelblue"} + }, + "hover": { + "fill": {"value": "firebrick"} + } + } + }, + { + "type": "rect", + "from": {"data": "nulls"}, + "encode": { + "update": { + "x": {"scale": "xscale-null", "value": null, "offset": 1}, + "x2": {"scale": "xscale-null", "band": 1}, + "y": {"scale": "yscale", "field": "count"}, + "y2": {"scale": "yscale", "value": 0}, + "fill": {"value": "#aaa"} + }, + "hover": { + "fill": {"value": "firebrick"} + } + } + } + ] +} + """ + return json.loads(spec_str) + + +def expected_spec() -> dict[str, Any]: + return json.loads(""" + { + "$schema": "https://vega.github.io/schema/vega/v5.json", + "data": [ + { + "name": "table" + }, + { + "name": "counts", + "values": [ + { + "bin0": 6.0, + "bin1": 7.0, + "count": 985 + }, + { + "bin0": 3.0, + "bin1": 4.0, + "count": 100 + }, + { + "bin0": 7.0, + "bin1": 8.0, + "count": 741 + }, + { + "bin0": 5.0, + "bin1": 6.0, + "count": 633 + }, + { + "bin0": 8.0, + "bin1": 9.0, + "count": 204 + }, + { + "bin0": 2.0, + "bin1": 3.0, + "count": 43 + }, + { + "bin0": 4.0, + "bin1": 5.0, + "count": 273 + }, + { + "bin0": 9.0, + "bin1": 10.0, + "count": 4 + }, + { + "bin0": 1.0, + "bin1": 2.0, + "count": 5 + } + ] + }, + { + "name": "nulls", + "values": [ + { + "count": 213 + } + ] + } + ], + "signals": [ + { + "name": "bins", + "value": 
{ + "fields": [ + "IMDB Rating" + ], + "fname": "bin_IMDB Rating", + "start": 1.0, + "step": 1.0, + "stop": 10.0 + } + }, + { + "name": "maxbins", + "value": 10 + }, + { + "name": "binCount", + "update": "(bins.stop - bins.start) / bins.step" + }, + { + "name": "nullGap", + "value": 10 + }, + { + "name": "barStep", + "update": "(width - nullGap) / (1 + binCount)" + } + ], + "marks": [ + { + "type": "rect", + "from": { + "data": "counts" + }, + "encode": { + "update": { + "y": { + "field": "count", + "scale": "yscale" + }, + "fill": { + "value": "steelblue" + }, + "x2": { + "field": "bin1", + "scale": "xscale" + }, + "x": { + "field": "bin0", + "scale": "xscale", + "offset": 1 + }, + "y2": { + "value": 0, + "scale": "yscale" + } + }, + "hover": { + "fill": { + "value": "firebrick" + } + } + } + }, + { + "type": "rect", + "from": { + "data": "nulls" + }, + "encode": { + "hover": { + "fill": { + "value": "firebrick" + } + }, + "update": { + "x2": { + "scale": "xscale-null", + "band": 1 + }, + "y": { + "field": "count", + "scale": "yscale" + }, + "y2": { + "value": 0, + "scale": "yscale" + }, + "fill": { + "value": "#aaa" + }, + "x": { + "scale": "xscale-null", + "offset": 1 + } + } + } + } + ], + "scales": [ + { + "name": "yscale", + "type": "linear", + "domain": { + "fields": [ + { + "data": "counts", + "field": "count" + }, + { + "data": "nulls", + "field": "count" + } + ] + }, + "range": "height", + "nice": true, + "round": true + }, + { + "name": "xscale", + "type": "linear", + "domain": { + "signal": "[bins.start, bins.stop]" + }, + "range": [ + { + "signal": "barStep + nullGap" + }, + { + "signal": "width" + } + ], + "bins": { + "signal": "bins" + }, + "round": true + }, + { + "name": "xscale-null", + "type": "band", + "domain": [ + null + ], + "range": [ + 0, + { + "signal": "barStep" + } + ], + "round": true + } + ], + "axes": [ + { + "scale": "xscale", + "tickMinStep": 0.5, + "orient": "bottom" + }, + { + "scale": "xscale-null", + "orient": "bottom" + }, + { 
+ "scale": "yscale", + "tickCount": 5, + "offset": 5, + "orient": "left" + } + ], + "width": 400, + "height": 200, + "description": "A histogram of film ratings, modified to include null values.", + "padding": 5, + "autosize": { + "type": "fit", + "resize": true + } +} + """) + + +if __name__ == "__main__": + main() diff --git a/examples/python-examples/pre_transformed_data.py b/examples/python-examples/pre_transform_data.py similarity index 100% rename from examples/python-examples/pre_transformed_data.py rename to examples/python-examples/pre_transform_data.py diff --git a/examples/python-examples/pre_transform_extract.py b/examples/python-examples/pre_transform_extract.py new file mode 100644 index 000000000..5d44ae710 --- /dev/null +++ b/examples/python-examples/pre_transform_extract.py @@ -0,0 +1,384 @@ +import json +from typing import Any + + +import vegafusion as vf + + +# This example demonstrates how to use the `pre_transform_extract` method to create a new +# spec with supported transforms pre-evaluated and the transformed datasets extracted in arrow format +def main(): + spec = get_spec() + transformed_spec, datasets, warnings = vf.runtime.pre_transform_extract( + spec, extract_threshold=4 + ) + print(datasets) + assert warnings == [] + assert transformed_spec == expected_spec() + assert len(datasets) == 1 + + name, scope, data = datasets[0] + assert name == "counts" + assert scope == [] + assert data.num_rows == 9 + assert data.column_names == ["bin0", "bin1", "count"] + + +def get_spec() -> dict[str, Any]: + """ + Based on https://vega.github.io/editor/#/examples/vega/histogram-null-values + """ + spec_str = """ +{ + "$schema": "https://vega.github.io/schema/vega/v5.json", + "description": "A histogram of film ratings, modified to include null values.", + "width": 400, + "height": 200, + "padding": 5, + "autosize": {"type": "fit", "resize": true}, + + "signals": [ + { + "name": "maxbins", "value": 10 + }, + { + "name": "binCount", + "update": "(bins.stop 
- bins.start) / bins.step" + }, + { + "name": "nullGap", "value": 10 + }, + { + "name": "barStep", + "update": "(width - nullGap) / (1 + binCount)" + } + ], + + "data": [ + { + "name": "table", + "url": "data/movies.json", + "transform": [ + { + "type": "extent", "field": "IMDB Rating", + "signal": "extent" + }, + { + "type": "bin", "signal": "bins", + "field": "IMDB Rating", "extent": {"signal": "extent"}, + "maxbins": 10 + } + ] + }, + { + "name": "counts", + "source": "table", + "transform": [ + { + "type": "filter", + "expr": "datum['IMDB Rating'] != null" + }, + { + "type": "aggregate", + "groupby": ["bin0", "bin1"] + } + ] + }, + { + "name": "nulls", + "source": "table", + "transform": [ + { + "type": "filter", + "expr": "datum['IMDB Rating'] == null" + }, + { + "type": "aggregate", + "groupby": [] + } + ] + } + ], + + "scales": [ + { + "name": "yscale", + "type": "linear", + "range": "height", + "round": true, "nice": true, + "domain": { + "fields": [ + {"data": "counts", "field": "count"}, + {"data": "nulls", "field": "count"} + ] + } + }, + { + "name": "xscale", + "type": "linear", + "range": [{"signal": "barStep + nullGap"}, {"signal": "width"}], + "round": true, + "domain": {"signal": "[bins.start, bins.stop]"}, + "bins": {"signal": "bins"} + }, + { + "name": "xscale-null", + "type": "band", + "range": [0, {"signal": "barStep"}], + "round": true, + "domain": [null] + } + ], + + "axes": [ + {"orient": "bottom", "scale": "xscale", "tickMinStep": 0.5}, + {"orient": "bottom", "scale": "xscale-null"}, + {"orient": "left", "scale": "yscale", "tickCount": 5, "offset": 5} + ], + + "marks": [ + { + "type": "rect", + "from": {"data": "counts"}, + "encode": { + "update": { + "x": {"scale": "xscale", "field": "bin0", "offset": 1}, + "x2": {"scale": "xscale", "field": "bin1"}, + "y": {"scale": "yscale", "field": "count"}, + "y2": {"scale": "yscale", "value": 0}, + "fill": {"value": "steelblue"} + }, + "hover": { + "fill": {"value": "firebrick"} + } + } + }, + { + 
"type": "rect", + "from": {"data": "nulls"}, + "encode": { + "update": { + "x": {"scale": "xscale-null", "value": null, "offset": 1}, + "x2": {"scale": "xscale-null", "band": 1}, + "y": {"scale": "yscale", "field": "count"}, + "y2": {"scale": "yscale", "value": 0}, + "fill": {"value": "#aaa"} + }, + "hover": { + "fill": {"value": "firebrick"} + } + } + } + ] +} + + """ + return json.loads(spec_str) + + +def expected_spec() -> dict[str, Any]: + return json.loads(""" + { + "$schema": "https://vega.github.io/schema/vega/v5.json", + "data": [ + { + "name": "table" + }, + { + "name": "counts" + }, + { + "name": "nulls", + "values": [ + { + "count": 213 + } + ] + } + ], + "signals": [ + { + "name": "bins", + "value": { + "fields": [ + "IMDB Rating" + ], + "fname": "bin_IMDB Rating", + "start": 1.0, + "step": 1.0, + "stop": 10.0 + } + }, + { + "name": "maxbins", + "value": 10 + }, + { + "name": "binCount", + "update": "(bins.stop - bins.start) / bins.step" + }, + { + "name": "nullGap", + "value": 10 + }, + { + "name": "barStep", + "update": "(width - nullGap) / (1 + binCount)" + } + ], + "marks": [ + { + "type": "rect", + "from": { + "data": "counts" + }, + "encode": { + "update": { + "y": { + "field": "count", + "scale": "yscale" + }, + "fill": { + "value": "steelblue" + }, + "x2": { + "field": "bin1", + "scale": "xscale" + }, + "x": { + "field": "bin0", + "scale": "xscale", + "offset": 1 + }, + "y2": { + "value": 0, + "scale": "yscale" + } + }, + "hover": { + "fill": { + "value": "firebrick" + } + } + } + }, + { + "type": "rect", + "from": { + "data": "nulls" + }, + "encode": { + "hover": { + "fill": { + "value": "firebrick" + } + }, + "update": { + "x2": { + "scale": "xscale-null", + "band": 1 + }, + "y": { + "field": "count", + "scale": "yscale" + }, + "y2": { + "value": 0, + "scale": "yscale" + }, + "fill": { + "value": "#aaa" + }, + "x": { + "scale": "xscale-null", + "offset": 1 + } + } + } + } + ], + "scales": [ + { + "name": "yscale", + "type": "linear", + 
"domain": { + "fields": [ + { + "data": "counts", + "field": "count" + }, + { + "data": "nulls", + "field": "count" + } + ] + }, + "range": "height", + "nice": true, + "round": true + }, + { + "name": "xscale", + "type": "linear", + "domain": { + "signal": "[bins.start, bins.stop]" + }, + "range": [ + { + "signal": "barStep + nullGap" + }, + { + "signal": "width" + } + ], + "bins": { + "signal": "bins" + }, + "round": true + }, + { + "name": "xscale-null", + "type": "band", + "domain": [ + null + ], + "range": [ + 0, + { + "signal": "barStep" + } + ], + "round": true + } + ], + "axes": [ + { + "scale": "xscale", + "tickMinStep": 0.5, + "orient": "bottom" + }, + { + "scale": "xscale-null", + "orient": "bottom" + }, + { + "scale": "yscale", + "tickCount": 5, + "offset": 5, + "orient": "left" + } + ], + "width": 400, + "height": 200, + "description": "A histogram of film ratings, modified to include null values.", + "padding": 5, + "autosize": { + "type": "fit", + "resize": true + } +} + """) + + +if __name__ == "__main__": + main() diff --git a/examples/python-examples/pre_transform_spec.py b/examples/python-examples/pre_transform_spec.py new file mode 100644 index 000000000..80d99359e --- /dev/null +++ b/examples/python-examples/pre_transform_spec.py @@ -0,0 +1,420 @@ +import json +from typing import Any + +import vegafusion as vf + + +# This example demonstrates how to use the `pre_transform_spec` method to create a new +# spec with supported transforms pre-evaluated. 
+def main(): + spec = get_spec() + transformed_spec, warnings = vf.runtime.pre_transform_spec(spec) + assert warnings == [] + assert transformed_spec == expected_spec() + + +def get_spec() -> dict[str, Any]: + """ + Based on https://vega.github.io/editor/#/examples/vega/histogram-null-values + """ + spec_str = """ +{ + "$schema": "https://vega.github.io/schema/vega/v5.json", + "description": "A histogram of film ratings, modified to include null values.", + "width": 400, + "height": 200, + "padding": 5, + "autosize": {"type": "fit", "resize": true}, + + "signals": [ + { + "name": "maxbins", "value": 10 + }, + { + "name": "binCount", + "update": "(bins.stop - bins.start) / bins.step" + }, + { + "name": "nullGap", "value": 10 + }, + { + "name": "barStep", + "update": "(width - nullGap) / (1 + binCount)" + } + ], + + "data": [ + { + "name": "table", + "url": "data/movies.json", + "transform": [ + { + "type": "extent", "field": "IMDB Rating", + "signal": "extent" + }, + { + "type": "bin", "signal": "bins", + "field": "IMDB Rating", "extent": {"signal": "extent"}, + "maxbins": 10 + } + ] + }, + { + "name": "counts", + "source": "table", + "transform": [ + { + "type": "filter", + "expr": "datum['IMDB Rating'] != null" + }, + { + "type": "aggregate", + "groupby": ["bin0", "bin1"] + } + ] + }, + { + "name": "nulls", + "source": "table", + "transform": [ + { + "type": "filter", + "expr": "datum['IMDB Rating'] == null" + }, + { + "type": "aggregate", + "groupby": [] + } + ] + } + ], + + "scales": [ + { + "name": "yscale", + "type": "linear", + "range": "height", + "round": true, "nice": true, + "domain": { + "fields": [ + {"data": "counts", "field": "count"}, + {"data": "nulls", "field": "count"} + ] + } + }, + { + "name": "xscale", + "type": "linear", + "range": [{"signal": "barStep + nullGap"}, {"signal": "width"}], + "round": true, + "domain": {"signal": "[bins.start, bins.stop]"}, + "bins": {"signal": "bins"} + }, + { + "name": "xscale-null", + "type": "band", + "range": 
[0, {"signal": "barStep"}], + "round": true, + "domain": [null] + } + ], + + "axes": [ + {"orient": "bottom", "scale": "xscale", "tickMinStep": 0.5}, + {"orient": "bottom", "scale": "xscale-null"}, + {"orient": "left", "scale": "yscale", "tickCount": 5, "offset": 5} + ], + + "marks": [ + { + "type": "rect", + "from": {"data": "counts"}, + "encode": { + "update": { + "x": {"scale": "xscale", "field": "bin0", "offset": 1}, + "x2": {"scale": "xscale", "field": "bin1"}, + "y": {"scale": "yscale", "field": "count"}, + "y2": {"scale": "yscale", "value": 0}, + "fill": {"value": "steelblue"} + }, + "hover": { + "fill": {"value": "firebrick"} + } + } + }, + { + "type": "rect", + "from": {"data": "nulls"}, + "encode": { + "update": { + "x": {"scale": "xscale-null", "value": null, "offset": 1}, + "x2": {"scale": "xscale-null", "band": 1}, + "y": {"scale": "yscale", "field": "count"}, + "y2": {"scale": "yscale", "value": 0}, + "fill": {"value": "#aaa"} + }, + "hover": { + "fill": {"value": "firebrick"} + } + } + } + ] +} + + """ + return json.loads(spec_str) + + +def expected_spec() -> dict[str, Any]: + return json.loads(""" + { + "$schema": "https://vega.github.io/schema/vega/v5.json", + "data": [ + { + "name": "table" + }, + { + "name": "counts", + "values": [ + { + "bin0": 6.0, + "bin1": 7.0, + "count": 985 + }, + { + "bin0": 3.0, + "bin1": 4.0, + "count": 100 + }, + { + "bin0": 7.0, + "bin1": 8.0, + "count": 741 + }, + { + "bin0": 5.0, + "bin1": 6.0, + "count": 633 + }, + { + "bin0": 8.0, + "bin1": 9.0, + "count": 204 + }, + { + "bin0": 2.0, + "bin1": 3.0, + "count": 43 + }, + { + "bin0": 4.0, + "bin1": 5.0, + "count": 273 + }, + { + "bin0": 9.0, + "bin1": 10.0, + "count": 4 + }, + { + "bin0": 1.0, + "bin1": 2.0, + "count": 5 + } + ] + }, + { + "name": "nulls", + "values": [ + { + "count": 213 + } + ] + } + ], + "signals": [ + { + "name": "bins", + "value": { + "fields": [ + "IMDB Rating" + ], + "fname": "bin_IMDB Rating", + "start": 1.0, + "step": 1.0, + "stop": 10.0 + } 
+ }, + { + "name": "maxbins", + "value": 10 + }, + { + "name": "binCount", + "update": "(bins.stop - bins.start) / bins.step" + }, + { + "name": "nullGap", + "value": 10 + }, + { + "name": "barStep", + "update": "(width - nullGap) / (1 + binCount)" + } + ], + "marks": [ + { + "type": "rect", + "from": { + "data": "counts" + }, + "encode": { + "update": { + "y": { + "field": "count", + "scale": "yscale" + }, + "fill": { + "value": "steelblue" + }, + "x2": { + "field": "bin1", + "scale": "xscale" + }, + "x": { + "field": "bin0", + "scale": "xscale", + "offset": 1 + }, + "y2": { + "value": 0, + "scale": "yscale" + } + }, + "hover": { + "fill": { + "value": "firebrick" + } + } + } + }, + { + "type": "rect", + "from": { + "data": "nulls" + }, + "encode": { + "hover": { + "fill": { + "value": "firebrick" + } + }, + "update": { + "x2": { + "scale": "xscale-null", + "band": 1 + }, + "y": { + "field": "count", + "scale": "yscale" + }, + "y2": { + "value": 0, + "scale": "yscale" + }, + "fill": { + "value": "#aaa" + }, + "x": { + "scale": "xscale-null", + "offset": 1 + } + } + } + } + ], + "scales": [ + { + "name": "yscale", + "type": "linear", + "domain": { + "fields": [ + { + "data": "counts", + "field": "count" + }, + { + "data": "nulls", + "field": "count" + } + ] + }, + "range": "height", + "nice": true, + "round": true + }, + { + "name": "xscale", + "type": "linear", + "domain": { + "signal": "[bins.start, bins.stop]" + }, + "range": [ + { + "signal": "barStep + nullGap" + }, + { + "signal": "width" + } + ], + "bins": { + "signal": "bins" + }, + "round": true + }, + { + "name": "xscale-null", + "type": "band", + "domain": [ + null + ], + "range": [ + 0, + { + "signal": "barStep" + } + ], + "round": true + } + ], + "axes": [ + { + "scale": "xscale", + "tickMinStep": 0.5, + "orient": "bottom" + }, + { + "scale": "xscale-null", + "orient": "bottom" + }, + { + "scale": "yscale", + "tickCount": 5, + "offset": 5, + "orient": "left" + } + ], + "width": 400, + "height": 200, + 
"description": "A histogram of film ratings, modified to include null values.", + "padding": 5, + "autosize": { + "type": "fit", + "resize": true + } +} + """) + + +if __name__ == "__main__": + main() diff --git a/examples/rust-examples/examples/chart_state.rs b/examples/rust-examples/examples/chart_state.rs new file mode 100644 index 000000000..7756cb0c6 --- /dev/null +++ b/examples/rust-examples/examples/chart_state.rs @@ -0,0 +1,196 @@ +use vegafusion_core::chart_state::ChartState; +use vegafusion_core::planning::watch::{ExportUpdateJSON, ExportUpdateNamespace}; +use vegafusion_core::spec::chart::ChartSpec; +use vegafusion_runtime::task_graph::runtime::VegaFusionRuntime; + +/// This example demonstrates how to use a `ChartState`, and update it in response to simulated +/// interactive updates to the chart. +#[tokio::main] +async fn main() { + let spec = get_spec(); + + // Make runtime + let runtime = VegaFusionRuntime::new(None); + + // Construct ChartState + let chart_state = ChartState::try_new( + &runtime, + spec, + Default::default(), // Inline datasets + Default::default(), // Options + ) + .await + .unwrap(); + + // Get initial transformed spec for display + let _transformed_spec = chart_state.get_client_spec(); + + // Get comm plan + let comm_plan = chart_state.get_comm_plan(); + println!("{:#?}", comm_plan); + + // Apply an update to the maxbins signal + let updates = chart_state + .update( + &runtime, + vec![ExportUpdateJSON { + namespace: ExportUpdateNamespace::Signal, + name: "maxbins".to_string(), + scope: vec![], + value: 4.into(), + }], + ) + .await + .unwrap(); + + // Print updates that should be applied to the rendered Vega chart + println!("{}", serde_json::to_string_pretty(&updates).unwrap()); +} + +fn get_spec() -> ChartSpec { + let spec_str = r##" + { + "$schema": "https://vega.github.io/schema/vega/v5.json", + "description": "A histogram of film ratings, modified to include null values.", + "width": 400, + "height": 200, + "padding": 5, 
+ "autosize": {"type": "fit", "resize": true}, + + "signals": [ + { + "name": "maxbins", "value": 10, + "bind": {"input": "select", "options": [5, 10, 20]} + }, + { + "name": "binCount", + "update": "(bins.stop - bins.start) / bins.step" + }, + { + "name": "nullGap", "value": 10 + }, + { + "name": "barStep", + "update": "(width - nullGap) / (1 + binCount)" + } + ], + "data": [ + { + "name": "table", + "url": "data/movies.json", + "transform": [ + { + "type": "extent", "field": "IMDB Rating", + "signal": "extent" + }, + { + "type": "bin", "signal": "bins", + "field": "IMDB Rating", "extent": {"signal": "extent"}, + "maxbins": {"signal": "maxbins"} + } + ] + }, + { + "name": "counts", + "source": "table", + "transform": [ + { + "type": "filter", + "expr": "datum['IMDB Rating'] != null" + }, + { + "type": "aggregate", + "groupby": ["bin0", "bin1"] + } + ] + }, + { + "name": "nulls", + "source": "table", + "transform": [ + { + "type": "filter", + "expr": "datum['IMDB Rating'] == null" + }, + { + "type": "aggregate", + "groupby": [] + } + ] + } + ], + + "scales": [ + { + "name": "yscale", + "type": "linear", + "range": "height", + "round": true, "nice": true, + "domain": { + "fields": [ + {"data": "counts", "field": "count"}, + {"data": "nulls", "field": "count"} + ] + } + }, + { + "name": "xscale", + "type": "linear", + "range": [{"signal": "barStep + nullGap"}, {"signal": "width"}], + "round": true, + "domain": {"signal": "[bins.start, bins.stop]"}, + "bins": {"signal": "bins"} + }, + { + "name": "xscale-null", + "type": "band", + "range": [0, {"signal": "barStep"}], + "round": true, + "domain": [null] + } + ], + + "axes": [ + {"orient": "bottom", "scale": "xscale", "tickMinStep": 0.5}, + {"orient": "bottom", "scale": "xscale-null"}, + {"orient": "left", "scale": "yscale", "tickCount": 5, "offset": 5} + ], + + "marks": [ + { + "type": "rect", + "from": {"data": "counts"}, + "encode": { + "update": { + "x": {"scale": "xscale", "field": "bin0", "offset": 1}, + "x2": 
{"scale": "xscale", "field": "bin1"}, + "y": {"scale": "yscale", "field": "count"}, + "y2": {"scale": "yscale", "value": 0}, + "fill": {"value": "steelblue"} + }, + "hover": { + "fill": {"value": "firebrick"} + } + } + }, + { + "type": "rect", + "from": {"data": "nulls"}, + "encode": { + "update": { + "x": {"scale": "xscale-null", "value": null, "offset": 1}, + "x2": {"scale": "xscale-null", "band": 1}, + "y": {"scale": "yscale", "field": "count"}, + "y2": {"scale": "yscale", "value": 0}, + "fill": {"value": "#aaa"} + }, + "hover": { + "fill": {"value": "firebrick"} + } + } + } + ] + } + "##; + serde_json::from_str(spec_str).unwrap() +} diff --git a/examples/rust-examples/examples/pre_transform_extract.rs b/examples/rust-examples/examples/pre_transform_extract.rs new file mode 100644 index 000000000..e9d93010b --- /dev/null +++ b/examples/rust-examples/examples/pre_transform_extract.rs @@ -0,0 +1,180 @@ +use vegafusion_core::proto::gen::pretransform::PreTransformExtractOpts; +use vegafusion_core::runtime::{PreTransformExtractTable, VegaFusionRuntimeTrait}; +use vegafusion_core::spec::chart::ChartSpec; +use vegafusion_runtime::task_graph::runtime::VegaFusionRuntime; + +/// This example demonstrates how to use the `pre_transform_extract` method to create a new +/// spec with supported transforms pre-evaluated and the transformed datasets extracted in arrow format +#[tokio::main] +async fn main() { + let spec = get_spec(); + + let runtime = VegaFusionRuntime::new(None); + + let (transformed_spec, datasets, warnings) = runtime + .pre_transform_extract( + &spec, + &Default::default(), // Inline datasets + &PreTransformExtractOpts { + extract_threshold: 4, + ..Default::default() + }, + ) + .await + .unwrap(); + + assert_eq!(warnings.len(), 0); + assert_eq!(datasets.len(), 1); + + let PreTransformExtractTable { name, scope, table } = datasets[0].clone(); + println!( + "{name}({scope:?})\n{}\n{}", + table.pretty_format(None).unwrap(), + 
serde_json::to_string_pretty(&transformed_spec).unwrap() + ); +} + +fn get_spec() -> ChartSpec { + let spec_str = r##" + { + "$schema": "https://vega.github.io/schema/vega/v5.json", + "description": "A histogram of film ratings, modified to include null values.", + "width": 400, + "height": 200, + "padding": 5, + "autosize": {"type": "fit", "resize": true}, + "data": [ + { + "name": "table", + "url": "data/movies.json", + "transform": [ + { + "type": "extent", "field": "IMDB Rating", + "signal": "extent" + }, + { + "type": "bin", "signal": "bins", + "field": "IMDB Rating", "extent": {"signal": "extent"}, + "maxbins": 10 + } + ] + }, + { + "name": "counts", + "source": "table", + "transform": [ + { + "type": "filter", + "expr": "datum['IMDB Rating'] != null" + }, + { + "type": "aggregate", + "groupby": ["bin0", "bin1"] + } + ] + }, + { + "name": "nulls", + "source": "table", + "transform": [ + { + "type": "filter", + "expr": "datum['IMDB Rating'] == null" + }, + { + "type": "aggregate", + "groupby": [] + } + ] + } + ], + "signals": [ + { + "name": "maxbins", "value": 10 + }, + { + "name": "binCount", + "update": "(bins.stop - bins.start) / bins.step" + }, + { + "name": "nullGap", "value": 10 + }, + { + "name": "barStep", + "update": "(width - nullGap) / (1 + binCount)" + } + ], + "scales": [ + { + "name": "yscale", + "type": "linear", + "range": "height", + "round": true, "nice": true, + "domain": { + "fields": [ + {"data": "counts", "field": "count"}, + {"data": "nulls", "field": "count"} + ] + } + }, + { + "name": "xscale", + "type": "linear", + "range": [{"signal": "barStep + nullGap"}, {"signal": "width"}], + "round": true, + "domain": {"signal": "[bins.start, bins.stop]"}, + "bins": {"signal": "bins"} + }, + { + "name": "xscale-null", + "type": "band", + "range": [0, {"signal": "barStep"}], + "round": true, + "domain": [null] + } + ], + + "axes": [ + {"orient": "bottom", "scale": "xscale", "tickMinStep": 0.5}, + {"orient": "bottom", "scale": "xscale-null"}, + 
{"orient": "left", "scale": "yscale", "tickCount": 5, "offset": 5} + ], + + "marks": [ + { + "type": "rect", + "from": {"data": "counts"}, + "encode": { + "update": { + "x": {"scale": "xscale", "field": "bin0", "offset": 1}, + "x2": {"scale": "xscale", "field": "bin1"}, + "y": {"scale": "yscale", "field": "count"}, + "y2": {"scale": "yscale", "value": 0}, + "fill": {"value": "steelblue"} + }, + "hover": { + "fill": {"value": "firebrick"} + } + } + }, + { + "type": "rect", + "from": {"data": "nulls"}, + "encode": { + "update": { + "x": {"scale": "xscale-null", "value": null, "offset": 1}, + "x2": {"scale": "xscale-null", "band": 1}, + "y": {"scale": "yscale", "field": "count"}, + "y2": {"scale": "yscale", "value": 0}, + "fill": {"value": "#aaa"} + }, + "hover": { + "fill": {"value": "firebrick"} + } + } + } + ] + } + "##; + serde_json::from_str(spec_str).unwrap() +} diff --git a/examples/rust-examples/examples/pre_transform_spec.rs b/examples/rust-examples/examples/pre_transform_spec.rs new file mode 100644 index 000000000..fb035a038 --- /dev/null +++ b/examples/rust-examples/examples/pre_transform_spec.rs @@ -0,0 +1,172 @@ +use vegafusion_core::runtime::VegaFusionRuntimeTrait; +use vegafusion_core::spec::chart::ChartSpec; +use vegafusion_runtime::task_graph::runtime::VegaFusionRuntime; + +/// This example demonstrates how to use the `pre_transform_spec` method to create a new +/// spec with supported transforms pre-evaluated. 
+#[tokio::main] +async fn main() { + let spec = get_spec(); + + let runtime = VegaFusionRuntime::new(None); + + let (transformed_spec, warnings) = runtime + .pre_transform_spec( + &spec, + &Default::default(), // Inline datasets + &Default::default(), // Options + ) + .await + .unwrap(); + + assert_eq!(warnings.len(), 0); + println!( + "{}", + serde_json::to_string_pretty(&transformed_spec).unwrap() + ); +} + +fn get_spec() -> ChartSpec { + let spec_str = r##" + { + "$schema": "https://vega.github.io/schema/vega/v5.json", + "description": "A histogram of film ratings, modified to include null values.", + "width": 400, + "height": 200, + "padding": 5, + "autosize": {"type": "fit", "resize": true}, + "data": [ + { + "name": "table", + "url": "data/movies.json", + "transform": [ + { + "type": "extent", "field": "IMDB Rating", + "signal": "extent" + }, + { + "type": "bin", "signal": "bins", + "field": "IMDB Rating", "extent": {"signal": "extent"}, + "maxbins": 10 + } + ] + }, + { + "name": "counts", + "source": "table", + "transform": [ + { + "type": "filter", + "expr": "datum['IMDB Rating'] != null" + }, + { + "type": "aggregate", + "groupby": ["bin0", "bin1"] + } + ] + }, + { + "name": "nulls", + "source": "table", + "transform": [ + { + "type": "filter", + "expr": "datum['IMDB Rating'] == null" + }, + { + "type": "aggregate", + "groupby": [] + } + ] + } + ], + "signals": [ + { + "name": "maxbins", "value": 10 + }, + { + "name": "binCount", + "update": "(bins.stop - bins.start) / bins.step" + }, + { + "name": "nullGap", "value": 10 + }, + { + "name": "barStep", + "update": "(width - nullGap) / (1 + binCount)" + } + ], + "scales": [ + { + "name": "yscale", + "type": "linear", + "range": "height", + "round": true, "nice": true, + "domain": { + "fields": [ + {"data": "counts", "field": "count"}, + {"data": "nulls", "field": "count"} + ] + } + }, + { + "name": "xscale", + "type": "linear", + "range": [{"signal": "barStep + nullGap"}, {"signal": "width"}], + "round": 
true, + "domain": {"signal": "[bins.start, bins.stop]"}, + "bins": {"signal": "bins"} + }, + { + "name": "xscale-null", + "type": "band", + "range": [0, {"signal": "barStep"}], + "round": true, + "domain": [null] + } + ], + + "axes": [ + {"orient": "bottom", "scale": "xscale", "tickMinStep": 0.5}, + {"orient": "bottom", "scale": "xscale-null"}, + {"orient": "left", "scale": "yscale", "tickCount": 5, "offset": 5} + ], + + "marks": [ + { + "type": "rect", + "from": {"data": "counts"}, + "encode": { + "update": { + "x": {"scale": "xscale", "field": "bin0", "offset": 1}, + "x2": {"scale": "xscale", "field": "bin1"}, + "y": {"scale": "yscale", "field": "count"}, + "y2": {"scale": "yscale", "value": 0}, + "fill": {"value": "steelblue"} + }, + "hover": { + "fill": {"value": "firebrick"} + } + } + }, + { + "type": "rect", + "from": {"data": "nulls"}, + "encode": { + "update": { + "x": {"scale": "xscale-null", "value": null, "offset": 1}, + "x2": {"scale": "xscale-null", "band": 1}, + "y": {"scale": "yscale", "field": "count"}, + "y2": {"scale": "yscale", "value": 0}, + "fill": {"value": "#aaa"} + }, + "hover": { + "fill": {"value": "firebrick"} + } + } + } + ] + } + "##; + serde_json::from_str(spec_str).unwrap() +} diff --git a/vegafusion-core/src/chart_state.rs b/vegafusion-core/src/chart_state.rs index 80cd35537..2cbf08b21 100644 --- a/vegafusion-core/src/chart_state.rs +++ b/vegafusion-core/src/chart_state.rs @@ -24,6 +24,24 @@ use vegafusion_common::{ error::{Result, ResultWithContext, VegaFusionError}, }; +#[derive(Clone, Debug)] +pub struct ChartStateOpts { + pub tz_config: TzConfig, + pub row_limit: Option, +} + +impl Default for ChartStateOpts { + fn default() -> Self { + Self { + tz_config: TzConfig { + local_tz: "UTC".to_string(), + default_input_tz: None, + }, + row_limit: None, + } + } +} + #[derive(Clone)] pub struct ChartState { input_spec: ChartSpec, @@ -41,8 +59,7 @@ impl ChartState { runtime: &dyn VegaFusionRuntimeTrait, spec: ChartSpec, inline_datasets: 
HashMap, - tz_config: TzConfig, - row_limit: Option, + opts: ChartStateOpts, ) -> Result { let dataset_fingerprints = inline_datasets .iter() @@ -57,7 +74,7 @@ impl ChartState { .with_context(|| "Failed to create task scope for server spec")?; let tasks = plan .server_spec - .to_tasks(&tz_config, &dataset_fingerprints) + .to_tasks(&opts.tz_config, &dataset_fingerprints) .unwrap(); let task_graph = TaskGraph::new(tasks, &task_scope).unwrap(); let task_graph_mapping = task_graph.build_mapping(); @@ -97,7 +114,7 @@ impl ChartState { } let (transformed_spec, warnings) = - apply_pre_transform_datasets(&spec, &plan, init, row_limit)?; + apply_pre_transform_datasets(&spec, &plan, init, opts.row_limit)?; Ok(Self { input_spec: spec, diff --git a/vegafusion-core/src/planning/stitch.rs b/vegafusion-core/src/planning/stitch.rs index ded556c6a..6219eb214 100644 --- a/vegafusion-core/src/planning/stitch.rs +++ b/vegafusion-core/src/planning/stitch.rs @@ -6,6 +6,7 @@ use crate::spec::signal::SignalSpec; use crate::spec::values::MissingNullOrValue; use crate::task_graph::graph::ScopedVariable; use crate::task_graph::scope::TaskScope; +use itertools::Itertools; use serde_json::Value; use std::collections::HashSet; @@ -76,8 +77,8 @@ pub fn stitch_specs( // Return plan which specifies which signals/data need to be communicated between client and server Ok(CommPlan { - server_to_client: server_to_client.into_iter().collect(), - client_to_server: client_to_server.into_iter().collect(), + server_to_client: server_to_client.into_iter().sorted().collect(), + client_to_server: client_to_server.into_iter().sorted().collect(), }) } diff --git a/vegafusion-core/src/proto/prost_gen/transforms.rs b/vegafusion-core/src/proto/prost_gen/transforms.rs index 4c48a1b42..19bca1240 100644 --- a/vegafusion-core/src/proto/prost_gen/transforms.rs +++ b/vegafusion-core/src/proto/prost_gen/transforms.rs @@ -52,8 +52,8 @@ pub struct Bin { #[prost(double, optional, tag = "6")] pub anchor: 
::core::option::Option, /// The maximum number of bins allowed - #[prost(double, tag = "7")] - pub maxbins: f64, + #[prost(message, optional, tag = "7")] + pub maxbins: ::core::option::Option, /// The number base to use for automatic bin selection (e.g. base 10) #[prost(double, tag = "8")] pub base: f64, diff --git a/vegafusion-core/src/proto/tonic_gen/transforms.rs b/vegafusion-core/src/proto/tonic_gen/transforms.rs index 4c48a1b42..19bca1240 100644 --- a/vegafusion-core/src/proto/tonic_gen/transforms.rs +++ b/vegafusion-core/src/proto/tonic_gen/transforms.rs @@ -52,8 +52,8 @@ pub struct Bin { #[prost(double, optional, tag = "6")] pub anchor: ::core::option::Option, /// The maximum number of bins allowed - #[prost(double, tag = "7")] - pub maxbins: f64, + #[prost(message, optional, tag = "7")] + pub maxbins: ::core::option::Option, /// The number base to use for automatic bin selection (e.g. base 10) #[prost(double, tag = "8")] pub base: f64, diff --git a/vegafusion-core/src/proto/transforms.proto b/vegafusion-core/src/proto/transforms.proto index 87aa745fa..f8eac798c 100644 --- a/vegafusion-core/src/proto/transforms.proto +++ b/vegafusion-core/src/proto/transforms.proto @@ -44,7 +44,7 @@ message Bin { optional double anchor = 6; // The maximum number of bins allowed - double maxbins = 7; + expression.Expression maxbins = 7; // The number base to use for automatic bin selection (e.g. 
base 10) double base = 8; diff --git a/vegafusion-core/src/runtime/runtime.rs b/vegafusion-core/src/runtime/runtime.rs index 99344e4fa..400d0f7dd 100644 --- a/vegafusion-core/src/runtime/runtime.rs +++ b/vegafusion-core/src/runtime/runtime.rs @@ -30,7 +30,7 @@ use crate::{ }, }; -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct PreTransformExtractTable { pub name: String, pub scope: Vec, diff --git a/vegafusion-core/src/spec/transform/bin.rs b/vegafusion-core/src/spec/transform/bin.rs index 2524c5502..7b6bd7284 100644 --- a/vegafusion-core/src/spec/transform/bin.rs +++ b/vegafusion-core/src/spec/transform/bin.rs @@ -3,7 +3,7 @@ use crate::expression::parser::parse; use crate::expression::column_usage::{ColumnUsage, DatasetsColumnUsage, VlSelectionFields}; use crate::spec::transform::{TransformColumns, TransformSpecTrait}; -use crate::spec::values::{Field, SignalExpressionSpec}; +use crate::spec::values::{Field, SignalExpressionSpec, ValueOrSignalSpec}; use crate::task_graph::graph::ScopedVariable; use crate::task_graph::scope::TaskScope; use crate::task_graph::task::InputVariable; @@ -26,7 +26,7 @@ pub struct BinTransformSpec { pub anchor: Option, #[serde(skip_serializing_if = "Option::is_none")] - pub maxbins: Option, + pub maxbins: Option, #[serde(skip_serializing_if = "Option::is_none")] pub base: Option, @@ -106,6 +106,11 @@ impl TransformSpecTrait for BinTransformSpec { input_vars.extend(expression.input_vars()) } + if let Some(ValueOrSignalSpec::Signal(maxbins)) = &self.maxbins { + let expression = parse(&maxbins.signal)?; + input_vars.extend(expression.input_vars()) + } + Ok(input_vars.into_iter().collect()) } diff --git a/vegafusion-core/src/transform/bin.rs b/vegafusion-core/src/transform/bin.rs index 6fe72d378..3947009c5 100644 --- a/vegafusion-core/src/transform/bin.rs +++ b/vegafusion-core/src/transform/bin.rs @@ -5,7 +5,7 @@ use crate::proto::gen::expression::{ArrayExpression, Expression, Literal}; use crate::proto::gen::tasks::Variable; use 
crate::proto::gen::transforms::Bin; use crate::spec::transform::bin::{BinExtent, BinSpan, BinTransformSpec}; -use crate::spec::values::SignalExpressionSpec; +use crate::spec::values::{SignalExpressionSpec, ValueOrSignalSpec}; use crate::task_graph::task::InputVariable; use crate::transform::TransformDependencies; @@ -42,7 +42,7 @@ impl Bin { alias_0: as_.first().cloned(), alias_1: as_.get(1).cloned(), anchor: config.anchor, - maxbins: config.maxbins, + maxbins: Some(config.maxbins), base: config.base, step: config.step, steps: config.steps.into_iter().flatten().collect(), @@ -73,7 +73,7 @@ pub struct BinConfig { divide: Vec, /// The maximum number of bins allowed - maxbins: f64, + maxbins: Expression, /// A minimum distance between adjacent bins minstep: f64, @@ -99,7 +99,7 @@ impl Default for BinConfig { anchor: None, base: 10.0, divide: vec![5.0, 2.0], - maxbins: 20.0, + maxbins: Expression::from(20.0), minstep: 0.0, nice: true, step: None, @@ -121,11 +121,19 @@ impl BinConfig { }, }; + let maxbins = match &spec.maxbins { + None => None, + Some(maxbins) => match maxbins { + ValueOrSignalSpec::Value(maxbins) => maxbins.as_f64().map(Expression::from), + ValueOrSignalSpec::Signal(signal) => Some(parse(&signal.signal)?), + }, + }; + Ok(Self { anchor: spec.anchor, base: spec.base.unwrap_or(dflt.base), divide: spec.divide.unwrap_or(dflt.divide), - maxbins: spec.maxbins.unwrap_or(dflt.maxbins), + maxbins: maxbins.unwrap_or(dflt.maxbins), minstep: spec.minstep.unwrap_or(dflt.minstep), nice: spec.nice.unwrap_or(dflt.nice), step: spec.step, @@ -141,6 +149,9 @@ impl TransformDependencies for Bin { if let Some(span) = self.span.as_ref() { input_vars.extend(span.input_vars()); } + if let Some(maxbins) = self.maxbins.as_ref() { + input_vars.extend(maxbins.input_vars()); + } input_vars } diff --git a/vegafusion-python/src/lib.rs b/vegafusion-python/src/lib.rs index 96de16745..2c5983ceb 100644 --- a/vegafusion-python/src/lib.rs +++ b/vegafusion-python/src/lib.rs @@ -8,7 +8,7 @@ 
use std::str::FromStr; use std::sync::{Arc, Once}; use tokio::runtime::Runtime; use tonic::transport::{Channel, Uri}; -use vegafusion_core::chart_state::ChartState as RsChartState; +use vegafusion_core::chart_state::{ChartState as RsChartState, ChartStateOpts}; use vegafusion_core::error::{ToExternalError, VegaFusionError}; use vegafusion_core::proto::gen::pretransform::pre_transform_extract_warning::WarningType as ExtractWarningType; use vegafusion_core::proto::gen::pretransform::pre_transform_values_warning::WarningType as ValueWarningType; @@ -73,8 +73,10 @@ impl PyChartState { runtime.as_ref(), spec, inline_datasets, - tz_config, - row_limit, + ChartStateOpts { + tz_config, + row_limit, + }, ))?; Ok(Self { runtime, diff --git a/vegafusion-python/tests/test_pretransform.py b/vegafusion-python/tests/test_pretransform.py index da8072fd2..df3266857 100644 --- a/vegafusion-python/tests/test_pretransform.py +++ b/vegafusion-python/tests/test_pretransform.py @@ -1761,9 +1761,9 @@ def test_pre_transform_spec_encoded_datasets(): # Pre-transform with supported aggregate function should result in no warnings vega_spec = movies_histogram_spec() - # default list of dict format - tx_spec, _warnings = vf.runtime.pre_transform_spec( - vega_spec, data_encoding_threshold=10, data_encoding_format="pyarrow" + # Inline when threshold is larger than transformed data + tx_spec, datasets, _warnings = vf.runtime.pre_transform_extract( + vega_spec, extract_threshold=10, extracted_format="pyarrow" ) values = tx_spec["data"][0]["values"] @@ -1771,33 +1771,33 @@ def test_pre_transform_spec_encoded_datasets(): assert len(values) == 9 # pyarrow format - tx_spec, _warnings = vf.runtime.pre_transform_spec( - vega_spec, data_encoding_threshold=0, data_encoding_format="pyarrow" + tx_spec, datasets, _warnings = vf.runtime.pre_transform_extract( + vega_spec, extract_threshold=0, extracted_format="pyarrow" ) - values = tx_spec["data"][0]["values"] + name, scope, values = datasets[0] + assert name 
== "source_0" assert isinstance(values, pa.Table) values_df = values.to_pandas() assert len(values_df) == 9 assert values_df.columns[0] == "bin_maxbins_10_IMDB Rating" # arrow-ipc format - tx_spec, _warnings = vf.runtime.pre_transform_spec( - vega_spec, data_encoding_threshold=0, data_encoding_format="arrow-ipc" + tx_spec, datasets, _warnings = vf.runtime.pre_transform_extract( + vega_spec, extract_threshold=0, extracted_format="arrow-ipc" ) - - values = tx_spec["data"][0]["values"] + name, scope, values = datasets[0] assert isinstance(values, bytes) values_df = pa.ipc.deserialize_pandas(values) assert len(values_df) == 9 assert values_df.columns[0] == "bin_maxbins_10_IMDB Rating" # arrow-ipc-base64 format - tx_spec, _warnings = vf.runtime.pre_transform_spec( - vega_spec, data_encoding_threshold=0, data_encoding_format="arrow-ipc-base64" + tx_spec, datasets, _warnings = vf.runtime.pre_transform_extract( + vega_spec, extract_threshold=0, extracted_format="arrow-ipc-base64" ) - values = tx_spec["data"][0]["values"] + name, scope, values = datasets[0] assert isinstance(values, str) values_df = pa.ipc.deserialize_pandas(base64.standard_b64decode(values)) assert len(values_df) == 9 diff --git a/vegafusion-python/vegafusion/runtime.py b/vegafusion-python/vegafusion/runtime.py index 501e849fe..b7a86acc8 100644 --- a/vegafusion-python/vegafusion/runtime.py +++ b/vegafusion-python/vegafusion/runtime.py @@ -318,9 +318,7 @@ def pre_transform_spec( inline_datasets: dict[str, Any] | None = None, keep_signals: list[Union[str, tuple[str, list[int]]]] | None = None, keep_datasets: list[Union[str, tuple[str, list[int]]]] | None = None, - data_encoding_threshold: int | None = None, - data_encoding_format: str = "arro3", - ) -> tuple[Union[dict[str, Any], str], list[dict[str, str]]]: + ) -> tuple[dict[str, Any], list[PreTransformWarning]]: """ Evaluate supported transforms in an input Vega specification @@ -338,7 +336,7 @@ def pre_transform_spec( of rows and a RowLimitExceeded 
warning will be included in the resulting warnings list preserve_interactivity: If True (default) then the interactive behavior of - the chart will pre preserved. This requires that all the data that + the chart will be preserved. This requires that all the data that participates in interactions be included in the resulting spec rather than being pre-transformed. If False, then all possible data transformations are applied even if they break the original interactive @@ -348,82 +346,52 @@ def pre_transform_spec( using the following url syntax 'vegafusion+dataset://{dataset_name}' or 'table://{dataset_name}'. keep_signals: Signals from the input spec that must be included in the - pre-transformed spec. A list with elements that are either: - - The name of a top-level signal as a string - - A two-element tuple where the first element is the name of a signal + pre-transformed spec, even if they are no longer referenced. + A list with elements that are either: + + * The name of a top-level signal as a string + * A two-element tuple where the first element is the name of a signal as a string and the second element is the nested scope of the dataset as a list of integers keep_datasets: Datasets from the input spec that must be included in the - pre-transformed spec. A list with elements that are either: - - The name of a top-level dataset as a string - - A two-element tuple where the first element is the name of a dataset + pre-transformed spec even if they are no longer referenced. + A list with elements that are either: + + * The name of a top-level dataset as a string + * A two-element tuple where the first element is the name of a dataset as a string and the second element is the nested scope of the dataset as a list of integers - data_encoding_threshold: threshold for encoding datasets. When length of - pre-transformed datasets exceeds data_encoding_threshold, datasets are - encoded into an alternative format (as determined by the - data_encoding_format argument). 
When None (the default), - pre-transformed datasets are never encoded and are always included as - JSON compatible lists of dictionaries. - data_encoding_format: format of encoded datasets. Format to use to encode - datasets with length exceeding the data_encoding_threshold argument. - - "arro3": Encode datasets as arro3 Tables. Not JSON compatible. - - "pyarrow": Encode datasets as pyarrow Tables. Not JSON compatible. - - "arrow-ipc": Encode datasets as bytes in Arrow IPC format. Not JSON - compatible. - - "arrow-ipc-base64": Encode datasets as strings in base64 encoded - Arrow IPC format. JSON compatible. - Returns: - A tuple containing: - - A string containing the JSON representation of a Vega specification - with pre-transformed datasets included inline - - A list of warnings as dictionaries. Each warning dict has a 'type' - key indicating the warning type, and a 'message' key containing + Returns: Two-element tuple + + * The Vega specification as a dict with pre-transformed datasets + included inline + * A list of warnings as dictionaries. Each warning dict has a ``'type'`` + key indicating the warning type, and a ``'message'`` key containing a description of the warning. 
Potential warning types include: - 'RowLimitExceeded': Some datasets in resulting Vega specification - have been truncated to the provided row limit - 'BrokenInteractivity': Some interactive features may have been - broken in the resulting Vega specification - 'Unsupported': No transforms in the provided Vega specification were - eligible for pre-transforming + + * ``'RowLimitExceeded'``: Some datasets in resulting Vega specification + have been truncated to the provided row limit + * ``'BrokenInteractivity'``: Some interactive features may have been + broken in the resulting Vega specification + * ``'Unsupported'``: No transforms in the provided Vega specification + were eligible for pre-transforming """ local_tz = local_tz or get_local_tz() imported_inline_dataset = self._import_inline_datasets( inline_datasets, get_inline_column_usage(spec) ) - if data_encoding_threshold is None: - new_spec, warnings = self.runtime.pre_transform_spec( - spec, - local_tz=local_tz, - default_input_tz=default_input_tz, - row_limit=row_limit, - preserve_interactivity=preserve_interactivity, - inline_datasets=imported_inline_dataset, - keep_signals=parse_variables(keep_signals), - keep_datasets=parse_variables(keep_datasets), - ) - else: - # Use pre_transform_extract to extract large datasets - new_spec, datasets, warnings = self.runtime.pre_transform_extract( - spec, - local_tz=local_tz, - default_input_tz=default_input_tz, - preserve_interactivity=preserve_interactivity, - extract_threshold=data_encoding_threshold, - extracted_format=data_encoding_format, - inline_datasets=imported_inline_dataset, - keep_signals=parse_variables(keep_signals), - keep_datasets=parse_variables(keep_datasets), - ) - - # Insert encoded datasets back into spec - for name, scope, tbl in datasets: - group = get_mark_group_for_scope(new_spec, scope) or {} - for data in group.get("data", []): - if data.get("name", None) == name: - data["values"] = tbl + new_spec, warnings = self.runtime.pre_transform_spec( + 
spec, + local_tz=local_tz, + default_input_tz=default_input_tz, + row_limit=row_limit, + preserve_interactivity=preserve_interactivity, + inline_datasets=imported_inline_dataset, + keep_signals=parse_variables(keep_signals), + keep_datasets=parse_variables(keep_datasets), + ) return new_spec, warnings @@ -435,7 +403,8 @@ def new_chart_state( row_limit: int | None = None, inline_datasets: dict[str, DataFrameLike] | None = None, ) -> ChartState: - """Construct new ChartState object. + """ + Construct new ChartState object. Args: spec: A Vega specification dict or JSON string. @@ -476,7 +445,7 @@ def pre_transform_datasets( inline_datasets: dict[str, DataFrameLike] | None = None, trim_unused_columns: bool = False, dataset_format: DatasetFormat = "auto", - ) -> tuple[list[DataFrameLike], list[dict[str, str]]]: + ) -> tuple[list[DataFrameLike], list[PreTransformWarning]]: """ Extract the fully evaluated form of the requested datasets from a Vega specification. @@ -627,7 +596,7 @@ def pre_transform_extract( keep_signals: list[str | tuple[str, list[int]]] | None = None, keep_datasets: list[str | tuple[str, list[int]]] | None = None, ) -> tuple[ - dict[str, Any], list[tuple[str, list[int], pa.Table]], list[dict[str, str]] + dict[str, Any], list[tuple[str, list[int], pa.Table]], list[PreTransformWarning] ]: """ Evaluate supported transforms in an input Vega specification. @@ -644,7 +613,7 @@ def pre_transform_extract( default_input_tz: Name of timezone (e.g. 'America/New_York') that naive datetime strings should be interpreted in. Defaults to `local_tz`. preserve_interactivity: If True (default) then the interactive behavior of - the chart will pre preserved. This requires that all the data that + the chart will be preserved. This requires that all the data that participates in interactions be included in the resulting spec rather than being pre-transformed. 
If False, then all possible data transformations are applied even if they break the original interactive @@ -652,39 +621,49 @@ def pre_transform_extract( extract_threshold: Datasets with length below extract_threshold will be inlined. extracted_format: The format for the extracted datasets. Options are: - - "arro3": arro3.Table - - "pyarrow": pyarrow.Table - - "arrow-ipc": bytes in arrow IPC format - - "arrow-ipc-base64": base64 encoded arrow IPC format + + * ``"arro3"``: (default) arro3.Table + * ``"pyarrow"``: pyarrow.Table + * ``"arrow-ipc"``: bytes in arrow IPC format + * ``"arrow-ipc-base64"``: base64 encoded arrow IPC format inline_datasets: A dict from dataset names to pandas DataFrames or pyarrow Tables. Inline datasets may be referenced by the input specification using the following url syntax 'vegafusion+dataset://{dataset_name}' or 'table://{dataset_name}'. keep_signals: Signals from the input spec that must be included in the - pre-transformed spec. A list with elements that are either: - - The name of a top-level signal as a string - - A two-element tuple where the first element is the name of a signal as - a string and the second element is the nested scope of the dataset as - a list of integers + pre-transformed spec, even if they are no longer referenced. + A list with elements that are either: + + * The name of a top-level signal as a string + * A two-element tuple where the first element is the name of a signal + as a string and the second element is the nested scope of the signal + as a list of integers keep_datasets: Datasets from the input spec that must be included in the - pre-transformed spec. A list with elements that are either: - - The name of a top-level dataset as a string - - A two-element tuple where the first element is the name of a dataset + pre-transformed spec even if they are no longer referenced. 
+ A list with elements that are either: + + * The name of a top-level dataset as a string + * A two-element tuple where the first element is the name of a dataset as a string and the second element is the nested scope of the dataset as a list of integers - Returns: - A tuple containing three elements: - 1. A dict containing the JSON representation of the pre-transformed Vega - specification without pre-transformed datasets included inline - 2. Extracted datasets as a list of three element tuples: - - dataset name - - dataset scope - - pyarrow Table - 3. A list of warnings as dictionaries. Each warning dict has a 'type' key - indicating the warning type, and a 'message' key containing a description - of the warning. Potential warning types include: - - 'Planner': Planner warning + Returns: Three-element tuple + * The Vega specification as a dict with pre-transformed datasets + included but left empty. + * Extracted datasets as a list of three element tuples + * dataset name + * dataset scope list + * arrow data + * A list of warnings as dictionaries. Each warning dict has a ``'type'`` + key indicating the warning type, and a ``'message'`` key containing + a description of the warning. Potential warning types include: + + * ``'RowLimitExceeded'``: Some datasets in resulting Vega specification + have been truncated to the provided row limit + * ``'BrokenInteractivity'``: Some interactive features may have been + broken in the resulting Vega specification + * ``'Unsupported'``: No transforms in the provided Vega specification + were eligible for pre-transforming """ local_tz = local_tz or get_local_tz() diff --git a/vegafusion-runtime/src/transform/bin.rs b/vegafusion-runtime/src/transform/bin.rs index f8de61962..b76124a0a 100644 --- a/vegafusion-runtime/src/transform/bin.rs +++ b/vegafusion-runtime/src/transform/bin.rs @@ -194,6 +194,10 @@ pub fn calculate_bin_params( } } + let maxbins = compile(&tx.maxbins.as_ref().unwrap(), config, Some(schema))? 
+ .eval_to_scalar()? + .to_f64()?; + let logb = tx.base.ln(); let step = if let Some(step) = tx.step { @@ -202,7 +206,7 @@ pub fn calculate_bin_params( } else if !tx.steps.is_empty() { // If steps is provided, limit step to one of the elements. // Choose the first element of steps that will result in fewer than maxmins - let min_step_size = span / tx.maxbins; + let min_step_size = span / maxbins; let valid_steps: Vec<_> = tx .steps .clone() @@ -214,19 +218,19 @@ pub fn calculate_bin_params( .unwrap_or_else(|| tx.steps.last().unwrap()) } else { // Otherwise, use span to determine the step size - let level = (tx.maxbins.ln() / logb).ceil(); + let level = (maxbins.ln() / logb).ceil(); let minstep = tx.minstep; let mut step = minstep.max(tx.base.powf((span.ln() / logb).round() - level)); // increase step size if too many bins - while (span / step).ceil() > tx.maxbins { + while (span / step).ceil() > maxbins { step *= tx.base; } // decrease step size if allowed for div in &tx.divide { let v = step / div; - if v >= minstep && span / v <= tx.maxbins { + if v >= minstep && span / v <= maxbins { step = v } } diff --git a/vegafusion-runtime/tests/test_chart_state.rs b/vegafusion-runtime/tests/test_chart_state.rs index 6f9afe887..40d27b149 100644 --- a/vegafusion-runtime/tests/test_chart_state.rs +++ b/vegafusion-runtime/tests/test_chart_state.rs @@ -8,7 +8,7 @@ mod tests { use crate::crate_dir; use serde_json::json; use std::fs; - use vegafusion_core::chart_state::ChartState; + use vegafusion_core::chart_state::{ChartState, ChartStateOpts}; use vegafusion_core::planning::watch::{ExportUpdateJSON, ExportUpdateNamespace}; use vegafusion_core::proto::gen::tasks::TzConfig; use vegafusion_core::spec::chart::ChartSpec; @@ -31,11 +31,13 @@ mod tests { &runtime, spec, Default::default(), - TzConfig { - local_tz: "UTC".to_string(), - default_input_tz: None, + ChartStateOpts { + tz_config: TzConfig { + local_tz: "UTC".to_string(), + default_input_tz: None, + }, + row_limit: None, }, - 
None, ) .await .unwrap(); diff --git a/vegafusion-wasm/src/lib.rs b/vegafusion-wasm/src/lib.rs index b581b7773..637a3bd4f 100644 --- a/vegafusion-wasm/src/lib.rs +++ b/vegafusion-wasm/src/lib.rs @@ -29,7 +29,7 @@ use vegafusion_core::proto::gen::services::{ use vegafusion_core::runtime::{encode_inline_datasets, VegaFusionRuntimeTrait}; use vegafusion_core::spec::chart::ChartSpec; -use vegafusion_core::chart_state::ChartState; +use vegafusion_core::chart_state::{ChartState, ChartStateOpts}; use vegafusion_core::data::dataset::VegaFusionDataset; use vegafusion_core::get_column_usage; use vegafusion_runtime::task_graph::runtime::VegaFusionRuntime; @@ -449,10 +449,17 @@ pub async fn vegafusion_embed( Box::new(QueryFnVegaFusionRuntime::new(query_fn)) }; - let chart_state = - ChartState::try_new(runtime.as_ref(), spec, Default::default(), tz_config, None) - .await - .map_err(|e| JsError::new(&e.to_string()))?; + let chart_state = ChartState::try_new( + runtime.as_ref(), + spec, + Default::default(), + ChartStateOpts { + tz_config, + row_limit: None, + }, + ) + .await + .map_err(|e| JsError::new(&e.to_string()))?; // Serializer that can be used to convert serde types to JSON compatible objects let serializer = serde_wasm_bindgen::Serializer::json_compatible();