From a0b24e7386b6101cacf2c1b31b1ce2bd3a13029e Mon Sep 17 00:00:00 2001 From: Lucas Vieira Date: Thu, 9 Jan 2025 14:03:26 -0300 Subject: [PATCH] line and scatter, group by x axis, x axis sort --- apps/api/src/python/visualization-v2.test.ts | 176 ++++++++++++- apps/api/src/python/visualizations-v2.ts | 233 ++++++++++++++---- apps/api/src/yjs/v2/executor/visualization.ts | 7 +- .../customBlocks/visualizationV2/index.tsx | 7 +- .../editor/src/blocks/visualization-v2.ts | 15 +- 5 files changed, 383 insertions(+), 55 deletions(-) diff --git a/apps/api/src/python/visualization-v2.test.ts b/apps/api/src/python/visualization-v2.test.ts index 60f0ec57..d0eb9a0d 100644 --- a/apps/api/src/python/visualization-v2.test.ts +++ b/apps/api/src/python/visualization-v2.test.ts @@ -64,7 +64,6 @@ async function getPythonRunner( switch (message.header.msg_type) { case 'stream': if ('name' in message.content) { - console.log(message.content.text) onOutputs([ { type: 'stdio', @@ -139,8 +138,8 @@ describe('.createVisualizationV2', () => { const code = `import pandas as pd df = pd.DataFrame({ - 'integers': [1, 2, 3], - 'datetimes': pd.to_datetime(['2021-01-01', '2021-01-02', '2021-01-03']), + 'integers': [11, 12, 13, 21, 22, 33], + 'datetimes': pd.to_datetime(['2021-01-01', '2021-01-02', '2021-01-03', '2021-02-01', '2021-03-02', '2021-03-03']), })` const integersDFColumn: DataFrameColumn = { @@ -187,9 +186,175 @@ df = pd.DataFrame({ ], }, result: { + dataset: { + dimensions: ['datetimes', 'integers'], + source: [ + { datetimes: '2021-01-01T00:00:00', integers: 11 }, + { datetimes: '2021-01-02T00:00:00', integers: 12 }, + { datetimes: '2021-01-03T00:00:00', integers: 13 }, + { datetimes: '2021-02-01T00:00:00', integers: 21 }, + { datetimes: '2021-03-02T00:00:00', integers: 22 }, + { datetimes: '2021-03-03T00:00:00', integers: 33 }, + ], + }, + xAxis: [ + { + type: 'category', + }, + ], + yAxis: [ + { + type: 'value', + }, + ], + series: [ + { + type: 'bar', + }, + ], + }, + }, + { + name: 'integer by integer groupedColumn', + input: { + dataframeName: 'df', + chartType: 'groupedColumn', + xAxis: integersDFColumn, + xAxisName: null, + xAxisSort: 'ascending', + xAxisGroupFunction: null, + yAxes: [ + { + series: [ + { + axisName: null, + chartType: null, + column: integersDFColumn, + aggregateFunction: null, + colorBy: null, + }, + ], + }, + ], + }, + result: { + dataset: { + dimensions: ['integers', 'integers'], + source: [ + { integers: 11 }, + { integers: 12 }, + { integers: 13 }, + { integers: 21 }, + { integers: 22 }, + { integers: 33 }, + ], + }, + xAxis: [ + { + type: 'category', + }, + ], + yAxis: [ + { + type: 'value', + }, + ], + series: [ + { + type: 'bar', + }, + ], + }, + }, + { + name: 'datetime by integer group by month', + input: { + dataframeName: 'df', + chartType: 'groupedColumn', + xAxis: datetimesDFColumn, + xAxisName: null, + xAxisSort: 'ascending', + xAxisGroupFunction: 'month', + yAxes: [ + { + series: [ + { + axisName: null, + chartType: null, + column: integersDFColumn, + aggregateFunction: null, + colorBy: null, + }, + ], + }, + ], + }, + result: { + dataset: { + dimensions: ['datetimes', 'integers'], + source: [ + { datetimes: '2021-01-01T00:00:00', integers: 11 }, + { datetimes: '2021-01-01T00:00:00', integers: 12 }, + { datetimes: '2021-01-01T00:00:00', integers: 13 }, + { datetimes: '2021-02-01T00:00:00', integers: 21 }, + { datetimes: '2021-03-01T00:00:00', integers: 22 }, + { datetimes: '2021-03-01T00:00:00', integers: 33 }, + ], + }, + xAxis: [ + { + type: 'category', + }, + ], + yAxis: [ + { + type: 'value', + }, + ], + series: [ + { + type: 'bar', + }, + ], + }, + }, + { + name: 'datetime desc by integer groupedColumn', + input: { + dataframeName: 'df', + chartType: 'groupedColumn', + xAxis: datetimesDFColumn, + xAxisName: null, + xAxisSort: 'descending', + xAxisGroupFunction: null, + yAxes: [ + { + series: [ + { + axisName: null, + chartType: null, + column: integersDFColumn, + aggregateFunction: null, + colorBy: null, + }, + ], + }, + ], + }, + result: { + dataset: { + dimensions: ['datetimes', 'integers'], + source: [ + { datetimes: '2021-03-03T00:00:00', integers: 33 }, + { datetimes: '2021-03-02T00:00:00', integers: 22 }, + { datetimes: '2021-02-01T00:00:00', integers: 21 }, + { datetimes: '2021-01-03T00:00:00', integers: 13 }, + { datetimes: '2021-01-02T00:00:00', integers: 12 }, + { datetimes: '2021-01-01T00:00:00', integers: 11 }, + ], + }, xAxis: [ { - data: [1, 2, 3], type: 'category', }, ], @@ -200,7 +365,6 @@ df = pd.DataFrame({ ], series: [ { - data: [1, 2, 3], type: 'bar', }, ], @@ -235,7 +399,7 @@ df = pd.DataFrame({ expect(result.success).toBe(true) if (result.success) { - expect(result.output.result).toEqual(result) + expect(result.data).toEqual(test.result) } }) } diff --git a/apps/api/src/python/visualizations-v2.ts b/apps/api/src/python/visualizations-v2.ts index 64a7697c..3ed765f2 100644 --- a/apps/api/src/python/visualizations-v2.ts +++ b/apps/api/src/python/visualizations-v2.ts @@ -9,10 +9,11 @@ import { executeCode, PythonExecutionError } from './index.js' import { IJupyterManager } from '../jupyter/index.js' import { VisualizationV2BlockInput, - VisualizationV2BlockOutput, + VisualizationV2BlockOutputResult, } from '@briefer/editor' import AggregateError from 'aggregate-error' import { z } from 'zod' +import { logger } from '../logger.js' function getCode(dataframe: DataFrame, input: VisualizationV2BlockInput) { const strInput = JSON.stringify(input) @@ -20,34 +21,135 @@ function getCode(dataframe: DataFrame, input: VisualizationV2BlockInput) { from datetime import datetime def _briefer_create_visualization(df, options): - x_axis = [{ - "type": "category", - "data": df[options["xAxis"]["name"]].tolist() - }] - y_axis = [] - batata = [] - - for y_axis_options in options["yAxes"]: - y_axis.append({ - "type": "value", - }) - for series_options in y_axis_options["series"]: - batata.append({ - "type": "bar", - "data": df[series_options["column"]["name"]].tolist() - }) - - print(json.dumps({ - "success": True, - "output": { - "executedAt": datetime.now().isoformat(), - "result":{ - "xAxis": x_axis, - "yAxis": y_axis, - "series": batata - } - } - })) + def extract_chart_type(chartType): + if chartType == "groupedColumn": + return "bar" + elif chartType == "stackedColumn": + return "bar" + elif chartType == "hundredPercentStackedColumn": + return "bar" + elif chartType == "line": + return "line" + elif chartType == "area": + return "line" + elif chartType == "hundredPercentStackedArea": + return "line" + elif chartType == "scatterPlot": + return "scatter" + elif chartType == "pie": + raise ValueError("Pie chart is not implemented yet") + elif chartType == "histogram": + raise ValueError("Histogram chart is not supported") + elif chartType == "trend": + raise ValueError("Trend chart is not supported") + elif chartType == "number": + raise ValueError("Number chart is not supported") + + def convert_value(column, value): + if pd.api.types.is_numeric_dtype(column): + return pd.to_numeric(value, errors='coerce') + + if pd.api.types.is_datetime64_any_dtype(column): + return value.isoformat() + + return value + + def group_dataframe(df, options): + if options["xAxisGroupFunction"]: + freq = { + 'year': 'Y', + 'quarter': 'Q', + 'month': 'M', + 'week': 'W', + 'date': 'D', + 'hours': 'h', + 'minutes': 'min', + 'seconds': 's' + }.get(options["xAxisGroupFunction"], None) + + if freq: + df[options["xAxis"]["name"]] = pd.to_datetime(df[options["xAxis"]["name"]]) + # Group by the specified frequency but keep all rows + df["_grouped"] = df[options["xAxis"]["name"]].dt.to_period(freq).dt.start_time + return df + + return df + + def sort_dataframe(df, options): + if options["xAxisSort"]: + return df.sort_values( + by=options["xAxis"]["name"], + ascending=options["xAxisSort"] == "ascending" + ) + + return df + + def prepare_chart_df(df, options): + # Prepare data by grouping + result = group_dataframe(df.copy(), options) + if "_grouped" in result: + result[options["xAxis"]["name"]] = result["_grouped"] + result = result.drop(columns=["_grouped"]) + + result = sort_dataframe(result, options) + + return result + + + chart_df = prepare_chart_df(df, options) + + + data = { + "dataset": { + "dimensions": [options["xAxis"]["name"]], + "source": [], + }, + "xAxis": [{ + "type": "category", + }], + "yAxis": [], + "series": [], + } + + + defaultType = extract_chart_type(options["chartType"]) + for y_axis in options["yAxes"]: + for series in y_axis["series"]: + chart_type = extract_chart_type(series["chartType"] or options["chartType"]) + data["dataset"]["dimensions"].append(series["column"]["name"]) + data["yAxis"].append({ + "type": "value", + }) + data["series"].append({ + "type": chart_type, + }) + + index = 0 + for _, row in chart_df.iterrows(): + x_name = options["xAxis"]["name"] + x_value = convert_value(chart_df[x_name], row[x_name]) + + data["dataset"]["source"].append({ + x_name: x_value, + }) + for y_axis in options["yAxes"]: + for series in y_axis["series"]: + y_name = series["column"]["name"] + y_value = convert_value(chart_df[y_name], row[y_name]) + + data["dataset"]["source"][index][y_name] = y_value + index += 1 + + + output = json.dumps({ + "type": "result", + "data": { + "success": True, + "data": data + } + }, default=str) + + print(output) if "${dataframe.name}" in globals(): @@ -55,7 +157,16 @@ if "${dataframe.name}" in globals(): options = json.loads(${JSON.stringify(strInput)}) _briefer_create_visualization(df, options) else: - print(json.dumps({"success": False, "reason": "dataframe-not-found"}))` + output = json.dumps({ + "type":"result", + "data": { + "success": False, + "reason": "dataframe-not-found" + } + }, default=str) + print(output)` + + console.log(code) return code } @@ -63,7 +174,7 @@ else: const CreateVisualizationResult = z.union([ z.object({ success: z.literal(true), - output: VisualizationV2BlockOutput, + data: VisualizationV2BlockOutputResult, }), z.object({ success: z.literal(false), @@ -118,21 +229,59 @@ export async function createVisualizationV2( case 'stdio': switch (output.name) { case 'stdout': - for (const line of output.text.split('\n')) { - if (!result?.success) { - try { - console.log(line) - console.log(line) - console.log(JSON.parse(line)) - } catch (e) { - console.log(e) - } + for (const l of output.text.split('\n')) { + const line = l.trim() + if (line === '') { + continue + } + if (!result?.success) { const parsed = jsonString - .pipe(CreateVisualizationResult) + .pipe( + z.union([ + z.object({ + type: z.literal('log'), + message: z.string(), + }), + z.object({ + type: z.literal('result'), + data: CreateVisualizationResult, + }), + ]) + ) .safeParse(line.trim()) if (parsed.success) { - result = parsed.data + switch (parsed.data.type) { + case 'log': + console.log( + JSON.stringify( + { + workspaceId, + sessionId, + message: parsed.data.message, + input, + }, + null, + 2 + ), + 'createVisualizationV2 log' + ) + logger().info( + { + workspaceId, + sessionId, + message: parsed.data.message, + input, + }, + 'createVisualizationV2 log' + ) + break + case 'result': + result = parsed.data.data + break + default: + exhaustiveCheck(parsed.data) + } } else { outputParsingErrors.push(parsed.error) } diff --git a/apps/api/src/yjs/v2/executor/visualization.ts b/apps/api/src/yjs/v2/executor/visualization.ts index 622d3235..f39a0db0 100644 --- a/apps/api/src/yjs/v2/executor/visualization.ts +++ b/apps/api/src/yjs/v2/executor/visualization.ts @@ -275,8 +275,11 @@ export class VisualizationExecutor implements IVisualizationExecutor { events.visUpdate(attrs.input.chartType) if (result.success) { - console.log(result.output) - block.setAttribute('output', result.output) + const output = { + executedAt: new Date().toISOString(), + result: result.data, + } + block.setAttribute('output', output) } else { } executionItem.setCompleted('success') diff --git a/apps/web/src/components/v2Editor/customBlocks/visualizationV2/index.tsx b/apps/web/src/components/v2Editor/customBlocks/visualizationV2/index.tsx index b041c92e..c6c6bbab 100644 --- a/apps/web/src/components/v2Editor/customBlocks/visualizationV2/index.tsx +++ b/apps/web/src/components/v2Editor/customBlocks/visualizationV2/index.tsx @@ -255,9 +255,12 @@ function VisualizationV2Block(props: Props) { const onChangeXAxisSort = useCallback( (sort: 'ascending' | 'descending') => { - // props.block.setAttribute('xAxisSort', sort) + props.block.setAttribute('input', { + ...attrs.input, + xAxisSort: sort, + }) }, - [props.block] + [props.block, attrs.input] ) const onChangeHistogramFormat = useCallback( diff --git a/packages/editor/src/blocks/visualization-v2.ts b/packages/editor/src/blocks/visualization-v2.ts index 74098d98..6011b81c 100644 --- a/packages/editor/src/blocks/visualization-v2.ts +++ b/packages/editor/src/blocks/visualization-v2.ts @@ -49,7 +49,6 @@ const OrdinalRawValue = z.string().or(z.number()) const CategoryAxisBaseOption = z.object({ type: z.literal('category'), - data: z.array(OrdinalRawValue), }) const TimeAxisBaseOption = z.object({ @@ -68,13 +67,23 @@ const CartesianAxisOption = z }) .and(AxisBaseOption) +const DataSet = z.object({ + dimensions: z.array(z.string()), + source: z.array(z.record(OrdinalRawValue)), +}) + +const EchartsType = z.union([ + z.literal('bar'), + z.literal('line'), + z.literal('scatter'), +]) export const VisualizationV2BlockOutputResult = z.object({ + dataset: DataSet, xAxis: z.array(CartesianAxisOption), yAxis: z.array(CartesianAxisOption), series: z.array( z.object({ - data: z.array(OrdinalRawValue), - type: z.literal('bar'), + type: EchartsType, }) ), })