Skip to content

Commit

Permalink
feat(ava/lite-insight): 新增多维度下钻归因方法
Browse files Browse the repository at this point in the history
  • Loading branch information
GuangMingYouBei committed Apr 25, 2023
1 parent 8ccdbf2 commit 81e2e7d
Show file tree
Hide file tree
Showing 7 changed files with 397 additions and 2 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import { dimensionDrillDownAttribution } from '../../../../../src/insight/insights/extractors/causalInference/dimensionDrillDown';

import type { DrillDownProps } from '../../../../../src/insight/insights/extractors/causalInference/types';

/**
 * Compact fixture rows; each tuple is
 * [City, Province, ClientGender, OrderDate, Price, UnitCost].
 */
const orderRows: [string, string, string, string, number, number][] = [
  ['huangshishi', 'shanxi', 'male', '2022/2/19', 13722.76, 292.72],
  ['huangshishi', 'shanxi', 'male', '2022/2/21', 24020.88, 5447.57],
  ['huangshishi', 'shanxi', 'male', '2022/2/16', 40145.8, 3696.42],
  ['huanggangshi', 'qinghai', 'male', '2022/2/18', 99980.16, 34393.38],
  ['huanggangshi', 'qinghai', 'ma le', '2022/2/17', 12656.11, 6012.96],
  ['huanggangshi', 'qinghai', 'male', '2022/2/19', 42464.05, 3113.68],
];

/** Order records used as the source data of the drill down test, one plain object per row. */
const data = orderRows.map(([City, Province, ClientGender, OrderDate, Price, UnitCost]) => ({
  City,
  Province,
  ClientGender,
  OrderDate,
  Price,
  UnitCost,
}));
describe('MultiDim Test', () => {
  test('check the disassemable result', () => {
    // The two intervals that are compared on the OrderDate field.
    const baseInterval = { startPoint: '2022/2/17', endPoint: '2022/2/18' };
    const currInterval = { startPoint: '2022/2/19', endPoint: '2022/2/20' };

    const drillDownProps: DrillDownProps = {
      sourceData: data,
      dimensions: ['Province', 'City'],
      targetMeasure: 'Price',
      timeSeriesDim: 'OrderDate',
      baseInterval,
      currInterval,
    };

    const { resultInTree } = dimensionDrillDownAttribution(drillDownProps);

    // Only one 'huangshishi' order (13722.76) is dated inside the current interval.
    expect(resultInTree?.City.huangshishi.info.currValue).toBe(13722.76);
  });
});
4 changes: 3 additions & 1 deletion packages/ava/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,9 @@
"heap-js": "^2.1.6",
"lodash": "^4.17.21",
"regression": "^2.0.1",
"tslib": "^2.3.1"
"tslib": "^2.3.1",
"expr-eval": "^2.0.2",
"moment": "^2.29.4"
},
"devDependencies": {
"@antv/algorithm": "^0.1.25",
Expand Down
3 changes: 2 additions & 1 deletion packages/ava/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ export type {
} from './data';

/* insight */
export { getInsights, generateInsightVisualizationSpec } from './insight';
export { getInsights, generateInsightVisualizationSpec, dimensionDrillDownAttribution } from './insight';
export type {
Datum,
DomainType,
Expand Down Expand Up @@ -124,6 +124,7 @@ export type {
LowVarianceInfo,
CorrelationInfo,
InsightsResult,
DimensionDrillDownProps,
} from './insight';

/* NTV (Narrative Text Vis) */
Expand Down
2 changes: 2 additions & 0 deletions packages/ava/src/insight/index.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
export { getInsights } from './pipeline';
export { generateInsightVisualizationSpec } from './pipeline/visualize';
export { dimensionDrillDownAttribution } from './insights/extractors/causalInference/dimensionDrillDown';
export type { DrillDownProps as DimensionDrillDownProps } from './insights/extractors/causalInference/types';
export * from './types';
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import { locatedInInterval, enumerateAllDimensionCombinationsByDFS } from './util';

import type { InfoType, DimensionDrillDownResult, TreeDim, DataLocation, DrillDownProps } from './types';

/**
 * Main function for dimension drill down attribution.
 *
 * Rows of `sourceData` that contain any `null` / `undefined` value are discarded. Every
 * remaining row is tested against `baseInterval` and `currInterval` on the `timeSeriesDim`
 * field with `locatedInInterval`. For each interval the row falls in, its `targetMeasure`
 * value is added to the matching global total and the row is handed to
 * `enumerateAllDimensionCombinationsByDFS`, which is expected to populate the tree and the
 * flattened dictionary that are returned.
 *
 * @param props - drill down configuration, see `DrillDownProps`
 * @returns `globalDiff` (base total, current total and `diff = currValue - baseValue`),
 * the tree result and the values of the flattened dictionary as a list
 */
export const dimensionDrillDownAttribution = ({
  sourceData,
  dimensions,
  targetMeasure: measure,
  timeSeriesDim: fluctuationDim,
  baseInterval,
  currInterval,
}: DrillDownProps): DimensionDrillDownResult => {
  /** remove invalid data */
  const data = sourceData.filter((item) => !Object.values(item).some((v) => v === null || v === undefined));

  const globalDiff: InfoType = {
    baseValue: 0,
    currValue: 0,
    diff: 0,
  };

  const resultTree: TreeDim = {};
  const dictFlatten = {};
  /** traverse the input data and build the result data structure; */
  data.forEach((item) => {
    /**
     * Every interval this row belongs to. A row can match both intervals when they overlap;
     * it is then reported once per interval, so that the per-dimension results stay
     * consistent with the global totals instead of only seeing the row as 'right'.
     */
    const locations: DataLocation[] = [];
    if (locatedInInterval(item[fluctuationDim], baseInterval.startPoint, baseInterval.endPoint)) {
      locations.push('left');
      // NOTE(review): assumes the measure field holds a number — confirm against callers
      globalDiff.baseValue += item[measure] as number;
    }
    if (locatedInInterval(item[fluctuationDim], currInterval.startPoint, currInterval.endPoint)) {
      locations.push('right');
      globalDiff.currValue += item[measure] as number;
    }
    locations.forEach((location) => {
      // a fresh path container for each traversal
      const deque: string[] = [];
      enumerateAllDimensionCombinationsByDFS(
        item,
        0,
        dimensions,
        resultTree,
        dictFlatten,
        deque,
        measure,
        fluctuationDim,
        location
      );
    });
  });
  globalDiff.diff = globalDiff.currValue - globalDiff.baseValue;

  return { resultInTree: resultTree, globalDiff, resultInList: Object.values(dictFlatten) };
};
133 changes: 133 additions & 0 deletions packages/ava/src/insight/insights/extractors/causalInference/types.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
import type { Datum } from '../../../types';

/** DataConfig specifies the input data with its focused dimensions and target measures to be analysed. */
export type DataConfig = {
/** Rows of data to analyse */
sourceData: Datum[];
/** Names of the dimensions in focus */
dimensions: string[];
/** Names of the measures in focus */
measures: string[];
/** Optional expression string. NOTE(review): presumably a formula combining `measures` — confirm against the consumer */
expression?: string;
};

/** FluctInfo is the necessary input for fluctuation analysis. */
export type FluctInfo = {
/** Typically, fluctDim is a time dimension that the measure value varies on */
fluctDim: string;
/** baseInterval is the time interval that serves as the baseline of the comparison */
baseInterval: CompareInterval;
/** currInterval is the time interval that the user is focusing on */
currInterval: CompareInterval;
};

/** Time interval in fluctuation analysis, described by its two end points */
export type CompareInterval = {
/** start time of this interval, given as a string or a number */
startPoint: string | number;
/** end time of this interval, given as a string or a number. NOTE(review): whether the end point is inclusive is decided by the interval check, not by this type — confirm */
endPoint: string | number;
};

/**
 * A flag that indicates which compared interval a single row of data belongs to, which is useful for aggregation:
 * 'left' stands for the base interval, 'right' for the current interval and 'none' for neither of them.
 */
export type DataLocation = 'left' | 'right' | 'none';

/** Records the calculation result of one comparison between the base interval and the current interval */
export type InfoType = {
/** Calculation value corresponding to baseInterval */
baseValue: number;
/** Calculation value corresponding to currInterval */
currValue: number;
/** diff = currValue - baseValue */
diff: number;
};

/** Dimension drill down result type */
export type DimensionDrillDownResult = {
/** Total base value, total current value and their difference over all analysed data */
globalDiff: InfoType;
/** Tree-like returned data, nested by dimension name and then by dimension value */
resultInTree?: TreeDim;
/** Flattened returned data */
resultInList?: FlattenResult[];
};

/** Measure decomposition result type */
export type MeasureDecomposeResult = {
/** total difference */
globalDiff: InfoType;

/** Calculation results stored by string key. NOTE(review): presumably keyed by measure name — confirm against the producer */
resultByMeasure?: FunctionBasedResult;
};

/** Maps a dimension name (the key) to a dimension value (the value) */
export type DimWithValue = Record<string, string>;

/**
 * Dimension drill down attribution result type that has been formalized into Datum[]:
 * dimension name/value pairs together with the calculation result, all of them optional.
 * NOTE(review): DimWithValue carries a string index signature, so the InfoType fields may not keep
 * their `number` type once `Partial` is applied to the intersection — verify the resolved type.
 */
export type FlattenResult = Partial<DimWithValue & InfoType>;

/** Function based attribution result type: maps a string key to its calculation result. NOTE(review): the key is presumably a measure name — confirm */
export type FunctionBasedResult = Record<string, InfoType>;

/** Example: Tree Data Structure for dimension drill down analysis
 *
 * {
 *   dimNameA: {
 *     dimValueA1: {
 *       info: {baseValue: 222, currValue: 555, diff: 333},
 *       drillDown: {
 *         dimNameB: {
 *           dimValueB1: {
 *             info: {baseValue: 0, currValue: 111, diff: 111},
 *             drillDown: {}
 *           },
 *           dimValueB2: {
 *             info: {baseValue: 222, currValue: 444, diff: 222},
 *             drillDown: {}
 *           }
 *         }
 *       }
 *     },
 *   },
 *   dimNameB: {
 *     dimValueB1: {
 *       info: {baseValue: 0, currValue: 111, diff: 111},
 *       drillDown: {}
 *     },
 *     dimValueB2: {
 *       info: {baseValue: 222, currValue: 444, diff: 222},
 *       drillDown: {}
 *     }
 *   }
 * }
 *
 */

/** The dimension name level of the tree, also used as the first level inside `drillDown`; keyed by dimension name */
export interface TreeDim {
[dimName: string]: TreeDimVal;
}

/** The dimension value level of the tree which is inside the dimension name level; keyed by dimension value (string or number) */
export type TreeDimVal = Record<string | number, TreeDrillDown>;

/** The information level inside the dimension value */
export interface TreeDrillDown {
/** Calculation result of this dimension value */
info: InfoType;
/** Nested results of further dimensions under this dimension value; `{}` in the leaves of the example tree */
drillDown: TreeDim;
}

/** Props shared by the attribution functions declared in this file */
interface CausalInferenceProps {
/** Rows of data to analyse */
sourceData: Datum[];
/** Names of the dimensions to analyse */
dimensions: string[];
/** Name of the field whose value is compared against the two intervals */
timeSeriesDim: string;
/** The interval used as the baseline of the comparison */
baseInterval: CompareInterval;
/** The interval that is compared against the baseline */
currInterval: CompareInterval;
}

/** Dimension drill down based attribution function props */
export interface DrillDownProps extends CausalInferenceProps {
/** Name of the measure field whose values are summed up for both intervals */
targetMeasure: string;
}

/** Measure decomposition based attribution function props. NOTE(review): no consumer is visible in this chunk — confirm the field semantics below */
export interface MeasureDecomposeProps extends CausalInferenceProps {
/** Names of all measures taking part in the analysis */
allMeasures: string[];
/** Expression string; presumably a formula over `allMeasures` — verify against the consumer */
expression: string;
}
Loading

0 comments on commit 81e2e7d

Please sign in to comment.