-
Notifications
You must be signed in to change notification settings - Fork 143
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(ava/lite-insight): 新增多维度下钻波动归因方法 #658
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
import { dimensionDrillDownAttribution } from '../../../../../src/insight/insights/extractors/causalInference/dimensionDrillDown'; | ||
|
||
import type { DrillDownProps } from '../../../../../src/insight/insights/extractors/causalInference/types'; | ||
|
||
const data = [ | ||
{ | ||
City: 'huangshishi', | ||
Province: 'shanxi', | ||
ClientGender: 'male', | ||
OrderDate: '2022/2/19', | ||
Price: 13722.76, | ||
UnitCost: 292.72, | ||
}, | ||
{ | ||
City: 'huangshishi', | ||
Province: 'shanxi', | ||
ClientGender: 'male', | ||
OrderDate: '2022/2/21', | ||
Price: 24020.88, | ||
UnitCost: 5447.57, | ||
}, | ||
{ | ||
City: 'huangshishi', | ||
Province: 'shanxi', | ||
ClientGender: 'male', | ||
OrderDate: '2022/2/16', | ||
Price: 40145.8, | ||
UnitCost: 3696.42, | ||
}, | ||
{ | ||
City: 'huanggangshi', | ||
Province: 'qinghai', | ||
ClientGender: 'male', | ||
OrderDate: '2022/2/18', | ||
Price: 99980.16, | ||
UnitCost: 34393.38, | ||
}, | ||
{ | ||
City: 'huanggangshi', | ||
Province: 'qinghai', | ||
ClientGender: 'ma le', | ||
OrderDate: '2022/2/17', | ||
Price: 12656.11, | ||
UnitCost: 6012.96, | ||
}, | ||
{ | ||
City: 'huanggangshi', | ||
Province: 'qinghai', | ||
ClientGender: 'male', | ||
OrderDate: '2022/2/19', | ||
Price: 42464.05, | ||
UnitCost: 3113.68, | ||
}, | ||
]; | ||
describe('MultiDim Test', () => { | ||
test('check the disassemable result', () => { | ||
const props: DrillDownProps = { | ||
sourceData: data, | ||
dimensions: ['Province', 'City'], | ||
targetMeasure: 'Price', | ||
timeSeriesDim: 'OrderDate', | ||
baseInterval: { | ||
startPoint: '2022/2/17', | ||
endPoint: '2022/2/18', | ||
}, | ||
currInterval: { | ||
startPoint: '2022/2/19', | ||
endPoint: '2022/2/20', | ||
}, | ||
}; | ||
const tempResult = dimensionDrillDownAttribution(props); | ||
const treeResult = tempResult.resultInTree; | ||
expect(treeResult?.City.huangshishi.info.currValue).toBe(13722.76); | ||
}); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 除了 currValue, 还有 diff, 贡献度等结果需要在测试用例中体现? |
||
}); |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -93,7 +93,7 @@ export type { | |
} from './data'; | ||
|
||
/* insight */ | ||
export { getInsights, generateInsightVisualizationSpec } from './insight'; | ||
export { getInsights, generateInsightVisualizationSpec, dimensionDrillDownAttribution } from './insight'; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. dimensionDrillDownAttribution 需要单独导出么,目前其他算法都是通过 getInsights 指定 insightType 导出的。单独导出 dimensionDrillDownAttribution 不太符合统一的调用形式 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 暂时去掉了单独导出,但是感觉这种算子是有可能单独调用的,以后再考虑怎么导出。 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
export type { | ||
Datum, | ||
DomainType, | ||
|
@@ -124,6 +124,7 @@ export type { | |
LowVarianceInfo, | ||
CorrelationInfo, | ||
InsightsResult, | ||
DimensionDrillDownProps, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 改为了 options 的格式。 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
} from './insight'; | ||
|
||
/* NTV (Narrative Text Vis) */ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
export { getInsights } from './pipeline'; | ||
export { generateInsightVisualizationSpec } from './pipeline/visualize'; | ||
export { dimensionDrillDownAttribution } from './insights/extractors/causalInference/dimensionDrillDown'; | ||
export type { DrillDownProps as DimensionDrillDownProps } from './insights/extractors/causalInference/types'; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 同上,确认下 dimensionDrillDownAttribution 是否可以不单独导出 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 去掉了 |
||
export * from './types'; |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
import { locatedInInterval, enumerateAllDimensionCombinationsByDFS } from './util'; | ||
|
||
import type { InfoType, DimensionDrillDownResult, TreeDim, DataLocation, DrillDownProps } from './types'; | ||
|
||
/**
 * Entry point of the dimension drill-down attribution.
 *
 * Rows of `sourceData` holding a `null`/`undefined` field are discarded. Each remaining row whose
 * `timeSeriesDim` value is located in `baseInterval` adds its `targetMeasure` to `globalDiff.baseValue`,
 * and each row located in `currInterval` adds it to `globalDiff.currValue`. Every row that falls in at
 * least one interval is then passed to `enumerateAllDimensionCombinationsByDFS` along with the shared
 * tree and flat dictionary.
 * NOTE(review): presumably that helper accumulates the per-dimension-combination values into both
 * structures — confirm against ./util.
 *
 * @returns the tree structure, the overall base/current/diff figures, and the values of the flat
 * dictionary as a list.
 */
export const dimensionDrillDownAttribution = ({
  sourceData,
  dimensions,
  targetMeasure: measure,
  timeSeriesDim: fluctuationDim,
  baseInterval,
  currInterval,
}: DrillDownProps): DimensionDrillDownResult => {
  /** keep only the rows in which every field carries a value */
  const isPresent = (value: unknown): boolean => value !== null && value !== undefined;
  const validRows = sourceData.filter((row) => Object.values(row).every((value) => isPresent(value)));

  const resultTree: TreeDim = {};
  const flatResultByKey = {};
  const globalDiff: InfoType = { baseValue: 0, currValue: 0, diff: 0 };

  /** walk the valid rows once, updating the global sums and the result structures */
  for (const row of validRows) {
    const inBase = locatedInInterval(row[fluctuationDim], baseInterval.startPoint, baseInterval.endPoint);
    if (inBase) {
      globalDiff.baseValue += row[measure] as number;
    }

    const inCurr = locatedInInterval(row[fluctuationDim], currInterval.startPoint, currInterval.endPoint);
    if (inCurr) {
      globalDiff.currValue += row[measure] as number;
    }

    /** rows outside both intervals do not take part in the drill down */
    if (!inBase && !inCurr) {
      continue;
    }

    /** a row matching both intervals is tagged 'right', since the current-interval check runs last */
    const location: DataLocation = inCurr ? 'right' : 'left';
    const path: string[] = [];
    enumerateAllDimensionCombinationsByDFS(
      row,
      0,
      dimensions,
      resultTree,
      flatResultByKey,
      path,
      measure,
      fluctuationDim,
      location
    );
  }

  globalDiff.diff = globalDiff.currValue - globalDiff.baseValue;

  return { resultInTree: resultTree, globalDiff, resultInList: Object.values(flatResultByKey) };
};
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 建议参考其他算子包一个 extractor function,这样外层可以用统一的方法调用各种类型的 insight extractor |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
import type { Datum } from '../../../types'; | ||
|
||
/** DataConfig specifies the input data with its focused dimensions and target measure to be analysed. */ | ||
export type DataConfig = { | ||
sourceData: Datum[]; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 这些信息部分在 InsightOptions 里已经定义了,建议走 getInsights 整体的流程,不重复指标、维度这些参数 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 删掉了 |
||
dimensions: string[]; | ||
measures: string[]; | ||
expression?: string; | ||
}; | ||
|
||
/** FluctInfo is the necessary input for fluctuation analysis. */ | ||
export type FluctInfo = { | ||
  /** Typically, fluctDim is a time dimension that the measure value varies on */ | ||
fluctDim: string; | ||
  /** baseInterval is the time interval that is used as the baseline for comparison */ | ||
baseInterval: CompareInterval; | ||
/** currInterval is the time interval that user is focusing on */ | ||
currInterval: CompareInterval; | ||
}; | ||
|
||
/** Time Interval in Fluctuation Analysis */ | ||
export type CompareInterval = { | ||
/** start time of this interval */ | ||
startPoint: string | number; | ||
/** end time of this interval */ | ||
endPoint: string | number; | ||
}; | ||
|
||
/** A flag indicating which interval a single data row belongs to ('left' = base interval, 'right' = current interval, 'none' = neither); used for aggregation */ | ||
export type DataLocation = 'left' | 'right' | 'none'; | ||
|
||
/** Record the calculation result */ | ||
export type InfoType = { | ||
/** Calculation value corresponding to baseInterval */ | ||
baseValue: number; | ||
/** Calculation value corresponding to currInterval */ | ||
currValue: number; | ||
/** diff = currValue - baseValue */ | ||
diff: number; | ||
}; | ||
|
||
/** Dimension drill down Result type */ | ||
export type DimensionDrillDownResult = { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 类型命名和结果的结构整体改一下,和 insight 中其他算子类似,结果使用 算法入参如 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
/** total difference */ | ||
globalDiff: InfoType; | ||
/** Tree like returned data */ | ||
resultInTree?: TreeDim; | ||
/** Flatten returned data */ | ||
resultInList?: FlattenResult[]; | ||
}; | ||
|
||
/** Measure decomposition Result type */ | ||
export type MeasureDecomposeResult = { | ||
/** total difference */ | ||
globalDiff: InfoType; | ||
|
||
resultByMeasure?: FunctionBasedResult; | ||
}; | ||
|
||
/** The first string is dimension name, the second string is dimension value */ | ||
export type DimWithValue = Record<string, string>; | ||
|
||
/** Dimension drill down attribution Result type that has been formalized into Datum[] */ | ||
export type FlattenResult = Partial<DimWithValue & InfoType>; | ||
|
||
/** Function based attribution Result type */ | ||
export type FunctionBasedResult = Record<string, InfoType>; | ||
|
||
/** Example: Tree Data Structure for dimension drill down analysis | ||
* | ||
* { | ||
* dimNameA: { | ||
* dimValueA1: { | ||
 *      info: {baseValue: 222, currValue: 555, diff: 333}, | ||
* drillDown: { | ||
* dimNameB: { | ||
* dimValueB1: { | ||
 *            info: {baseValue: 000, currValue: 111, diff: 111}, | ||
* drillDown: {} | ||
* }, | ||
* dimValueB2: { | ||
 *            info: {baseValue: 222, currValue: 444, diff: 222}, | ||
* drillDown: {} | ||
* } | ||
* } | ||
* } | ||
* }, | ||
* }, | ||
* dimNameB: { | ||
* dimValueB1: { | ||
 *      info: {baseValue: 000, currValue: 111, diff: 111}, | ||
* drillDown: {} | ||
* }, | ||
* dimValueB2: { | ||
 *      info: {baseValue: 222, currValue: 444, diff: 222}, | ||
* drillDown: {} | ||
* } | ||
* } | ||
* } | ||
* | ||
*/ | ||
|
||
/** The dimension name level of the tree or the first level in drillDown */ | ||
export interface TreeDim { | ||
[dimName: string]: TreeDimVal; | ||
} | ||
|
||
/** The dimension value level of the tree which is inside the dimension name level */ | ||
export type TreeDimVal = Record<string | number, TreeDrillDown>; | ||
|
||
/** The information level inside the dimension value */ | ||
export interface TreeDrillDown { | ||
info: InfoType; | ||
drillDown: TreeDim; | ||
} | ||
|
||
interface CausalInferenceProps { | ||
sourceData: Datum[]; | ||
dimensions: string[]; | ||
timeSeriesDim: string; | ||
baseInterval: CompareInterval; | ||
currInterval: CompareInterval; | ||
} | ||
|
||
/** Dimension drill down based attribution function props */ | ||
export interface DrillDownProps extends CausalInferenceProps { | ||
targetMeasure: string; | ||
} | ||
|
||
export interface MeasureDecomposeProps extends CausalInferenceProps { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 这个文件里有的地方用的 interface,有的用 type,不太统一 |
||
allMeasures: string[]; | ||
expression: string; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
commit message: `feat(ava/lite-insight)` --> `feat(ava/insight)`,且用英文写 commit message。补充下 pr 描述