From 37c340c8d67b2e5368b603133c91c4aa72a747d0 Mon Sep 17 00:00:00 2001 From: Advitya Gemawat Date: Mon, 24 Jul 2023 11:25:14 -0400 Subject: [PATCH] Vision Model Overview E2E Tests - Refactor (#2185) * mo test refactor ckpt * test fixes * test flags for vision * test fixes * auto lint fixes * removed duplicate constants * comment fixes * test & build fixes * test fixes * auto lint fixes * comment updates --- .../modelOverview.spec.ts | 10 +++ .../modelOverview.spec.ts | 9 +++ .../modelOverview.spec.ts | 10 +++ ...FridgeImageClassificationModelDebugging.ts | 15 +++- .../FridgeMultilabelModelDebugging.ts | 10 ++- .../FridgeObjectDetectionModelDebugging.ts | 11 ++- .../modelOverview/describeModelOverview.ts | 26 +++++-- ...tasetCohortsViewBasicElementsArePresent.ts | 74 +++++++++++++------ ...rtsViewElementsAfterSelectionArePresent.ts | 30 ++++---- .../ensureNewCohortsShowUpInCharts.ts | 9 ++- 10 files changed, 155 insertions(+), 49 deletions(-) create mode 100644 apps/widget-e2e/src/integration/modelAssessment/responsibleaitoolboxFridgeImageClassificationModelDebugging/modelOverview.spec.ts create mode 100644 apps/widget-e2e/src/integration/modelAssessment/responsibleaitoolboxFridgeMultilabelImageClassificationModelDebugging/modelOverview.spec.ts create mode 100644 apps/widget-e2e/src/integration/modelAssessment/responsibleaitoolboxFridgeObjectDetectionModelDebugging/modelOverview.spec.ts diff --git a/apps/widget-e2e/src/integration/modelAssessment/responsibleaitoolboxFridgeImageClassificationModelDebugging/modelOverview.spec.ts b/apps/widget-e2e/src/integration/modelAssessment/responsibleaitoolboxFridgeImageClassificationModelDebugging/modelOverview.spec.ts new file mode 100644 index 0000000000..3f9ec58830 --- /dev/null +++ b/apps/widget-e2e/src/integration/modelAssessment/responsibleaitoolboxFridgeImageClassificationModelDebugging/modelOverview.spec.ts @@ -0,0 +1,10 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +import { + describeModelOverview, + modelAssessmentDatasets +} from "@responsible-ai/e2e"; +const datasetShape = + modelAssessmentDatasets.FridgeImageClassificationModelDebugging; +describeModelOverview(datasetShape, "FridgeImageClassificationModelDebugging"); diff --git a/apps/widget-e2e/src/integration/modelAssessment/responsibleaitoolboxFridgeMultilabelImageClassificationModelDebugging/modelOverview.spec.ts b/apps/widget-e2e/src/integration/modelAssessment/responsibleaitoolboxFridgeMultilabelImageClassificationModelDebugging/modelOverview.spec.ts new file mode 100644 index 0000000000..e758cd415a --- /dev/null +++ b/apps/widget-e2e/src/integration/modelAssessment/responsibleaitoolboxFridgeMultilabelImageClassificationModelDebugging/modelOverview.spec.ts @@ -0,0 +1,9 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +import { + describeModelOverview, + modelAssessmentDatasets +} from "@responsible-ai/e2e"; +const datasetShape = modelAssessmentDatasets.FridgeMultilabelModelDebugging; +describeModelOverview(datasetShape, "FridgeMultilabelModelDebugging"); diff --git a/apps/widget-e2e/src/integration/modelAssessment/responsibleaitoolboxFridgeObjectDetectionModelDebugging/modelOverview.spec.ts b/apps/widget-e2e/src/integration/modelAssessment/responsibleaitoolboxFridgeObjectDetectionModelDebugging/modelOverview.spec.ts new file mode 100644 index 0000000000..31a2a7fde9 --- /dev/null +++ b/apps/widget-e2e/src/integration/modelAssessment/responsibleaitoolboxFridgeObjectDetectionModelDebugging/modelOverview.spec.ts @@ -0,0 +1,10 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +import { + describeModelOverview, + modelAssessmentDatasets +} from "@responsible-ai/e2e"; +const datasetShape = + modelAssessmentDatasets.FridgeObjectDetectionModelDebugging; +describeModelOverview(datasetShape, "FridgeObjectDetectionModelDebugging"); diff --git a/libs/e2e/src/lib/describer/modelAssessment/datasets/FridgeImageClassificationModelDebugging.ts b/libs/e2e/src/lib/describer/modelAssessment/datasets/FridgeImageClassificationModelDebugging.ts index 7afa834ce7..3950762275 100644 --- a/libs/e2e/src/lib/describer/modelAssessment/datasets/FridgeImageClassificationModelDebugging.ts +++ b/libs/e2e/src/lib/describer/modelAssessment/datasets/FridgeImageClassificationModelDebugging.ts @@ -36,6 +36,19 @@ export const FridgeImageClassificationModelDebugging = { name: "All data", sampleSize: "134" } - ] + ], + newCohort: { + metrics: { + accuracy: "0.9", + macroF1: "0.9", + macroPrecision: "0.9", + macroRecall: "0.9", + microF1: "0.9", + microPrecision: "0.9", + microRecall: "0.9" + }, + name: "CohortCreateE2E-image-classification", + sampleSize: "5" + } } }; diff --git a/libs/e2e/src/lib/describer/modelAssessment/datasets/FridgeMultilabelModelDebugging.ts b/libs/e2e/src/lib/describer/modelAssessment/datasets/FridgeMultilabelModelDebugging.ts index 72e607f6c3..c994e74830 100644 --- a/libs/e2e/src/lib/describer/modelAssessment/datasets/FridgeMultilabelModelDebugging.ts +++ b/libs/e2e/src/lib/describer/modelAssessment/datasets/FridgeMultilabelModelDebugging.ts @@ -31,6 +31,14 @@ export const FridgeMultilabelModelDebugging = { name: "All data", sampleSize: "10" } - ] + ], + newCohort: { + metrics: { + exactMatchRatio: "1", + hammingScore: "1" + }, + name: "CohortCreateE2E-multilabel", + sampleSize: "3" + } } }; diff --git a/libs/e2e/src/lib/describer/modelAssessment/datasets/FridgeObjectDetectionModelDebugging.ts b/libs/e2e/src/lib/describer/modelAssessment/datasets/FridgeObjectDetectionModelDebugging.ts index d7e3c1cb88..7a97a35e13 100644 --- a/libs/e2e/src/lib/describer/modelAssessment/datasets/FridgeObjectDetectionModelDebugging.ts +++ b/libs/e2e/src/lib/describer/modelAssessment/datasets/FridgeObjectDetectionModelDebugging.ts @@ -32,6 +32,15 @@ export const FridgeObjectDetectionModelDebugging = { name: "All data", sampleSize: "5" } - ] + ], + newCohort: { + metrics: { + averagePrecision: "1", + averageRecall: "1", + meanAveragePrecision: "1" + }, + name: "CohortCreateE2E-object-detection", + sampleSize: "2" + } } }; diff --git a/libs/e2e/src/lib/describer/modelAssessment/modelOverview/describeModelOverview.ts b/libs/e2e/src/lib/describer/modelAssessment/modelOverview/describeModelOverview.ts index a95d7a1d89..ec171853be 100644 --- a/libs/e2e/src/lib/describer/modelAssessment/modelOverview/describeModelOverview.ts +++ b/libs/e2e/src/lib/describer/modelAssessment/modelOverview/describeModelOverview.ts @@ -22,13 +22,22 @@ export function describeModelOverview( isNotebookTest = true ): void { describe(testName, () => { + const isVision = + datasetShape.isObjectDetection || + datasetShape.isMultiLabel || + datasetShape.isImageClassification + ? true + : false; if (isNotebookTest) { before(() => { visit(name); }); } else { before(() => { - cy.visit(`#/modelAssessment/${name}/light/english/Version-2`); + const dashboardName = isVision + ? "modelAssessmentVision" + : "modelAssessment"; + cy.visit(`#/${dashboardName}/${name}/light/english/Version-2`); }); } @@ -38,7 +47,8 @@ export function describeModelOverview( ensureAllModelOverviewDatasetCohortsViewBasicElementsArePresent( datasetShape, false, - isNotebookTest + isNotebookTest, + isVision ); }); @@ -57,7 +67,8 @@ export function describeModelOverview( ); ensureAllModelOverviewFeatureCohortsViewElementsAfterSelectionArePresent( datasetShape, - 1 + 1, + isVision ); }); @@ -69,16 +80,19 @@ export function describeModelOverview( ); ensureAllModelOverviewFeatureCohortsViewElementsAfterSelectionArePresent( datasetShape, - 2 + 2, + isVision ); }); it("should show new cohorts in charts", () => { - ensureNewCohortsShowUpInCharts(datasetShape, isNotebookTest); + ensureNewCohortsShowUpInCharts(datasetShape, isNotebookTest, isVision); }); it("should pivot between charts when clicking", () => { - ensureChartsPivot(datasetShape, isNotebookTest, true); + if (!isVision) { + ensureChartsPivot(datasetShape, isNotebookTest, true); + } }); } else { it("should not have 'Model overview' component", () => { diff --git a/libs/e2e/src/lib/describer/modelAssessment/modelOverview/ensureAllModelOverviewDatasetCohortsViewBasicElementsArePresent.ts b/libs/e2e/src/lib/describer/modelAssessment/modelOverview/ensureAllModelOverviewDatasetCohortsViewBasicElementsArePresent.ts index aef7b38769..cc1c846203 100644 --- a/libs/e2e/src/lib/describer/modelAssessment/modelOverview/ensureAllModelOverviewDatasetCohortsViewBasicElementsArePresent.ts +++ b/libs/e2e/src/lib/describer/modelAssessment/modelOverview/ensureAllModelOverviewDatasetCohortsViewBasicElementsArePresent.ts @@ -14,7 +14,8 @@ import { getNumberOfCohorts } from "./numberOfCohorts"; export function ensureAllModelOverviewDatasetCohortsViewBasicElementsArePresent( datasetShape: IModelAssessmentData, includeNewCohort: boolean, - isNotebookTest: boolean + isNotebookTest: boolean, + isVision: boolean ): void { const data = datasetShape.modelOverviewData; const initialCohorts = data?.initialCohorts; @@ -23,7 +24,10 @@ export function ensureAllModelOverviewDatasetCohortsViewBasicElementsArePresent( "not.exist" ); if (isNotebookTest) { - if (getNumberOfCohorts(datasetShape, includeNewCohort) <= 1) { + if ( + getNumberOfCohorts(datasetShape, includeNewCohort) <= 1 || + datasetShape.isObjectDetection + ) { cy.get(Locators.ModelOverviewHeatmapVisualDisplayToggle).should( "not.exist" ); @@ -45,6 +49,24 @@ export function ensureAllModelOverviewDatasetCohortsViewBasicElementsArePresent( "meanSquaredError", "meanPrediction" ); + } else if (datasetShape.isImageClassification) { + metricsOrder.push( + "accuracy", + "f1Score", + "precisionScore", + "recallScore", + "falsePositiveRate", + "falseNegativeRate", + "selectionRate" + ); + } else if (datasetShape.isMultiLabel) { + metricsOrder.push("exactMatchRatio", "hammingScore"); + } else if (datasetShape.isObjectDetection) { + metricsOrder.push( + "meanAveragePrecision", + "averagePrecision", + "averageRecall" + ); } else { metricsOrder.push("accuracy"); if (!datasetShape.isMulticlass) { @@ -69,17 +91,6 @@ export function ensureAllModelOverviewDatasetCohortsViewBasicElementsArePresent( }); }); - if (isNotebookTest) { - cy.get(Locators.ModelOverviewHeatmapCells) - .should("have.length", (cohorts?.length || 0) * (metricsOrder.length + 1)) - .each(($cell) => { - // somehow the cell string is one invisible character longer, trim - expect($cell.text().slice(0, $cell.text().length - 1)).to.be.oneOf( - heatmapCellContents - ); - }); - } - cy.get( Locators.ModelOverviewDisaggregatedAnalysisBaseCohortDisclaimer ).should("not.exist"); @@ -87,17 +98,32 @@ export function ensureAllModelOverviewDatasetCohortsViewBasicElementsArePresent( "not.exist" ); - const defaultVisibleChart = getDefaultVisibleChart( - datasetShape.isRegression, - datasetShape.isBinary - ); - assertChartVisibility(datasetShape, defaultVisibleChart); - - if (defaultVisibleChart === Locators.ModelOverviewMetricChart) { - ensureNotebookModelOverviewMetricChartIsCorrect( - isNotebookTest, - datasetShape, - includeNewCohort + if (!isVision) { + if (isNotebookTest) { + cy.get(Locators.ModelOverviewHeatmapCells) + .should( + "have.length", + (cohorts?.length || 0) * (metricsOrder.length + 1) + ) + .each(($cell) => { + // somehow the cell string is one invisible character longer, trim + expect($cell.text().slice(0, $cell.text().length - 1)).to.be.oneOf( + heatmapCellContents + ); + }); + } + const defaultVisibleChart = getDefaultVisibleChart( + datasetShape.isRegression, + datasetShape.isBinary ); + assertChartVisibility(datasetShape, defaultVisibleChart); + + if (defaultVisibleChart === Locators.ModelOverviewMetricChart) { + ensureNotebookModelOverviewMetricChartIsCorrect( + isNotebookTest, + datasetShape, + includeNewCohort + ); + } } } diff --git a/libs/e2e/src/lib/describer/modelAssessment/modelOverview/ensureAllModelOverviewFeatureCohortsViewElementsAfterSelectionArePresent.ts b/libs/e2e/src/lib/describer/modelAssessment/modelOverview/ensureAllModelOverviewFeatureCohortsViewElementsAfterSelectionArePresent.ts index 5de340c621..5f03d3d399 100644 --- a/libs/e2e/src/lib/describer/modelAssessment/modelOverview/ensureAllModelOverviewFeatureCohortsViewElementsAfterSelectionArePresent.ts +++ b/libs/e2e/src/lib/describer/modelAssessment/modelOverview/ensureAllModelOverviewFeatureCohortsViewElementsAfterSelectionArePresent.ts @@ -12,27 +12,31 @@ import { export function ensureAllModelOverviewFeatureCohortsViewElementsAfterSelectionArePresent( datasetShape: IModelAssessmentData, - selectedFeatures: number + selectedFeatures: number, + isVision: boolean ): void { cy.get(Locators.ModelOverviewFeatureSelection).should("exist"); cy.get(Locators.ModelOverviewFeatureConfigurationActionButton).should( "exist" ); - cy.get(Locators.ModelOverviewHeatmapVisualDisplayToggle).should("exist"); cy.get(Locators.ModelOverviewDatasetCohortStatsTable).should("not.exist"); - cy.get(Locators.ModelOverviewDisaggregatedAnalysisTable).should("exist"); - const defaultVisibleChart = getDefaultVisibleChart( - datasetShape.isRegression, - datasetShape.isBinary - ); - assertChartVisibility(datasetShape, defaultVisibleChart); + if (!isVision) { + cy.get(Locators.ModelOverviewHeatmapVisualDisplayToggle).should("exist"); // TODO: check! + cy.get(Locators.ModelOverviewDisaggregatedAnalysisTable).should("exist"); - assertNumberOfChartRowsEqual( - datasetShape, - selectedFeatures, - defaultVisibleChart - ); + const defaultVisibleChart = getDefaultVisibleChart( + datasetShape.isRegression, + datasetShape.isBinary + ); + assertChartVisibility(datasetShape, defaultVisibleChart); + + assertNumberOfChartRowsEqual( + datasetShape, + selectedFeatures, + defaultVisibleChart + ); + } } function assertNumberOfChartRowsEqual( diff --git a/libs/e2e/src/lib/describer/modelAssessment/modelOverview/ensureNewCohortsShowUpInCharts.ts b/libs/e2e/src/lib/describer/modelAssessment/modelOverview/ensureNewCohortsShowUpInCharts.ts index f7d9d5abce..fe3db8f1dd 100644 --- a/libs/e2e/src/lib/describer/modelAssessment/modelOverview/ensureNewCohortsShowUpInCharts.ts +++ b/libs/e2e/src/lib/describer/modelAssessment/modelOverview/ensureNewCohortsShowUpInCharts.ts @@ -9,18 +9,21 @@ import { ensureAllModelOverviewDatasetCohortsViewBasicElementsArePresent } from export function ensureNewCohortsShowUpInCharts( datasetShape: IModelAssessmentData, - isNotebookTest: boolean + isNotebookTest: boolean, + isVision: boolean ): void { cy.get(Locators.ModelOverviewCohortViewDatasetCohortViewButton).click(); ensureAllModelOverviewDatasetCohortsViewBasicElementsArePresent( datasetShape, false, - isNotebookTest + isNotebookTest, + isVision ); createCohort(datasetShape.modelOverviewData?.newCohort?.name); ensureAllModelOverviewDatasetCohortsViewBasicElementsArePresent( datasetShape, true, - isNotebookTest + isNotebookTest, + isVision ); }