From c41b82264d3668feffb358b7ef059aba126e856b Mon Sep 17 00:00:00 2001
From: David Caravello <119438707+dcaravel@users.noreply.github.com>
Date: Tue, 14 Jan 2025 12:07:02 -0600
Subject: [PATCH] ROX-27618: Add env var to disable RHEL lineage usage (#1764)

---
 database/pgsql/rhelv2_layer.go      |  31 ++++++-
 database/pgsql/rhelv2_layer_test.go | 134 +++++++++++++++++++---------
 pkg/env/list.go                     |   7 ++
 3 files changed, 124 insertions(+), 48 deletions(-)

diff --git a/database/pgsql/rhelv2_layer.go b/database/pgsql/rhelv2_layer.go
index 7bfda747d..a6b9e7d22 100644
--- a/database/pgsql/rhelv2_layer.go
+++ b/database/pgsql/rhelv2_layer.go
@@ -15,6 +15,7 @@ import (
 	"github.com/stackrox/rox/pkg/utils"
 	"github.com/stackrox/scanner/database"
 	"github.com/stackrox/scanner/database/metrics"
+	"github.com/stackrox/scanner/pkg/env"
 )
 
 func (pgSQL *pgSQL) InsertRHELv2Layer(layer *database.RHELv2Layer) error {
@@ -46,11 +47,18 @@ func (pgSQL *pgSQL) InsertRHELv2Layer(layer *database.RHELv2Layer) error {
 func (pgSQL *pgSQL) insertRHELv2Layer(tx *sql.Tx, layer *database.RHELv2Layer) error {
 	defer metrics.ObserveQueryTime("insertRHELv2Layer", "layer", time.Now())
 
-	_, err := tx.Exec(insertRHELv2Layer, layer.Hash, layer.ParentHash, layer.Dist, pq.Array(layer.CPEs), layer.Lineage, layer.ParentLineage)
+	var lineage string
+	var parentLineage string
+	if env.RHLineage.Enabled() {
+		lineage = layer.Lineage
+		parentLineage = layer.ParentLineage
+	}
+
+	_, err := tx.Exec(insertRHELv2Layer, layer.Hash, layer.ParentHash, layer.Dist, pq.Array(layer.CPEs), lineage, parentLineage)
 	return err
 }
 
-func (pgSQL *pgSQL) insertRHELv2Packages(tx *sql.Tx, layer string, pkgs []*database.RHELv2Package, lineage string) error {
+func (pgSQL *pgSQL) insertRHELv2Packages(tx *sql.Tx, layer string, pkgs []*database.RHELv2Package, layerLineage string) error {
 	// Sort packages to avoid potential deadlock.
 	// Sort by the unique index (name, version, module, arch).
 	sort.SliceStable(pkgs, func(i, j int) bool {
@@ -80,6 +88,11 @@ func (pgSQL *pgSQL) insertRHELv2Packages(tx *sql.Tx, layer string, pkgs []*datab
 		}
 	}
 
+	var lineage string
+	if env.RHLineage.Enabled() {
+		lineage = layerLineage
+	}
+
 	for _, pkg := range pkgs {
 		if pkg.Name == "" {
 			continue
@@ -112,7 +125,12 @@ func (pgSQL *pgSQL) GetRHELv2Layers(layerHash, layerLineage string) ([]*database
 		return nil, handleError("GetRHELv2Layers.Begin()", err)
 	}
 
-	rows, err := tx.Query(searchRHELv2Layers, layerHash, layerLineage)
+	var lineage string
+	if env.RHLineage.Enabled() {
+		lineage = layerLineage
+	}
+
+	rows, err := tx.Query(searchRHELv2Layers, layerHash, lineage)
 	if err != nil {
 		return nil, err
 	}
@@ -177,7 +195,12 @@ func (pgSQL *pgSQL) populatePackages(tx *sql.Tx, layers []*database.RHELv2Layer)
 func (pgSQL *pgSQL) getPackagesByLayer(tx *sql.Tx, layer *database.RHELv2Layer) error {
 	defer metrics.ObserveQueryTime("getRHELv2Layers", "packagesByLayer", time.Now())
 
-	rows, err := tx.Query(searchRHELv2Package, layer.Hash, layer.Lineage)
+	var lineage string
+	if env.RHLineage.Enabled() {
+		lineage = layer.Lineage
+	}
+
+	rows, err := tx.Query(searchRHELv2Package, layer.Hash, lineage)
 	if err != nil {
 		return err
 	}
diff --git a/database/pgsql/rhelv2_layer_test.go b/database/pgsql/rhelv2_layer_test.go
index 286bb717b..01f82fa05 100644
--- a/database/pgsql/rhelv2_layer_test.go
+++ b/database/pgsql/rhelv2_layer_test.go
@@ -7,6 +7,7 @@ import (
 	"testing"
 
 	"github.com/stackrox/scanner/database"
+	"github.com/stackrox/scanner/pkg/env"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 )
@@ -199,31 +200,12 @@ func TestGetRHELv2Layers(t *testing.T) {
 // TestRHELv2LayerLineage verifies that data for duplicate layers with different parent
 // layers (lineage) is pulled correctly.
 func TestRHELv2LayerLineage(t *testing.T) {
-	datastore, err := openDatabaseForTest("RHELv2LayerLineage", false)
-	if err != nil {
-		t.Error(err)
-		return
-	}
-	defer datastore.Close()
-
-	// Two 'fake' images will be created, each with 3 layers, the DB will resemble:
-	// id |      hash       |   parent_hash   |  dist  |      cpes      |  lineage  | parent_lineage
-	// ----+-----------------+-----------------+--------+----------------+-----------+----------------
-	//   1 | sha256:base     |                 | rhel:8 |                |           |
-	//   2 | sha256:layer1-a | sha256:base     | rhel:8 | {cpe-a,cpe2-a} | lineage   |
-	//   3 | sha256:layer1-b | sha256:base     | rhel:8 | {cpe-b,cpe2-b} | lineage   |
-	//   4 | sha256:leaf     | sha256:layer1-a | rhel:8 |                | lineage-a | lineage
-	//   5 | sha256:leaf     | sha256:layer1-b | rhel:8 |                | lineage-b | lineage
-
 	// base layers
 	base := &database.RHELv2Layer{
 		Hash: "sha256:base",
 		Dist: "rhel:8",
 	}
 
-	err = datastore.InsertRHELv2Layer(base)
-	require.NoError(t, err)
-
 	layer1a := &database.RHELv2Layer{
 		Hash:          "sha256:layer1-a",
 		Lineage:       "lineage",
@@ -250,11 +232,6 @@ func TestRHELv2LayerLineage(t *testing.T) {
 		CPEs: []string{"cpe-b", "cpe2-b"},
 	}
 
-	err = datastore.InsertRHELv2Layer(layer1a)
-	require.NoError(t, err)
-	err = datastore.InsertRHELv2Layer(layer1b)
-	require.NoError(t, err)
-
 	leafa := &database.RHELv2Layer{
 		Hash:          "sha256:leaf", // for this test all leafs should have same digest
 		Lineage:       "lineage-a",   // lineage is specific to layer A
@@ -268,34 +245,103 @@ func TestRHELv2LayerLineage(t *testing.T) {
 	leafb.Lineage = "lineage-b"
 	leafb.ParentHash = "sha256:layer1-b"
 
-	err = datastore.InsertRHELv2Layer(leafa)
-	require.NoError(t, err)
-	err = datastore.InsertRHELv2Layer(leafb)
-	require.NoError(t, err)
+	prepDataStore := func(t *testing.T, name string) *pgSQL { // opens the test database called name and inserts base, layer1a, layer1b, leafa and leafb
+		datastore, err := openDatabaseForTest(name, false) // use the caller-supplied name rather than a hard-coded one
+		require.NoError(t, err)
+
+		err = datastore.InsertRHELv2Layer(base)
+		require.NoError(t, err)
+		err = datastore.InsertRHELv2Layer(layer1a)
+		require.NoError(t, err)
+		err = datastore.InsertRHELv2Layer(layer1b)
+		require.NoError(t, err)
+		err = datastore.InsertRHELv2Layer(leafa)
+		require.NoError(t, err)
+		err = datastore.InsertRHELv2Layer(leafb)
+		require.NoError(t, err)
+
+		return datastore
+	}
 
-	assertLayersEqual := func(t *testing.T, expected, actual *database.RHELv2Layer) {
+	assertLayersEqual := func(t *testing.T, expected, actual *database.RHELv2Layer, skipLineage bool) {
 		resetPackageIDs(actual)
 		assert.Equal(t, expected.Hash, actual.Hash, "Hash mismatch")
-		assert.Equal(t, expected.Lineage, actual.Lineage, "Lineage mismatch")
 		assert.Equal(t, expected.CPEs, actual.CPEs, "CPEs mistmatch")
 		assert.Equal(t, expected.Pkgs, actual.Pkgs, "Pkgs mismatch")
-	}
-
-	layers, err := datastore.GetRHELv2Layers("sha256:leaf", "lineage-a")
-	require.NoError(t, err)
-	require.Len(t, layers, 3)
 
-	assertLayersEqual(t, base, layers[0])
-	assertLayersEqual(t, layer1a, layers[1])
-	assertLayersEqual(t, leafa, layers[2])
+		expectedLineage := expected.Lineage
+		if skipLineage {
+			expectedLineage = ""
+		}
+		assert.Equal(t, expectedLineage, actual.Lineage, "Lineage mismatch")
+	}
 
-	layers, err = datastore.GetRHELv2Layers("sha256:leaf", "lineage-b")
-	require.NoError(t, err)
-	require.Len(t, layers, 3)
+	t.Run("enabled", func(t *testing.T) {
+		t.Setenv(env.RHLineage.EnvVar(), "true")
+
+		datastore := prepDataStore(t, "RHELv2LayerLineage_enabled")
+		defer datastore.Close()
+
+		// The DB will resemble:
+		// id |      hash       |   parent_hash   |  dist  |      cpes      |  lineage  | parent_lineage
+		// ----+-----------------+-----------------+--------+----------------+-----------+----------------
+		//   1 | sha256:base     |                 | rhel:8 |                |           |
+		//   2 | sha256:layer1-a | sha256:base     | rhel:8 | {cpe-a,cpe2-a} | lineage   |
+		//   3 | sha256:layer1-b | sha256:base     | rhel:8 | {cpe-b,cpe2-b} | lineage   |
+		//   4 | sha256:leaf     | sha256:layer1-a | rhel:8 |                | lineage-a | lineage
+		//   5 | sha256:leaf     | sha256:layer1-b | rhel:8 |                | lineage-b | lineage
+
+		layers, err := datastore.GetRHELv2Layers("sha256:leaf", "lineage-a")
+		require.NoError(t, err)
+		require.Len(t, layers, 3)
+
+		assertLayersEqual(t, base, layers[0], false)
+		assertLayersEqual(t, layer1a, layers[1], false)
+		assertLayersEqual(t, leafa, layers[2], false)
+
+		layers, err = datastore.GetRHELv2Layers("sha256:leaf", "lineage-b")
+		require.NoError(t, err)
+		require.Len(t, layers, 3)
+
+		assertLayersEqual(t, base, layers[0], false)
+		assertLayersEqual(t, layer1b, layers[1], false)
+		assertLayersEqual(t, leafb, layers[2], false)
+	})
+
+	t.Run("disable", func(t *testing.T) {
+		t.Setenv(env.RHLineage.EnvVar(), "false")
+
+		datastore := prepDataStore(t, "RHELv2LayerLineage_disabled")
+		defer datastore.Close()
+
+		// The DB will resemble:
+		// id |      hash       |   parent_hash   |  dist  |      cpes      | lineage | parent_lineage
+		// ----+-----------------+-----------------+--------+----------------+---------+----------------
+		//   1 | sha256:base     |                 | rhel:8 |                |         |
+		//   2 | sha256:layer1-a | sha256:base     | rhel:8 | {cpe-a,cpe2-a} |         |
+		//   3 | sha256:layer1-b | sha256:base     | rhel:8 | {cpe-b,cpe2-b} |         |
+		//   4 | sha256:leaf     | sha256:layer1-a | rhel:8 |                |         |
+		//
+		// Note: only the first leaf layer will be inserted (due to the insert
+		// query 'ON CONFLICT DO NOTHING' clause)
+
+		layers, err := datastore.GetRHELv2Layers("sha256:leaf", "lineage-a")
+		require.NoError(t, err)
+		require.Len(t, layers, 3)
+
+		assertLayersEqual(t, base, layers[0], true)
+		assertLayersEqual(t, layer1a, layers[1], true)
+		assertLayersEqual(t, leafa, layers[2], true)
+
+		layers, err = datastore.GetRHELv2Layers("sha256:leaf", "lineage-b")
+		require.NoError(t, err)
+		require.Len(t, layers, 3)
+
+		assertLayersEqual(t, base, layers[0], true)
+		assertLayersEqual(t, layer1a, layers[1], true) // the bug, would expect layer1b to be here
+		assertLayersEqual(t, leafb, layers[2], true)
+	})
 
-	assertLayersEqual(t, base, layers[0])
-	assertLayersEqual(t, layer1b, layers[1])
-	assertLayersEqual(t, leafb, layers[2])
 }
 
 // resetPackageIDs sets all package IDs to 0. Package IDs are DB sequence numbers
diff --git a/pkg/env/list.go b/pkg/env/list.go
index 36806af06..6a9719a89 100644
--- a/pkg/env/list.go
+++ b/pkg/env/list.go
@@ -44,4 +44,11 @@ var (
 	// LegacyNVDLoader when true will cause the loader to pull NVD data using
 	// the NVD Legacy Data Feeds, if false will pull from the NVD 2.0 API.
 	LegacyNVDLoader = RegisterBooleanSetting("ROX_LEGACY_NVD_LOADER", false)
+
+	// RHLineage when true will cause all parent layers (a.k.a. lineage) to be considered when
+	// storing scan results for RHEL image layers.
+	//
+	// Setting this to false will cause known scan inaccuracies; lineage should only be disabled as a
+	// temporary measure to address unforeseen stability issues.
+	RHLineage = RegisterBooleanSetting("ROX_RHEL_LINEAGE", true)
 )