From 9d5d57d64863ca90473f7911bc04c9873e8680be Mon Sep 17 00:00:00 2001 From: Maxwell Elliott Date: Thu, 21 Sep 2023 14:40:42 -0700 Subject: [PATCH] WIP: Reenable modified filepaths entry This should greatly speed up hash calculation time more updates --- cli/BUILD | 43 +++++++++++++++++++ .../com/bazel_diff/bazel/BazelClient.kt | 6 +-- .../com/bazel_diff/bazel/BazelQueryService.kt | 13 +++++- .../bazel_diff/cli/GenerateHashesCommand.kt | 8 +++- .../com/bazel_diff/hash/BuildGraphHasher.kt | 36 ++++++++++------ .../kotlin/com/bazel_diff/hash/RuleHasher.kt | 9 ++-- .../com/bazel_diff/hash/SourceFileHasher.kt | 15 +++++-- .../com/bazel_diff/hash/TargetHasher.kt | 10 +++-- .../interactor/GenerateHashesInteractor.kt | 15 ++++++- 9 files changed, 123 insertions(+), 32 deletions(-) diff --git a/cli/BUILD b/cli/BUILD index e00b77e4..6a0b1be1 100644 --- a/cli/BUILD +++ b/cli/BUILD @@ -19,6 +19,13 @@ genrule( stamp = 1, ) +config_setting( + name = "macos", + constraint_values = [ + "@@platforms//os:macos", + ], +) + java_binary( name = "bazel-diff", jvm_flags = select({ @@ -48,6 +55,10 @@ kt_jvm_library( kt_jvm_test( name = "BuildGraphHasherTest", + jvm_flags = select({ + ":macos": ["-Djava.security.manager=allow"], + "//conditions:default": [], + }), test_class = "com.bazel_diff.hash.BuildGraphHasherTest", runtime_deps = [":cli-test-lib"], ) @@ -57,42 +68,70 @@ kt_jvm_test( data = [ ":src/test/kotlin/com/bazel_diff/hash/fixture/foo.ts", ], + jvm_flags = select({ + ":macos": ["-Djava.security.manager=allow"], + "//conditions:default": [], + }), test_class = "com.bazel_diff.hash.SourceFileHasherTest", runtime_deps = [":cli-test-lib"], ) kt_jvm_test( name = "CalculateImpactedTargetsInteractorTest", + jvm_flags = select({ + ":macos": ["-Djava.security.manager=allow"], + "//conditions:default": [], + }), test_class = "com.bazel_diff.interactor.CalculateImpactedTargetsInteractorTest", runtime_deps = [":cli-test-lib"], ) kt_jvm_test( name = "NormalisingPathConverterTest", + jvm_flags = select({ + ":macos": ["-Djava.security.manager=allow"], + "//conditions:default": [], + }), test_class = "com.bazel_diff.cli.converter.NormalisingPathConverterTest", runtime_deps = [":cli-test-lib"], ) kt_jvm_test( name = "OptionsConverterTest", + jvm_flags = select({ + ":macos": ["-Djava.security.manager=allow"], + "//conditions:default": [], + }), test_class = "com.bazel_diff.cli.converter.OptionsConverterTest", runtime_deps = [":cli-test-lib"], ) kt_jvm_test( name = "DeserialiseHashesInteractorTest", + jvm_flags = select({ + ":macos": ["-Djava.security.manager=allow"], + "//conditions:default": [], + }), test_class = "com.bazel_diff.interactor.DeserialiseHashesInteractorTest", runtime_deps = [":cli-test-lib"], ) kt_jvm_test( name = "BazelRuleTest", + jvm_flags = select({ + ":macos": ["-Djava.security.manager=allow"], + "//conditions:default": [], + }), test_class = "com.bazel_diff.bazel.BazelRuleTest", runtime_deps = [":cli-test-lib"], ) kt_jvm_test( name = "E2ETest", + jvm_flags = select({ + ":macos": ["-Djava.security.manager=allow"], + "//conditions:default": [], + }), test_class = "com.bazel_diff.e2e.E2ETest", runtime_deps = [":cli-test-lib"], ) @@ -103,6 +142,10 @@ kt_jvm_test( ":src/test/kotlin/com/bazel_diff/io/fixture/correct.json", ":src/test/kotlin/com/bazel_diff/io/fixture/wrong.json", ], + jvm_flags = select({ + ":macos": ["-Djava.security.manager=allow"], + "//conditions:default": [], + }), test_class = "com.bazel_diff.io.ContentHashProviderTest", runtime_deps = [ ":cli-test-lib", diff --git a/cli/src/main/kotlin/com/bazel_diff/bazel/BazelClient.kt b/cli/src/main/kotlin/com/bazel_diff/bazel/BazelClient.kt index d68cbbdd..d3d9132c 100644 --- a/cli/src/main/kotlin/com/bazel_diff/bazel/BazelClient.kt +++ b/cli/src/main/kotlin/com/bazel_diff/bazel/BazelClient.kt @@ -2,9 +2,10 @@ package com.bazel_diff.bazel import com.bazel_diff.log.Logger import com.google.devtools.build.lib.query2.proto.proto2api.Build -import org.koin.core.component.KoinComponent -import org.koin.core.component.inject +import java.nio.file.Path import java.util.* +import org.koin.core.component.inject +import org.koin.core.component.KoinComponent class BazelClient(private val useCquery: Boolean, private val fineGrainedHashExternalRepos: Set) : KoinComponent { private val logger: Logger by inject() @@ -54,4 +55,3 @@ class BazelClient(private val useCquery: Boolean, private val fineGrainedHashExt } } } - diff --git a/cli/src/main/kotlin/com/bazel_diff/bazel/BazelQueryService.kt b/cli/src/main/kotlin/com/bazel_diff/bazel/BazelQueryService.kt index 2e7f79b0..6594b85d 100644 --- a/cli/src/main/kotlin/com/bazel_diff/bazel/BazelQueryService.kt +++ b/cli/src/main/kotlin/com/bazel_diff/bazel/BazelQueryService.kt @@ -24,7 +24,11 @@ class BazelQueryService( ) : KoinComponent { private val logger: Logger by inject() - suspend fun query(query: String, useCquery: Boolean = false): List { + suspend fun query( + query: String, + useCquery: Boolean = false, + supressFailure: Boolean = false) + : List { // Unfortunately, there is still no direct way to tell if a target is compatible or not with the proto output // by itself. So we do an extra cquery with the trick at // https://bazel.build/extending/platforms#cquery-incompatible-target-detection to first find all compatible @@ -58,7 +62,12 @@ class BazelQueryService( } @OptIn(ExperimentalCoroutinesApi::class) - private suspend fun runQuery(query: String, useCquery: Boolean, outputCompatibleTargets: Boolean = false): File { + private suspend fun runQuery( + query: String, + useCquery: Boolean, + outputCompatibleTargets: Boolean = false, + supressFailure: Boolean = false + ): File { val queryFile = Files.createTempFile(null, ".txt").toFile() queryFile.deleteOnExit() val outputFile = Files.createTempFile(null, ".bin").toFile() diff --git a/cli/src/main/kotlin/com/bazel_diff/cli/GenerateHashesCommand.kt b/cli/src/main/kotlin/com/bazel_diff/cli/GenerateHashesCommand.kt index 08e461bd..cb4ca7aa 100644 --- a/cli/src/main/kotlin/com/bazel_diff/cli/GenerateHashesCommand.kt +++ b/cli/src/main/kotlin/com/bazel_diff/cli/GenerateHashesCommand.kt @@ -136,6 +136,12 @@ class GenerateHashesCommand : Callable { ) var ignoredRuleHashingAttributes: Set = emptySet() + @CommandLine.Option( + names = ["-m", "--modified-filepaths"], + description = ["Experimental: A text file containing a newline separated list of filepaths (relative to the workspace) these filepaths should represent the modified files between the specified revisions and will be used to scope what files are hashed during hash generation."] + ) + var modifiedFilepaths: File? = null + @CommandLine.Spec lateinit var spec: CommandLine.Model.CommandSpec @@ -160,7 +166,7 @@ class GenerateHashesCommand : Callable { ) } - return when (GenerateHashesInteractor().execute(seedFilepaths, outputPath, ignoredRuleHashingAttributes, targetType, includeTargetType)) { + return when (GenerateHashesInteractor().execute(seedFilepaths, outputPath, ignoredRuleHashingAttributes, targetType, includeTargetType, modifiedFilepaths)) { true -> CommandLine.ExitCode.OK false -> CommandLine.ExitCode.SOFTWARE }.also { stopKoin() } diff --git a/cli/src/main/kotlin/com/bazel_diff/hash/BuildGraphHasher.kt b/cli/src/main/kotlin/com/bazel_diff/hash/BuildGraphHasher.kt index 56510222..950879f4 100644 --- a/cli/src/main/kotlin/com/bazel_diff/hash/BuildGraphHasher.kt +++ b/cli/src/main/kotlin/com/bazel_diff/hash/BuildGraphHasher.kt @@ -8,18 +8,19 @@ import com.bazel_diff.extensions.toHexString import com.bazel_diff.log.Logger import com.google.common.collect.Sets import com.google.devtools.build.lib.query2.proto.proto2api.Build -import kotlinx.coroutines.Dispatchers -import kotlinx.coroutines.async -import kotlinx.coroutines.runBlocking -import org.koin.core.component.KoinComponent -import org.koin.core.component.inject +import java.io.File import java.nio.file.Path +import java.util.Calendar +import java.util.concurrent.atomic.AtomicReference import java.util.concurrent.ConcurrentHashMap import java.util.concurrent.ConcurrentMap -import java.util.concurrent.atomic.AtomicReference import java.util.stream.Collectors import kotlin.io.path.readBytes -import java.util.Calendar +import kotlinx.coroutines.async +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.runBlocking +import org.koin.core.component.inject +import org.koin.core.component.KoinComponent class BuildGraphHasher(private val bazelClient: BazelClient) : KoinComponent { private val targetHasher: TargetHasher by inject() @@ -28,7 +29,8 @@ class BuildGraphHasher(private val bazelClient: BazelClient) : KoinComponent { fun hashAllBazelTargetsAndSourcefiles( seedFilepaths: Set = emptySet(), - ignoredAttrs: Set = emptySet() + ignoredAttrs: Set = emptySet(), + modifiedFilepaths: Set = emptySet() ): Map { val (sourceDigests, allTargets) = runBlocking { val targetsTask = async(Dispatchers.IO) { @@ -39,7 +41,7 @@ class BuildGraphHasher(private val bazelClient: BazelClient) : KoinComponent { val sourceDigestsFuture = async(Dispatchers.IO) { val sourceHashDurationEpoch = Calendar.getInstance().getTimeInMillis() - val sourceFileTargets = hashSourcefiles(sourceTargets) + val sourceFileTargets = hashSourcefiles(sourceTargets, modifiedFilepaths) val sourceHashDuration = Calendar.getInstance().getTimeInMillis() - sourceHashDurationEpoch logger.i { "Source file hashes calculated in $sourceHashDuration" } sourceFileTargets @@ -52,11 +54,15 @@ class BuildGraphHasher(private val bazelClient: BazelClient) : KoinComponent { seedForFilepaths, sourceDigests, allTargets, - ignoredAttrs + ignoredAttrs, + modifiedFilepaths ) } - private fun hashSourcefiles(targets: List): ConcurrentMap { + private fun hashSourcefiles( + targets: List, + modifiedFilepaths: Set + ): ConcurrentMap { val exception = AtomicReference(null) val result: ConcurrentMap = targets.parallelStream() .map { sourceFile: BazelTarget.SourceFile -> @@ -68,7 +74,7 @@ class BuildGraphHasher(private val bazelClient: BazelClient) : KoinComponent { } try { val sourceFileTarget = BazelSourceFileTarget(sourceFile.name, seed) - Pair(sourceFileTarget.name, sourceFileHasher.digest(sourceFileTarget)) + Pair(sourceFileTarget.name, sourceFileHasher.digest(sourceFileTarget, modifiedFilepaths)) } catch (e: Exception) { exception.set(e) null @@ -90,7 +96,8 @@ class BuildGraphHasher(private val bazelClient: BazelClient) : KoinComponent { seedHash: ByteArray, sourceDigests: ConcurrentMap, allTargets: List, - ignoredAttrs: Set + ignoredAttrs: Set, + modifiedFilepaths: Set ): Map { val ruleHashes: ConcurrentMap = ConcurrentHashMap() val targetToRule: MutableMap = HashMap() @@ -104,7 +111,8 @@ class BuildGraphHasher(private val bazelClient: BazelClient) : KoinComponent { sourceDigests, ruleHashes, seedHash, - ignoredAttrs + ignoredAttrs, + modifiedFilepaths ) Pair(target.name, TargetHash(target.javaClass.name.substringAfterLast('$'), targetDigest.toHexString())) } diff --git a/cli/src/main/kotlin/com/bazel_diff/hash/RuleHasher.kt b/cli/src/main/kotlin/com/bazel_diff/hash/RuleHasher.kt index 78a8a1d6..2af8de02 100644 --- a/cli/src/main/kotlin/com/bazel_diff/hash/RuleHasher.kt +++ b/cli/src/main/kotlin/com/bazel_diff/hash/RuleHasher.kt @@ -7,6 +7,7 @@ import com.google.common.annotations.VisibleForTesting import org.koin.core.component.KoinComponent import org.koin.core.component.inject import java.util.concurrent.ConcurrentMap +import java.nio.file.Path class RuleHasher(private val useCquery: Boolean, private val fineGrainedHashExternalRepos: Set) : KoinComponent { private val logger: Logger by inject() @@ -31,7 +32,8 @@ class RuleHasher(private val useCquery: Boolean, private val fineGrainedHashExte sourceDigests: ConcurrentMap, seedHash: ByteArray?, depPath: LinkedHashSet?, - ignoredAttrs: Set + ignoredAttrs: Set, + modifiedFilepaths: Set ): ByteArray { val depPathClone = if (depPath != null) LinkedHashSet(depPath) else LinkedHashSet() if (depPathClone.contains(rule.name)) { @@ -61,13 +63,14 @@ class RuleHasher(private val useCquery: Boolean, private val fineGrainedHashExte sourceDigests, seedHash, depPathClone, - ignoredAttrs + ignoredAttrs, + modifiedFilepaths ) safePutBytes(ruleInputHash) } else -> { - val heuristicDigest = sourceFileHasher.softDigest(BazelSourceFileTarget(ruleInput, ByteArray(0))) + val heuristicDigest = sourceFileHasher.softDigest(BazelSourceFileTarget(ruleInput, ByteArray(0)), modifiedFilepaths) when { heuristicDigest != null -> { logger.i { "Source file $ruleInput picked up as an input for rule ${rule.name}" } diff --git a/cli/src/main/kotlin/com/bazel_diff/hash/SourceFileHasher.kt b/cli/src/main/kotlin/com/bazel_diff/hash/SourceFileHasher.kt index 1496473d..4d88ff7a 100644 --- a/cli/src/main/kotlin/com/bazel_diff/hash/SourceFileHasher.kt +++ b/cli/src/main/kotlin/com/bazel_diff/hash/SourceFileHasher.kt @@ -38,7 +38,10 @@ class SourceFileHasher : KoinComponent { this.externalRepoResolver = externalRepoResolver } - fun digest(sourceFileTarget: BazelSourceFileTarget): ByteArray { + fun digest( + sourceFileTarget: BazelSourceFileTarget, + modifiedFilepaths: Set = emptySet() + ): ByteArray { return sha256 { val name = sourceFileTarget.name val filenamePath = if (name.startsWith("//")) { @@ -69,7 +72,11 @@ class SourceFileHasher : KoinComponent { val file = absoluteFilePath.toFile() if (file.exists()) { if (file.isFile) { - putFile(file) + if (modifiedFilepaths.isEmpty()) { + putFile(file) + } else if (modifiedFilepaths.stream().anyMatch { workingDirectory.resolve(it) == file }) { + putFile(file) + } } } else { logger.w { "File $absoluteFilePath not found" } @@ -80,7 +87,7 @@ class SourceFileHasher : KoinComponent { } } - fun softDigest(sourceFileTarget: BazelSourceFileTarget): ByteArray? { + fun softDigest(sourceFileTarget: BazelSourceFileTarget, modifiedFilepaths: Set = emptySet()): ByteArray? { val name = sourceFileTarget.name if (!name.startsWith("//")) return null @@ -90,6 +97,6 @@ class SourceFileHasher : KoinComponent { val file = absoluteFilePath.toFile() if (!file.exists() || !file.isFile) return null - return digest(sourceFileTarget) + return digest(sourceFileTarget, modifiedFilepaths) } } diff --git a/cli/src/main/kotlin/com/bazel_diff/hash/TargetHasher.kt b/cli/src/main/kotlin/com/bazel_diff/hash/TargetHasher.kt index 5eab92a9..4cc5a458 100644 --- a/cli/src/main/kotlin/com/bazel_diff/hash/TargetHasher.kt +++ b/cli/src/main/kotlin/com/bazel_diff/hash/TargetHasher.kt @@ -5,6 +5,7 @@ import com.bazel_diff.bazel.BazelTarget import org.koin.core.component.KoinComponent import org.koin.core.component.inject import java.util.concurrent.ConcurrentMap +import java.nio.file.Path class TargetHasher : KoinComponent { private val ruleHasher: RuleHasher by inject() @@ -15,7 +16,8 @@ class TargetHasher : KoinComponent { sourceDigests: ConcurrentMap, ruleHashes: ConcurrentMap, seedHash: ByteArray?, - ignoredAttrs: Set + ignoredAttrs: Set, + modifiedFilepaths: Set ): ByteArray { return when (target) { is BazelTarget.GeneratedFile -> { @@ -32,7 +34,8 @@ class TargetHasher : KoinComponent { sourceDigests, seedHash, depPath = null, - ignoredAttrs + ignoredAttrs, + modifiedFilepaths ) } } @@ -44,7 +47,8 @@ class TargetHasher : KoinComponent { sourceDigests, seedHash, depPath = null, - ignoredAttrs + ignoredAttrs, + modifiedFilepaths ) } is BazelTarget.SourceFile -> sha256 { diff --git a/cli/src/main/kotlin/com/bazel_diff/interactor/GenerateHashesInteractor.kt b/cli/src/main/kotlin/com/bazel_diff/interactor/GenerateHashesInteractor.kt index f686ffb6..ba6111ec 100644 --- a/cli/src/main/kotlin/com/bazel_diff/interactor/GenerateHashesInteractor.kt +++ b/cli/src/main/kotlin/com/bazel_diff/interactor/GenerateHashesInteractor.kt @@ -18,7 +18,7 @@ class GenerateHashesInteractor : KoinComponent { private val logger: Logger by inject() private val gson: Gson by inject() - fun execute(seedFilepaths: File?, outputPath: File?, ignoredRuleHashingAttributes: Set, targetTypes:Set?, includeTargetType: Boolean = false): Boolean { + fun execute(seedFilepaths: File?, outputPath: File?, ignoredRuleHashingAttributes: Set, targetTypes:Set?, includeTargetType: Boolean = false, modifiedFilepaths: File?): Boolean { return try { val epoch = Calendar.getInstance().getTimeInMillis() val seedFilepathsSet: Set = when { @@ -31,9 +31,20 @@ class GenerateHashesInteractor : KoinComponent { } else -> emptySet() } + var modifiedFilepathsSet: Set = when { + modifiedFilepaths != null -> { + BufferedReader(FileReader(modifiedFilepaths)).use { + it.readLines() + .map { line: String -> File(line).toPath() } + .toSet() + } + } + else -> emptySet() + } val hashes = buildGraphHasher.hashAllBazelTargetsAndSourcefiles( seedFilepathsSet, - ignoredRuleHashingAttributes + ignoredRuleHashingAttributes, + modifiedFilepathsSet ).let { if(targetTypes == null) { it