Skip to content

Commit

Permalink
Add SCANOSS settings skip support
Browse files Browse the repository at this point in the history
* bug:ES-199 Keeps lines on results on replace rule

* chore:ES-200 Adds local profile on pom.xml file

* chore:SP-2071 Add skip support

* bug:SP-2070 Throws error on scanner failure

* chore: SP-2072 add Skip class on Settings

* chore:SP-2077 Adds documentation to filters package

* chore SP-2074: Add test with GitIgnore & AntPattern

* chore:SP-2078 Downgrade JGit version to v6.10.0

* chore:SP-2079 Renames Settings class to ScanossSettings

* chore:SP-2103 Avoids repeated lowercase conversion of filenames in filter

* chore:SP-2108 Updates CHANGELOG.md

---------

Co-authored-by: Agustin Groh <[email protected]>
  • Loading branch information
isasmendiagus and agustingroh authored Feb 17, 2025
1 parent 3b8a121 commit 7b3897b
Show file tree
Hide file tree
Showing 25 changed files with 4,655 additions and 124 deletions.
7 changes: 6 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- Upcoming changes...

## [0.10.0] - 2025-02-17
### Added
- Add support for the skip rule
- Improve file filtering

## [0.9.0] - 2025-02-03
### Added
Expand Down Expand Up @@ -104,4 +108,5 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
[0.7.1]: https://github.com/scanoss/scanoss.java/compare/v0.7.0...v0.7.1
[0.8.0]: https://github.com/scanoss/scanoss.java/compare/v0.7.1...v0.8.0
[0.8.1]: https://github.com/scanoss/scanoss.java/compare/v0.8.0...v0.8.1
[0.9.0]: https://github.com/scanoss/scanoss.java/compare/v0.8.1...v0.9.0
[0.9.0]: https://github.com/scanoss/scanoss.java/compare/v0.8.1...v0.9.0
[0.10.0]: https://github.com/scanoss/scanoss.java/compare/v0.9.0...v0.10.0
41 changes: 40 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

<groupId>com.scanoss</groupId>
<artifactId>scanoss</artifactId>
<version>0.9.0</version>
<version>0.10.0</version>
<packaging>jar</packaging>
<name>scanoss.java</name>
<url>https://github.com/scanoss/scanoss.java</url>
Expand Down Expand Up @@ -114,7 +114,19 @@
<optional>true</optional>
<scope>compile</scope>
</dependency>
<dependency>
<!-- Eclipse JGit, compile scope. NOTE(review): presumably backs the git-ignore style
     pattern matching used by the file/folder filters; confirm against the filters package. -->
<groupId>org.eclipse.jgit</groupId>
<artifactId>org.eclipse.jgit</artifactId>
<version>6.10.0.202406032230-r</version>
<scope>compile</scope>
</dependency>
<dependency>
<!-- Apache Ant, compile scope. NOTE(review): presumably pulled in for Ant-style path
     pattern matching; confirm which classes are actually used. -->
<groupId>org.apache.ant</groupId>
<artifactId>ant</artifactId>
<version>1.10.15</version>
<scope>compile</scope>

</dependency>

<!-- Test dependencies -->
<dependency>
Expand Down Expand Up @@ -301,5 +313,32 @@
</plugins>
</build>
</profile>
<profile>
<!-- "local" profile: points distributionManagement at the user's local Maven repository
     and sets gpg.skip, maven.javadoc.skip and maven.source.skip to true. -->
<id>local</id>
<distributionManagement>
<repository>
<id>local-repo</id>
<name>Local Repository</name>
<url>file://${user.home}/.m2/repository</url>
</repository>
</distributionManagement>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-install-plugin</artifactId>
<!-- NOTE(review): the install plugin version is taken from native.maven.plugin.version;
     confirm that this property is really meant for maven-install-plugin. -->
<version>${native.maven.plugin.version}</version>
<configuration>
<!-- Explicitly keep the install step enabled for this profile. -->
<skip>false</skip>
</configuration>
</plugin>
</plugins>
</build>
<properties>
<gpg.skip>true</gpg.skip>
<maven.javadoc.skip>true</maven.javadoc.skip>
<maven.source.skip>true</maven.source.skip>
</properties>
</profile>
</profiles>
</project>
160 changes: 66 additions & 94 deletions src/main/java/com/scanoss/Scanner.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,14 @@
import com.scanoss.dto.ScanFileResult;
import com.scanoss.exceptions.ScannerException;
import com.scanoss.exceptions.WinnowingException;
import com.scanoss.processor.FileProcessor;
import com.scanoss.processor.ScanFileProcessor;
import com.scanoss.processor.WfpFileProcessor;
import com.scanoss.filters.FilterConfig;
import com.scanoss.filters.factories.FileFilterFactory;
import com.scanoss.filters.factories.FolderFilterFactory;
import com.scanoss.processor.*;
import com.scanoss.rest.ScanApi;
import com.scanoss.settings.Settings;
import com.scanoss.settings.ScanossSettings;
import com.scanoss.utils.JsonUtils;
import lombok.Builder;
import lombok.Getter;
import lombok.NonNull;
import lombok.*;
import lombok.extern.slf4j.Slf4j;

import java.io.File;
Expand All @@ -49,6 +48,7 @@
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.function.Predicate;

import static com.scanoss.ScanossConstants.*;

Expand All @@ -64,36 +64,59 @@
public class Scanner {
@Builder.Default
private Boolean skipSnippets = Boolean.FALSE; // Skip snippet generations

@Builder.Default
private Boolean allExtensions = Boolean.FALSE; // Fingerprint all file extensions

@Builder.Default
private Boolean obfuscate = Boolean.FALSE; // Obfuscate file path

@Builder.Default
private Boolean hpsm = Boolean.FALSE; // Enable High Precision Snippet Matching data collection

@Builder.Default
private Boolean hiddenFilesFolders = Boolean.FALSE; // Enable Scanning of hidden files/folders

@Builder.Default
private Boolean allFolders = Boolean.FALSE; // Enable Scanning of all folders (except hidden)

@Builder.Default
private Integer numThreads = DEFAULT_WORKER_THREADS; // Number of parallel threads to use when processing a folder

@Builder.Default
private Duration timeout = Duration.ofSeconds(DEFAULT_TIMEOUT); // API POST timeout

@Builder.Default
private Integer retryLimit = DEFAULT_HTTP_RETRY_LIMIT; // Retry limit for posting scan requests
private String url; // Alternative scanning URL
private String apiKey; // API key
private String scanFlags; // Scan flags to pass to the API
private String sbomType; // SBOM type (identify/ignore)
private String sbom; // SBOM to supply while scanning
private int snippetLimit; // Size limit for a single line of generated snippet
private String customCert; // Custom certificate
private Proxy proxy; // Proxy
private Winnowing winnowing;
private ScanApi scanApi;
private ScanFileProcessor scanFileProcessor;
private WfpFileProcessor wfpFileProcessor;
private Settings settings;
private ScannerPostProcessor postProcessor;

private final String url; // Alternative scanning URL
private final String apiKey; // API key
private final String scanFlags; // Scan flags to pass to the API
private final String sbomType; // SBOM type (identify/ignore)
private final String sbom; // SBOM to supply while scanning
private final int snippetLimit; // Size limit for a single line of generated snippet
private final String customCert; // Custom certificate
private final Proxy proxy; // Proxy
private final Winnowing winnowing; // Produces the WFP/fingerprint for a file (see wfpFile)
private final ScanApi scanApi;
private final ScanFileProcessor scanFileProcessor;
private final WfpFileProcessor wfpFileProcessor; // Defaults to a processor built around this.winnowing
private final ScanossSettings settings; // Defaults to an empty ScanossSettings; supplies the scanning ignore patterns
private final ScannerPostProcessor postProcessor; // Defaults to ScannerPostProcessor.builder().build()
// Filter configuration; when not supplied it is built from allFolders, allExtensions,
// hiddenFilesFolders and the ignore patterns found in settings
private final FilterConfig filterConfig;
// Returns true for files that must be skipped; defaults to FileFilterFactory.build(filterConfig)
private Predicate<Path> fileFilter;
// Returns true for folders whose subtree must be skipped; defaults to FolderFilterFactory.build(filterConfig)
private Predicate<Path> folderFilter;

//TODO: Once this Lombok PR is merged https://github.com/projectlombok/lombok/pull/3723#pullrequestreview-2617412643
// Update Lombok dependency
/**
 * Hand-written part of the builder class.
 * <p>
 * Declares private {@code folderFilter} and {@code fileFilter} methods that ignore their
 * argument and return the builder unchanged.
 * NOTE(review): presumably these exist so that Lombok does not generate public builder
 * setters for the two filter predicates - confirm against the Lombok {@code @Builder} rules.
 */
public static class ScannerBuilder {
// No-op: the supplied predicate is discarded and the builder is returned as is.
private ScannerBuilder folderFilter(Predicate<Path> folderFilter) {
return this;
}
// No-op: the supplied predicate is discarded and the builder is returned as is.
private ScannerBuilder fileFilter(Predicate<Path> fileFilter) {
return this;
}
}

@SuppressWarnings("unused")
private Scanner(Boolean skipSnippets, Boolean allExtensions, Boolean obfuscate, Boolean hpsm,
Expand All @@ -102,7 +125,9 @@ private Scanner(Boolean skipSnippets, Boolean allExtensions, Boolean obfuscate,
Integer snippetLimit, String customCert, Proxy proxy,
Winnowing winnowing, ScanApi scanApi,
ScanFileProcessor scanFileProcessor, WfpFileProcessor wfpFileProcessor,
Settings settings, ScannerPostProcessor postProcessor
ScanossSettings settings, ScannerPostProcessor postProcessor, FilterConfig filterConfig,
Predicate<Path> fileFilter,
Predicate<Path> folderFilter
) {
this.skipSnippets = skipSnippets;
this.allExtensions = allExtensions;
Expand Down Expand Up @@ -134,9 +159,20 @@ private Scanner(Boolean skipSnippets, Boolean allExtensions, Boolean obfuscate,
this.wfpFileProcessor = Objects.requireNonNullElseGet(wfpFileProcessor, () -> WfpFileProcessor.builder()
.winnowing(this.winnowing)
.build());
this.settings = Objects.requireNonNullElseGet(settings, () -> Settings.builder().build());
this.settings = Objects.requireNonNullElseGet(settings, () -> ScanossSettings.builder().build());
this.postProcessor = Objects.requireNonNullElseGet(postProcessor, () ->
ScannerPostProcessor.builder().build()); }
ScannerPostProcessor.builder().build());

this.filterConfig = Objects.requireNonNullElseGet(filterConfig, () -> FilterConfig.builder()
.allFolders(allFolders)
.allExtensions(allExtensions)
.hiddenFilesFolders(hiddenFilesFolders)
.gitIgnorePatterns(this.settings.getScanningIgnorePattern())
.build());

this.fileFilter = Objects.requireNonNullElseGet(fileFilter , () -> FileFilterFactory.build(this.filterConfig));
this.folderFilter = Objects.requireNonNullElseGet(folderFilter, () -> FolderFilterFactory.build(this.filterConfig));
}

/**
* Generate a WFP/Fingerprint for the given file
Expand All @@ -157,70 +193,6 @@ public String wfpFile(@NonNull String filename) throws ScannerException, Winnowi
return this.winnowing.wfpForFile(filename, filename);
}

/**
 * Determine if a folder should be processed or not.
 * <p>
 * A folder is skipped when it is hidden (its name starts with "." but is not "." itself) and
 * hidden files/folders are not enabled. Unless all folders are enabled, it is also skipped when
 * its lower-cased name ends with an entry of {@code ScanossConstants.FILTERED_DIRS} or
 * {@code ScanossConstants.FILTERED_DIR_EXT}.
 *
 * @param name folder/directory to review
 * @return <code>true</code> if the folder should be skipped, <code>false</code> otherwise
 */
private Boolean filterFolder(String name) {
    // Locale.ROOT keeps the suffix comparison independent of the JVM default locale
    // (with a Turkish default locale "I" would otherwise lower-case to a dotless i).
    String nameLower = name.toLowerCase(java.util.Locale.ROOT);
    if (!hiddenFilesFolders && name.startsWith(".") && !name.equals(".")) {
        log.trace("Skipping hidden folder: {}", name);
        return true;
    }
    if (allFolders) { // skip the name checks if all folders is selected
        return false;
    }
    // The first matching ending decides; there is no need to keep scanning the lists.
    for (String ending : ScanossConstants.FILTERED_DIRS) {
        if (nameLower.endsWith(ending)) {
            log.trace("Skipping folder due to ending: {} - {}", name, ending);
            return true;
        }
    }
    for (String ending : ScanossConstants.FILTERED_DIR_EXT) {
        if (nameLower.endsWith(ending)) {
            log.trace("Skipping folder due to ending: {} - {}", name, ending);
            return true;
        }
    }
    return false;
}

/**
 * Decide whether a file must be left out of processing.
 * <p>
 * Checks are applied in this order: hidden file (unless hidden files/folders are enabled),
 * "all extensions" override, exact match against the filtered file names, and finally a
 * match against the filtered file endings.
 *
 * @param name filename to review
 * @return <code>true</code> if the file should be skipped, <code>false</code> otherwise
 */
private Boolean filterFile(String name) {
    // Hidden files are only read when explicitly requested
    boolean skipHidden = !hiddenFilesFolders;
    if (skipHidden && name.startsWith(".")) {
        log.trace("Skipping hidden file: {}", name);
        return true;
    }
    // When every extension is accepted, no further name checks apply
    if (this.allExtensions) {
        log.trace("Processing all file extensions: {}", name);
        return false;
    }
    // Exact file names that are never processed
    boolean isFilteredName = ScanossConstants.FILTERED_FILES.contains(name);
    if (isFilteredName) {
        log.trace("Skipping specific file: {}", name);
        return true;
    }
    // First filtered ending/extension the name ends with, if any
    String matchedEnding = ScanossConstants.FILTERED_EXTENSIONS.stream()
            .filter(name::endsWith)
            .findFirst()
            .orElse(null);
    if (matchedEnding == null) {
        return false;
    }
    log.trace("Skipping file due to ending: {} - {}", name, matchedEnding);
    return true;
}

/**
* Strip the leading string from the specified path
*
Expand Down Expand Up @@ -262,17 +234,16 @@ public List<String> processFolder(@NonNull String folder, FileProcessor processo
Files.walkFileTree(Paths.get(folder), new SimpleFileVisitor<>() {
@Override
public FileVisitResult preVisitDirectory(Path file, BasicFileAttributes attrs) {
String nameLower = file.getFileName().toString().toLowerCase();
if (attrs.isDirectory() && filterFolder(nameLower)) {
if(folderFilter.test(file)) {
log.debug("Processing file: {}", file.getFileName().toString());
return FileVisitResult.SKIP_SUBTREE; // Skip the rest of this directory tree
}
return FileVisitResult.CONTINUE;
}

@Override
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) {
String nameLower = file.getFileName().toString().toLowerCase();
if (attrs.isRegularFile() && !filterFile(nameLower) && attrs.size() > 0) {
if (attrs.isRegularFile() && !fileFilter.test(file) && attrs.size() > 0) {
String filename = file.toString();
Future<String> future = executorService.submit(() -> processor.process(filename, stripDirectory(folder, filename)));
futures.add(future);
Expand Down Expand Up @@ -321,7 +292,8 @@ public List<String> processFileList(@NonNull String root, @NonNull List<String>
Path path = Path.of(file);
boolean skipDir = false;
for (Path p : path) {
if (filterFolder(p.toString().toLowerCase())) { // should we skip this folder or not
// should we skip this folder or not
if (this.folderFilter.test(p)) { // should we skip this folder or not
skipDir = true;
break;
}
Expand All @@ -330,7 +302,7 @@ public List<String> processFileList(@NonNull String root, @NonNull List<String>
continue; // skip this file as the folder is not allowed
}
String nameLower = path.getFileName().toString().toLowerCase();
if (!filterFile(nameLower)) {
if (!this.fileFilter.test(path)) {
Path fullPath = Path.of(root, file);
File f = fullPath.toFile();
if (f.exists() && f.isFile() && f.length() > 0 && ! Files.isSymbolicLink(fullPath)) {
Expand Down
1 change: 1 addition & 0 deletions src/main/java/com/scanoss/ScannerPostProcessor.java
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ private ScanFileDetails createUpdatedResultDetails(ScanFileDetails existingCompo
.file(existingComponent.getFile())
.fileHash(existingComponent.getFileHash())
.fileUrl(existingComponent.getFileUrl())
.lines(existingComponent.getLines())
.purls(new String[]{newPurl.toString()})
.component(newPurl.getName())
.vendor(newPurl.getNamespace())
Expand Down
10 changes: 5 additions & 5 deletions src/main/java/com/scanoss/ScanossConstants.java
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ public class ScanossConstants {
static final int MAX_LONG_LINE_CHARS = 1000; // Maximum length of a single source line to be considered source code

// File extensions to ignore snippets for
static final List<String> SKIP_SNIPPET_EXT = Arrays.asList(
public static final List<String> SKIP_SNIPPET_EXT = Arrays.asList(
".exe", ".zip", ".tar", ".tgz", ".gz", ".7z", ".rar", ".jar", ".war", ".ear", ".class", ".pyc",
".o", ".a", ".so", ".obj", ".dll", ".lib", ".out", ".app", ".bin",
".lst", ".dat", ".json", ".htm", ".html", ".xml", ".md", ".txt",
Expand All @@ -62,17 +62,17 @@ public class ScanossConstants {
);

// Folders to skip
static final List<String> FILTERED_DIRS = Arrays.asList(
public static final List<String> FILTERED_DIRS = Arrays.asList(
"nbproject", "nbbuild", "nbdist", "__pycache__", "venv", "_yardoc", "eggs", "wheels", "htmlcov",
"__pypackages__", "target"
);

// Folder endings to skip
static final List<String> FILTERED_DIR_EXT = List.of(".egg-info");
public static final List<String> FILTERED_DIR_EXT = List.of(".egg-info");


// File extensions to skip
static final List<String> FILTERED_EXTENSIONS = Arrays.asList(
public static final List<String> FILTERED_EXTENSIONS = Arrays.asList(
".1", ".2", ".3", ".4", ".5", ".6", ".7", ".8", ".9", ".ac", ".adoc", ".am",
".asciidoc", ".bmp", ".build", ".cfg", ".chm", ".class", ".cmake", ".cnf",
".conf", ".config", ".contributors", ".copying", ".crt", ".csproj", ".css",
Expand All @@ -95,7 +95,7 @@ public class ScanossConstants {
);

// Files to skip
static final List<String> FILTERED_FILES = Arrays.asList(
public static final List<String> FILTERED_FILES = Arrays.asList(
"gradlew", "gradlew.bat", "mvnw", "mvnw.cmd", "gradle-wrapper.jar", "maven-wrapper.jar",
"thumbs.db", "babel.config.js", "license.txt", "license.md", "copying.lib", "makefile"
);
Expand Down
Loading

0 comments on commit 7b3897b

Please sign in to comment.