From b4423aed5e38552ee05b657d78ce055d104dbdfc Mon Sep 17 00:00:00 2001 From: Agustin Isasmendi Date: Wed, 21 May 2025 17:44:59 +0200 Subject: [PATCH 1/5] chore(SP-2487): add utils `extractFilePathsFromWFPBlock()` and `extractFilePathFromWFPBlock()` --- .../com/scanoss/utils/WinnowingUtils.java | 48 ++++++++++ .../com/scanoss/utils/WinnowingUtilsTest.java | 87 +++++++++++++++++++ 2 files changed, 135 insertions(+) create mode 100644 src/test/java/com/scanoss/utils/WinnowingUtilsTest.java diff --git a/src/main/java/com/scanoss/utils/WinnowingUtils.java b/src/main/java/com/scanoss/utils/WinnowingUtils.java index 6425075..47b67b9 100644 --- a/src/main/java/com/scanoss/utils/WinnowingUtils.java +++ b/src/main/java/com/scanoss/utils/WinnowingUtils.java @@ -22,6 +22,13 @@ */ package com.scanoss.utils; +import org.jetbrains.annotations.NotNull; + +import java.util.HashSet; +import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + /** * SCANOSS Winnowing Utils Class *

@@ -47,4 +54,45 @@ public static char normalize(char c) { return 0; } } + + + /** + * Extracts the first/primary file path from a WFP block. + * This is a convenience method for single-file scenarios. + * + * @param wfpBlock the WFP block containing file entries + * @return the first extracted file path, or null if none found + */ + public static String extractFilePathFromWFPBlock(@NotNull String wfpBlock) { + Set paths = extractFilePathsFromWFPBlock(wfpBlock); + return paths.isEmpty() ? null : paths.iterator().next(); + } + + + /** + * Extract all file paths from a multi-file WFP block using regex. + * A multi-file WFP block contains multiple entries each starting with "file=". + * + * @param wfpBlock the WFP block containing multiple file entries + * @return a Set of extracted file paths, empty if none found + */ + public static Set extractFilePathsFromWFPBlock(@NotNull String wfpBlock) { + Set paths = new HashSet<>(); + + // Pattern to match file=,, format and capture the path + // This regex matches: "file=" followed by any characters until a comma, + // then any characters until another comma, then captures everything after that comma until end of line + Pattern pattern = Pattern.compile("^file=[^,]+,[^,]+,(.+)$", Pattern.MULTILINE); + Matcher matcher = pattern.matcher(wfpBlock); + + // Find all matches and add the captured paths to the result set + while (matcher.find()) { + String path = matcher.group(1); + if (path != null && !path.isEmpty()) { + paths.add(path); + } + } + + return paths; + } } diff --git a/src/test/java/com/scanoss/utils/WinnowingUtilsTest.java b/src/test/java/com/scanoss/utils/WinnowingUtilsTest.java new file mode 100644 index 0000000..0785992 --- /dev/null +++ b/src/test/java/com/scanoss/utils/WinnowingUtilsTest.java @@ -0,0 +1,87 @@ + +package com.scanoss.utils; + +import org.junit.Test; + +import java.util.Set; + +import static org.junit.Assert.*; + +public class WinnowingUtilsTest { + + // Test file format: file=,, + private static 
final String FILE1 = "file=90ebac4735d345fde0d05d939321d8fc,15878,/path/to/file1"; + private static final String FILE2 = "file=a7c31f87d23c42af732f57d39a9b05ac,24680,/path/to/file2"; + private static final String FILE3 = "file=e8585d8740d6664fda9e242a1d68b0f0,1815,/path/to/file3"; + private static final String FILE_SAME_PATH = "file=b1a89f4c5b0de974ad9846108c6d093a,9876,/path/to/file1"; + private static final String FILE_WITH_COMMA = "file=72a9e90d423b92dba36f78acc9bbecc7,12345,/path/with,comma"; + private static final String INVALID_FILE_NO_COMMAS = "file=invalid"; + + // WFP hash entries + private static final String WFP_ENTRY1 = "4=30777ca8,e9227657\n9=831bd2c5,701a2c74"; + private static final String WFP_ENTRY2 = "5=12345678,abcdefgh"; + + @Test + public void testExtractFilePathsFromWFPBlock_SingleFile_ReturnsSinglePath() { + String wfpBlock = FILE1 + "\n" + WFP_ENTRY1; + Set result = WinnowingUtils.extractFilePathsFromWFPBlock(wfpBlock); + assertEquals(1, result.size()); + assertTrue(result.contains("/path/to/file1")); + } + + @Test + public void testExtractFilePathsFromWFPBlock_MultipleFiles_ReturnsAllPaths() { + String wfpBlock = FILE1 + "\n" + WFP_ENTRY1 + "\n" + FILE2 + "\n" + WFP_ENTRY2 + "\n" + FILE3 + "\n"; + + Set result = WinnowingUtils.extractFilePathsFromWFPBlock(wfpBlock); + assertEquals(3, result.size()); + assertTrue(result.contains("/path/to/file1")); + assertTrue(result.contains("/path/to/file2")); + assertTrue(result.contains("/path/to/file3")); + } + + @Test + public void testExtractFilePathsFromWFPBlock_DuplicatePaths_ReturnsUniqueSet() { + String wfpBlock = FILE1 + "\n" + WFP_ENTRY1 + "\n" + FILE_SAME_PATH + "\n" + WFP_ENTRY2 + "\n"; + + Set result = WinnowingUtils.extractFilePathsFromWFPBlock(wfpBlock); + assertEquals(1, result.size()); + assertTrue(result.contains("/path/to/file1")); + } + + @Test + public void testExtractFilePathsFromWFPBlock_EmptyString_ReturnsEmptySet() { + String wfpBlock = ""; + Set result = 
WinnowingUtils.extractFilePathsFromWFPBlock(wfpBlock); + assertTrue(result.isEmpty()); + } + + @Test + public void testExtractFilePathsFromWFPBlock_NoValidFileLines_ReturnsEmptySet() { + String wfpBlock = "not_file=90ebac4735d345fde0d05d939321d8fc,15878,something\nanother=line\n"; + Set result = WinnowingUtils.extractFilePathsFromWFPBlock(wfpBlock); + assertTrue(result.isEmpty()); + } + + @Test + public void testExtractFilePathsFromWFPBlock_WithPathsContainingCommas_ParsesCorrectly() { + String wfpBlock = FILE_WITH_COMMA + "\n" + WFP_ENTRY1 + "\n" + FILE2 + "\n"; + + Set result = WinnowingUtils.extractFilePathsFromWFPBlock(wfpBlock); + assertEquals(2, result.size()); + assertTrue(result.contains("/path/with,comma")); + assertTrue(result.contains("/path/to/file2")); + } + + @Test + public void testExtractFilePathsFromWFPBlock_ComplexCase_HandlesCorrectly() { + String wfpBlock = "not_a_file=something\n" + FILE1 + "\n" + WFP_ENTRY1 + "\n" + INVALID_FILE_NO_COMMAS + "\n" + + FILE2 + "\n" + WFP_ENTRY2 + "\n" + "random line\n" + FILE3 + "\n"; + + Set result = WinnowingUtils.extractFilePathsFromWFPBlock(wfpBlock); + assertEquals(3, result.size()); + assertTrue(result.contains("/path/to/file1")); + assertTrue(result.contains("/path/to/file2")); + assertTrue(result.contains("/path/to/file3")); + } +} From b8ec9e73867e523291a5a98bca0068275e3b3360 Mon Sep 17 00:00:00 2001 From: Agustin Isasmendi Date: Wed, 7 May 2025 14:47:49 +0200 Subject: [PATCH 2/5] feat(SP-2487): implement path obfuscation on Winnowing class --- src/main/java/com/scanoss/Winnowing.java | 77 ++++++++++++++- src/test/java/com/scanoss/TestWinnowing.java | 69 +++++++++++++ .../com/scanoss/WinnowingConcurrencyTest.java | 98 +++++++++++++++++++ 3 files changed, 243 insertions(+), 1 deletion(-) create mode 100644 src/test/java/com/scanoss/WinnowingConcurrencyTest.java diff --git a/src/main/java/com/scanoss/Winnowing.java b/src/main/java/com/scanoss/Winnowing.java index 303d8eb..fb8fd16 100644 --- 
a/src/main/java/com/scanoss/Winnowing.java +++ b/src/main/java/com/scanoss/Winnowing.java @@ -28,9 +28,11 @@ import lombok.*; import lombok.extern.slf4j.Slf4j; import org.apache.commons.codec.digest.DigestUtils; +import org.apache.commons.io.FilenameUtils; import org.apache.tika.Tika; import org.apache.tika.mime.MediaType; import org.apache.tika.mime.MediaTypeRegistry; +import org.jetbrains.annotations.NotNull; import java.io.ByteArrayInputStream; import java.io.File; @@ -38,6 +40,8 @@ import java.nio.charset.Charset; import java.nio.file.Files; import java.util.*; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicLong; import java.util.zip.CRC32C; import java.util.zip.Checksum; @@ -58,6 +62,14 @@ public class Winnowing { private static final Tika tika = new Tika(); private static final MediaTypeRegistry mediaTypeRegistry = MediaTypeRegistry.getDefaultRegistry(); + /** + * Shared counter for generating unique IDs. + * idGenerator is shared across all Winnowing instances, + * ensuring sequential and unique ID generation for path obfuscation + * regardless of how many instances of Winnowing are created. + */ + private static final AtomicLong idGenerator = new AtomicLong(0); + @Builder.Default private Boolean skipSnippets = Boolean.FALSE; // Skip snippet generations @Builder.Default @@ -68,6 +80,31 @@ public class Winnowing { private boolean hpsm = Boolean.FALSE; // Enable High Precision Snippet Matching data collection @Builder.Default private int snippetLimit = MAX_LONG_LINE_CHARS; // Enable limiting of size of a single line of snippet generation + @Builder.Default + private Map obfuscationMap = new ConcurrentHashMap<>(); + + /** + * Resolves the real file path for a given obfuscated path. + * This method is thread-safe and can be called concurrently from multiple threads. + * If the provided path is not found in the obfuscation map, the original path is returned. 
+ * + * @param obfuscatedPath the obfuscated path + * @return the real file path corresponding to the provided obfuscated path, or the original path if no mapping exists + */ + public String deobfuscateFilePath(@NotNull String obfuscatedPath) { + String originalPath = obfuscationMap.get(obfuscatedPath); + return originalPath != null ? originalPath : obfuscatedPath; + } + + + /** + * Retrieves the size of the obfuscation map. + * + * @return the number of entries in the obfuscation map + */ + public int getObfuscationMapSize() { + return obfuscationMap.size(); + } /** * Calculate the WFP (fingerprint) for the given file @@ -112,7 +149,11 @@ public String wfpForContents(@NonNull String filename, Boolean binFile, byte[] c char[] fileContents = (new String(contents, Charset.defaultCharset())).toCharArray(); String fileMD5 = DigestUtils.md5Hex(contents); StringBuilder wfpBuilder = new StringBuilder(); - // TODO add obfuscation of the filename here + + if (obfuscate) { + filename = obfuscateFilePath(filename); + } + wfpBuilder.append(String.format("file=%s,%d,%s\n", fileMD5, contents.length, filename)); if (binFile || this.skipSnippets || this.skipSnippets(filename, fileContents)) { return wfpBuilder.toString(); @@ -180,6 +221,40 @@ public String wfpForContents(@NonNull String filename, Boolean binFile, byte[] c return wfpBuilder.toString(); } + /** + * Obfuscates the given file path by replacing it with a generated unique identifier while + * retaining its original file extension. + * This method is thread-safe and can be called concurrently from multiple threads. 
+ * + * @param originalPath the original file path to be obfuscated; must not be null + * @return the obfuscated file path with a unique identifier and the original file extension + */ + private String obfuscateFilePath(@NotNull String originalPath) { + final String extension = extractExtension(originalPath); + + // Generate a unique identifier for the obfuscated file using a thread-safe approach + final String obfuscatedPath = idGenerator.getAndIncrement() + extension; + this.obfuscationMap.put(obfuscatedPath, originalPath); + return obfuscatedPath; + } + + /** + * Extracts file extension from the given path, including the leading dot. + * + * @param path the file path or name (must not be null) + * @return the file extension with leading dot (e.g., ".txt") or empty string if no extension + */ + private String extractExtension(@NotNull String path) { + try { + String extractedExtension = FilenameUtils.getExtension(path).trim(); + return extractedExtension.isEmpty() ? "" : "." + extractedExtension; + } catch (IllegalArgumentException e) { + log.debug("Could not extract extension from filename '{}': {}", + path, e.getMessage()); + return ""; + } + } + /** * Determine if a file/contents should be skipped for snippet generation or not * @param filename filename for the contents (optional) diff --git a/src/test/java/com/scanoss/TestWinnowing.java b/src/test/java/com/scanoss/TestWinnowing.java index cb61307..e75e353 100644 --- a/src/test/java/com/scanoss/TestWinnowing.java +++ b/src/test/java/com/scanoss/TestWinnowing.java @@ -24,6 +24,7 @@ import com.scanoss.exceptions.WinnowingException; +import com.scanoss.utils.WinnowingUtils; import lombok.extern.slf4j.Slf4j; import org.junit.After; import org.junit.Before; @@ -265,4 +266,72 @@ public void TestWinnowingFileFailures() { log.info("Finished {} -->", methodName); } + + @Test + public void TestWinnowingObfuscationFileWithExtension() { + String methodName = new Object() { + }.getClass().getEnclosingMethod().getName(); + 
log.info("<-- Starting {}", methodName); + + Winnowing winnowing = Winnowing.builder().obfuscate(true).build(); + + String fileWithExtension = "testing/data/test-file.txt"; + + String wfpWithExtension = winnowing.wfpForFile(fileWithExtension, fileWithExtension); + assertNotNull("Expected a result from WFP with extension", wfpWithExtension); + + String obfuscatedPathWithExtension = WinnowingUtils.extractFilePathFromWFPBlock(wfpWithExtension); + assertNotNull("Should have found an obfuscated path in WFP with extension", obfuscatedPathWithExtension); + + String originalPathWithExtension = winnowing.deobfuscateFilePath(obfuscatedPathWithExtension); + assertEquals("Original path should match input file with extension", fileWithExtension, originalPathWithExtension); + + log.info("Finished {} -->", methodName); + } + + @Test + public void TestWinnowingObfuscationFileWithoutExtension() { + String methodName = new Object() { + }.getClass().getEnclosingMethod().getName(); + log.info("<-- Starting {}", methodName); + + Winnowing winnowing = Winnowing.builder().obfuscate(true).build(); + + String fileWithoutExtension = "testing/data/nbproject"; + + String wfpWithoutExtension = winnowing.wfpForFile(fileWithoutExtension, fileWithoutExtension); + + String obfuscatedPathWithoutExtension = WinnowingUtils.extractFilePathFromWFPBlock(wfpWithoutExtension); + assertNotNull("Should have found an obfuscated path in WFP without extension", obfuscatedPathWithoutExtension); + + String originalPathWithoutExtension = winnowing.deobfuscateFilePath(obfuscatedPathWithoutExtension); + assertEquals("Original path should match input file without extension", fileWithoutExtension, originalPathWithoutExtension); + + log.info("Finished {} -->", methodName); + } + + @Test + public void TestDeobfuscateFilePathEmpty() { + String methodName = new Object() { + }.getClass().getEnclosingMethod().getName(); + log.info("<-- Starting {}", methodName); + + Winnowing winnowing = Winnowing.builder().build(); + 
assertEquals("Should return null when given an empty obfuscated path", "" ,winnowing.deobfuscateFilePath("")); + + log.info("Finished {} -->", methodName); + } + + @Test + public void TestDeobfuscateFilePathInvalid() { + String methodName = new Object() { + }.getClass().getEnclosingMethod().getName(); + log.info("<-- Starting {}", methodName); + + Winnowing winnowing = Winnowing.builder().build(); + assertEquals("Should return same path if not exist on the map", "invalidPath", winnowing.deobfuscateFilePath("invalidPath")); + + log.info("Finished {} -->", methodName); + } } + diff --git a/src/test/java/com/scanoss/WinnowingConcurrencyTest.java b/src/test/java/com/scanoss/WinnowingConcurrencyTest.java new file mode 100644 index 0000000..a60697b --- /dev/null +++ b/src/test/java/com/scanoss/WinnowingConcurrencyTest.java @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright (c) 2025, SCANOSS + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +package com.scanoss; + +import lombok.extern.slf4j.Slf4j; +import org.junit.Test; + +import java.util.*; +import java.util.concurrent.*; + +import static org.junit.Assert.*; +/** + * Tests to validate thread safety of the path obfuscation feature in the Winnowing class. + */ +@Slf4j +public class WinnowingConcurrencyTest { + + /** + * Test that concurrent obfuscation of paths works correctly without data loss or corruption. + * This simulates multiple threads processing different files simultaneously. + */ + @Test + public void testConcurrentObfuscation() throws InterruptedException, ExecutionException { + int fileCount = 500; // More files to increase collision chances + int iterations = 3; // Run multiple iterations to increase stress + + + for (int iter = 0; iter < iterations; iter++) { + log.info("Starting high-collision test iteration {}", iter); + + Winnowing winnowing = Winnowing.builder().obfuscate(true).build(); + + ExecutorService executor = Executors.newFixedThreadPool(fileCount); + + // Create a list of paths to obfuscate + List paths = new ArrayList<>(); + for (int i = 0; i < fileCount; i++) { + paths.add("/path/to/file" + i + ".java"); + } + + List> futures = new ArrayList<>(fileCount); + + // Use a CyclicBarrier to ensure all threads start exactly together. 
+ CyclicBarrier barrier = new CyclicBarrier(fileCount, () -> log.info("All threads released simultaneously!")); + + for (String path : paths) { + futures.add(executor.submit(() -> { + try { + byte[] contents = ("sample content for " + path).getBytes(); + + // Wait at barrier until all threads are ready + barrier.await(); + + // Access the same Winnowing instance concurrently + return winnowing.wfpForContents(path, false, contents); + + } catch (InterruptedException | BrokenBarrierException e) { + throw new RuntimeException(e); + } + })); + } + + // Wait for all tasks to complete + for (Future future : futures) { + future.get(); + } + + executor.shutdown(); + assertTrue("Executor did not terminate properly", + executor.awaitTermination(5, TimeUnit.SECONDS)); + + // Verify results + log.info("Processed {} paths with {} unique results", + paths.size(), winnowing.getObfuscationMapSize()); + assertEquals(paths.size(), winnowing.getObfuscationMapSize()); + } + } +} \ No newline at end of file From 3e77390bfbb10f56d11f603fa1a4af30dfb91d46 Mon Sep 17 00:00:00 2001 From: Agustin Isasmendi Date: Thu, 8 May 2025 16:49:52 +0200 Subject: [PATCH 3/5] feat(SP-2487): implement path deobfuscation on Scanner class --- src/main/java/com/scanoss/Scanner.java | 65 ++++- .../java/com/scanoss/dto/ScanFileResult.java | 2 + src/test/java/com/scanoss/TestScanner.java | 270 +++++++++++++++++- 3 files changed, 318 insertions(+), 19 deletions(-) diff --git a/src/main/java/com/scanoss/Scanner.java b/src/main/java/com/scanoss/Scanner.java index 781b1ae..7ae7904 100644 --- a/src/main/java/com/scanoss/Scanner.java +++ b/src/main/java/com/scanoss/Scanner.java @@ -30,6 +30,7 @@ import com.scanoss.filters.factories.FolderFilterFactory; import com.scanoss.processor.*; import com.scanoss.rest.ScanApi; +import com.scanoss.settings.Bom; import com.scanoss.settings.ScanossSettings; import com.scanoss.utils.JsonUtils; import lombok.*; @@ -49,6 +50,7 @@ import java.util.concurrent.Executors; import 
java.util.concurrent.Future; import java.util.function.Predicate; +import java.util.stream.Collectors; import static com.scanoss.ScanossConstants.*; @@ -353,13 +355,12 @@ public List wfpFolder(@NonNull String folder) throws ScannerException, W */ public String scanFile(@NonNull String filename) throws ScannerException, WinnowingException { String wfp = wfpFile(filename); - if (wfp != null && !wfp.isEmpty()) { - String response = this.scanApi.scan(wfp, "", 1); - if (response != null && !response.isEmpty()) { - return response; - } + if (wfp == null || wfp.isEmpty()) { + return ""; } - return ""; + + String result = scanApi.scan(wfp, "", 1); + return postProcessResult(result); } /** @@ -385,18 +386,52 @@ public List scanFileList(@NonNull String folder, @NonNull List f return postProcessResults(results); } + /** - * Post-processes scan results based on BOM (Bill of Materials) settings if available. - * @param results List of raw scan results in JSON string format - * @return Processed results, either modified based on BOM or original results if no BOM exists + * Processes the result string and provides a post-processed output. + * + * @param rawResults the raw result string to be processed. + * @return the post-processed result string. */ - private List postProcessResults(List results) { - if (settings.getBom() != null) { - List scanFileResults = JsonUtils.toScanFileResults(results); - List newScanFileResults = this.postProcessor.process(scanFileResults, this.settings.getBom()); - return JsonUtils.toRawJsonString(newScanFileResults); + private String postProcessResult(String rawResults) { + if (rawResults == null || rawResults.isEmpty()) { + return ""; } - return results; + return postProcessResults(List.of(rawResults)).stream() + .findFirst() + .orElse(""); } + /** + * Processes the given list of raw scan results by applying deobfuscation and post-processing steps based on settings. 
+ * + * @param rawResults a list of raw scan results in string format to be processed + * @return a list of processed scan results in string format + */ + private List postProcessResults(List rawResults) { + List scanFileResults = JsonUtils.toScanFileResults(rawResults); + + if (obfuscate) { + scanFileResults = deobfuscateResults(scanFileResults); + } + + Bom bom = settings.getBom(); + if (bom != null) { + scanFileResults = this.postProcessor.process(scanFileResults, bom); + } + + return JsonUtils.toRawJsonString(scanFileResults); + } + + /** + * Deobfuscate the file paths in a list of ScanFileResult. + * + * @param scanFileResults List of ScanFileResult to be deobfuscated + * @return List of ScanFileResult with deobfuscated file paths + */ + private List deobfuscateResults(@NonNull List scanFileResults) { + return scanFileResults.stream() + .map(result -> result.withFilePath(winnowing.deobfuscateFilePath(result.getFilePath()))) + .collect(Collectors.toList()); + } } \ No newline at end of file diff --git a/src/main/java/com/scanoss/dto/ScanFileResult.java b/src/main/java/com/scanoss/dto/ScanFileResult.java index 5783873..1e588fd 100644 --- a/src/main/java/com/scanoss/dto/ScanFileResult.java +++ b/src/main/java/com/scanoss/dto/ScanFileResult.java @@ -23,6 +23,7 @@ package com.scanoss.dto; import lombok.Data; +import lombok.With; import java.util.List; @@ -31,6 +32,7 @@ */ @Data public class ScanFileResult { + @With private final String filePath; private final List fileDetails; } diff --git a/src/test/java/com/scanoss/TestScanner.java b/src/test/java/com/scanoss/TestScanner.java index 1770e23..06c8555 100644 --- a/src/test/java/com/scanoss/TestScanner.java +++ b/src/test/java/com/scanoss/TestScanner.java @@ -22,28 +22,59 @@ */ package com.scanoss; +import com.google.gson.Gson; +import com.scanoss.dto.ScanFileDetails; +import com.scanoss.dto.ScanFileResult; +import com.scanoss.dto.ServerDetails; +import com.scanoss.dto.enums.MatchType; import 
com.scanoss.exceptions.ScannerException; import com.scanoss.filters.FilterConfig; import com.scanoss.settings.ScanossSettings; +import com.scanoss.utils.JsonUtils; +import com.scanoss.utils.WinnowingUtils; import lombok.extern.slf4j.Slf4j; +import okhttp3.mockwebserver.Dispatcher; +import okhttp3.mockwebserver.MockResponse; +import okhttp3.mockwebserver.MockWebServer; +import okhttp3.mockwebserver.RecordedRequest; +import org.jetbrains.annotations.NotNull; +import org.junit.After; import org.junit.Before; import org.junit.Test; import java.io.FileWriter; import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; +import java.nio.file.*; +import java.util.*; +import java.util.concurrent.*; +import java.util.stream.Collectors; import static org.junit.Assert.*; @Slf4j public class TestScanner { + private MockWebServer server; + + @Before - public void Setup() { + public void Setup() throws IOException{ log.info("Starting Scanner test cases..."); log.debug("Logging debug enabled"); log.trace("Logging trace enabled"); + log.info("Starting Mock Server..."); + server = new MockWebServer(); + server.start(); + } + + @After + public void Finish() { + log.info("Shutting down mock server."); + try { + server.close(); + server.shutdown(); + } catch (IOException e) { + log.warn("Some issue shutting down mock server: {}", e.getLocalizedMessage()); + } } @Test @@ -219,6 +250,33 @@ public void TestScannerScanFileListPositive() { log.info("Finished {} -->", methodName); } + @Test + public void TestScannerScanFileListPositiveWithObfuscation() { + String methodName = new Object() { + }.getClass().getEnclosingMethod().getName(); + log.info("<-- Starting {}", methodName); + + Scanner scanner = Scanner.builder().obfuscate(true).build(); + + List fileList = Arrays.asList( + "src/test/java/com/scanoss/TestScanner.java", + "src/test/java/com/scanoss/TestWinnowing.java", + ".github/workflows/publish.yml", + ".gitignore", + "tmp/.gitignore" + 
); + String folder = "."; + List results = scanner.scanFileList(folder, fileList); + + assertNotNull("Should've gotten a response", results); + assertFalse("Scan results should not be empty", results.isEmpty()); + assertEquals("Should've only gotten two results",2, results.size()); + log.info("Received {} results", results.size()); + log.info("Res Data: {}", results); + + log.info("Finished {} -->", methodName); + } + @Test public void TestScannerScanFileListNegative() { String methodName = new Object() { @@ -381,4 +439,208 @@ public void TestScannerCustomFilterConfig() { log.info("Finished {} -->", methodName); } + + /** + * Collects all files from the specified directory, returning their paths relative to the provided directory. + * + * @param directory the directory to scan for source files + * @return a list of paths relative to the specified directory + * @throws IOException if there's an error accessing the file system + */ + private List collectFilePaths(String directory) throws IOException { + Path dirPath = Paths.get(directory); + return Files.walk(dirPath) + .filter(Files::isRegularFile) + .map(path -> dirPath.relativize(path).toString()) + .collect(Collectors.toList()); + } + + /** + * Helper method to create a mock server dispatcher that returns "no match" results + * for all scan requests while tracking received paths for verification. 
+ * + * @param receivedPaths Set that will be populated with paths extracted from the WFP block in requests + * @return Dispatcher that returns "no match" results for all files + */ + private Dispatcher createNoMatchDispatcher(Set receivedPaths) { + return new Dispatcher() { + @NotNull + @Override + public MockResponse dispatch(@NotNull RecordedRequest request) { + // Extract the WFP from the request and parse all obfuscated paths + String requestBody = request.getBody().readUtf8(); + Set paths = WinnowingUtils.extractFilePathsFromWFPBlock(requestBody); + + // Store all received paths for later verification + receivedPaths.addAll(paths); + + for (String path : paths) { + log.debug("Server received obfuscated path: {}", path); + } + + if (paths.isEmpty()) { + return new MockResponse() + .setResponseCode(400) + .setBody("error: Bad Request - No valid obfuscated paths found"); + } + + // Create response objects using the DTO classes + Map> responseMap = new HashMap<>(); + + // Create server details object (same for all responses) + ServerDetails.KbVersion kbVersion = new ServerDetails.KbVersion("25.05", "21.05.21"); + ServerDetails serverDetails = new ServerDetails("5.4.10", kbVersion); + + // Create a "none" match result for each path + for (String path : paths) { + ScanFileDetails noMatchResult = ScanFileDetails.builder() + .matchType(MatchType.none) + .serverDetails(serverDetails) + .build(); + + responseMap.put(path, Collections.singletonList(noMatchResult)); + } + + // Convert to JSON + Gson gson = new Gson(); + String responseJson = gson.toJson(responseMap); + + return new MockResponse() + .setResponseCode(200) + .setBody(responseJson); + } + }; + } + + /** + * Test that we can scan a file with obfuscation enabled using a mock server. 
+ * This test focuses on the path obfuscation/deobfuscation cycle + */ + @Test + public void testScanFileWithObfuscation() { + String methodName = new Object() {}.getClass().getEnclosingMethod().getName(); + log.info("<-- Starting {}", methodName); + + final String fileToScan = "src/test/java/com/scanoss/TestScanner.java"; + + // Set to capture the path received by the server + final Set receivedPaths = ConcurrentHashMap.newKeySet(); + + // Configure the MockWebServer to return a 'no match' response + server.setDispatcher(createNoMatchDispatcher(receivedPaths)); + + Scanner scanner = Scanner.builder() + .obfuscate(true) + .url(server.url("/api/scan/direct").toString()) + .build(); + + String result = scanner.scanFile(fileToScan); + + // Verify we got scan results + assertNotNull("Should have scan results", result); + assertFalse("Should have non-empty result", result.isEmpty()); + log.info("Received scan result for file"); + + // Verify path received by the server is obfuscated (not matching the source file path) + assertFalse("Received paths should not be empty", receivedPaths.isEmpty()); + String receivedPath = receivedPaths.iterator().next(); + assertNotEquals("Path should be obfuscated", fileToScan, receivedPath); + + // Verify (deobfuscation) that the result has the correct file path + List resultsDto = JsonUtils.toScanFileResults(Collections.singletonList(result)); + assertFalse("Results should not be empty", resultsDto.isEmpty()); + + String resultPath = resultsDto.get(0).getFilePath(); + assertEquals("resultPath should be equal to the original file path", fileToScan, resultPath); + + log.info("Finished {} -->", methodName); + } + + /** + * Test that we can scan a list of files with obfuscation enabled using a mock server. 
+ * This test focuses on the path obfuscation/deobfuscation cycle + */ + @Test + public void testScanFileListWithObfuscation() throws IOException { + String methodName = new Object() {}.getClass().getEnclosingMethod().getName(); + log.info("<-- Starting {}", methodName); + + String testDir = "src/test/java/com/scanoss"; + + List allFiles = collectFilePaths(testDir); + log.info("Found {} files in source directory", allFiles.size()); + + // Set to capture paths received by the server + final Set receivedPaths = ConcurrentHashMap.newKeySet(); + + // Configure the MockWebServer to return a 'no match' response + server.setDispatcher(createNoMatchDispatcher(receivedPaths)); + + Scanner scanner = Scanner.builder() + .obfuscate(true) + .url(server.url("/api/scan/direct").toString()) + .build(); + + List results = scanner.scanFileList(testDir, allFiles); + + // Verify we got scan results + assertNotNull("Should have scan results", results); + assertFalse("Should have non-empty results", results.isEmpty()); + log.info("Received {} scan results", results.size()); + + // Verify paths received by the server are obfuscated (not matching any source file paths) + assertFalse("Received paths should not be empty", receivedPaths.isEmpty()); + receivedPaths.forEach(receivedPath -> + assertFalse("Path should be obfuscated", allFiles.contains(receivedPath))); + + // Verify all original paths are in the results (deobfuscation check) + List resultsDto = JsonUtils.toScanFileResults(results); + resultsDto.forEach(r -> + assertTrue("Result should contain the original file path: " + r.getFilePath(), + allFiles.contains(r.getFilePath()))); + + log.info("Finished {} -->", methodName); + } + + /** + * Test that we can scan a folder with obfuscation enabled using a mock server. 
+     * This test focuses on the path obfuscation/deobfuscation cycle:
+     * no path captured in {@code receivedPaths} may equal a real file path, and every
+     * path returned by {@code scanFolder} must be one of the files collected from the folder.
+     */
+    @Test
+    public void testScanWithObfuscationCycle() throws IOException {
+        String methodName = new Object() {}.getClass().getEnclosingMethod().getName();
+        log.info("<-- Starting {}", methodName);
+
+        final String folderToScan = "src/test";
+
+        // Set to capture all paths received by the server
+        final Set<String> receivedPaths = ConcurrentHashMap.newKeySet();
+
+        // Collect all files in the src/test folder before scanning
+        List<String> allFiles = collectFilePaths(folderToScan);
+        log.info("Found {} files in source directory", allFiles.size());
+
+        // Configure the MockWebServer to return a 'no match' response
+        server.setDispatcher(createNoMatchDispatcher(receivedPaths));
+
+        Scanner scanner = Scanner.builder()
+                .obfuscate(true)
+                .url(server.url("/api/scan/direct").toString()) // Use our mock server
+                .build();
+
+        // Scan the files to test the full obfuscation/deobfuscation cycle
+        List<String> results = scanner.scanFolder(folderToScan);
+
+        // Verify we got scan results
+        assertNotNull("Should have scan results", results);
+        assertFalse("Should have result non empty", results.isEmpty());
+        log.info("Received {} scan results", results.size());
+
+        // Verify paths received by the server are obfuscated (not matching any source file paths).
+        // Without the non-empty assertion the check would pass vacuously if nothing was captured.
+        assertFalse("Received paths should not be empty", receivedPaths.isEmpty());
+        receivedPaths.forEach(receivedPath ->
+                assertFalse("Path should be obfuscated: " + receivedPath, allFiles.contains(receivedPath)));
+
+        // NOTE(review): element type assumed to be ScanFileResult (it must expose getFilePath()) - confirm
+        List<ScanFileResult> resultsDto = JsonUtils.toScanFileResults(results);
+        // Verify (deobfuscation) that all results from scanFolder are valid file paths from our source directory
+        resultsDto.forEach(r ->
+                assertTrue("Result should be a valid source file path: " + r.getFilePath(),
+                        allFiles.contains(r.getFilePath())));
+
+        log.info("Finished {} -->", methodName);
+    }
 }

From 909098726e6cb936ab4f08bbf28ccb5c4ab4831b Mon Sep 17 00:00:00 2001
From: Agustin Isasmendi
Date: Thu, 8 May 2025 17:11:10 +0200
Subject: [PATCH 4/5] feat(SP-2487): add obfuscate option to CLI

---
 src/main/java/com/scanoss/cli/ScanCommandLine.java | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git
a/src/main/java/com/scanoss/cli/ScanCommandLine.java b/src/main/java/com/scanoss/cli/ScanCommandLine.java
index b52dbb6..4a8a01a 100644
--- a/src/main/java/com/scanoss/cli/ScanCommandLine.java
+++ b/src/main/java/com/scanoss/cli/ScanCommandLine.java
@@ -93,6 +93,9 @@ class ScanCommandLine implements Runnable {
     @picocli.CommandLine.Option(names = {"--snippet-limit"}, description = "Length of single line snippet limit (0 for unlimited, default 1000)")
     private int snippetLimit = 1000;
 
+    @picocli.CommandLine.Option(names = {"--obfuscate"}, description = "Obfuscate fingerprints")
+    private boolean obfuscate;
+
     @picocli.CommandLine.Option(names = {"--ca-cert"}, description = "Alternative certificate PEM file (optional)")
     private String caCert;
 
@@ -165,7 +168,7 @@ public void run() {
                 .hiddenFilesFolders(allHidden).numThreads(numThreads).url(apiUrl).apiKey(apiKey)
                 .retryLimit(retryLimit).timeout(Duration.ofSeconds(timeoutLimit)).scanFlags(scanFlags)
                 .sbomType(sbomType).sbom(sbom).snippetLimit(snippetLimit).customCert(caCertPem).proxy(proxy).hpsm(enableHpsm)
-                .settings(settings)
+                .settings(settings).obfuscate(obfuscate)
                 .build();
 
         File f = new File(fileFolder);

From 31f08556f531f418f6c1593659451a385ca2165e Mon Sep 17 00:00:00 2001
From: Agustin Isasmendi
Date: Thu, 22 May 2025 10:04:10 +0200
Subject: [PATCH 5/5] chore(SP-2487): add `javadoc` and `lint` to Makefile

---
 Makefile | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/Makefile b/Makefile
index 543ae98..d9bc88a 100644
--- a/Makefile
+++ b/Makefile
@@ -60,3 +60,11 @@ inc_major: ## Increment the major version on pom.xml
 version: ## Show the current version of the package
 	@echo "Getting package version..."
 	VER=$(shell ./mvnw help:evaluate -Dexpression=project.version -q -DforceStdout)
+
+javadoc: ## Run javadoc to check for documentation errors
+	@echo "Running javadoc check..."
+	./mvnw javadoc:javadoc
+
+lint: ## Run checkstyle or other linting tools
+	@echo "Running lint checks..."
+	./mvnw checkstyle:check