Skip to content

Commit

Permalink
File name strategy (#217)
Browse files Browse the repository at this point in the history
* Edited the file long name strategy adjusted

* Modified the filename naming strategy for cache files

* Wrote Unit Test and also a bit changed java file

* Add Parameterized Tests

* Refactored CachingContentProviderBase

* Refactored, set up logic

* minor test improvements, added double underscores between parts of original url and the hash

* adapted user guide

---------

Co-authored-by: ohecker <8004361+ohecker@users.noreply.github.com>
  • Loading branch information
sauleh7 and ohecker authored Nov 16, 2023
1 parent ff07997 commit a89af38
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,16 @@
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Collection;
import java.util.Scanner;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;

import com.devonfw.tools.solicitor.common.SolicitorRuntimeException;
import com.devonfw.tools.solicitor.common.UrlInputStreamFactory;

/**
Expand All @@ -30,6 +33,8 @@ public abstract class CachingContentProviderBase<C extends Content> extends Abst

private ContentProvider<C> nextContentProvider;

/**
 * Maximum length (in characters) up to which an encoded URL is used unchanged as cache key. Encoded URLs longer than
 * this are replaced by a shortened key built from a prefix, a hash and a suffix of the encoded URL.
 */
static final int MAX_KEY_LENGTH = 250;

/**
* The Constructor.
*
Expand All @@ -40,7 +45,6 @@ public CachingContentProviderBase(ContentFactory<C> contentFactory, ContentProvi

super(contentFactory);
this.nextContentProvider = nextContentProvider;

}

/**
Expand All @@ -52,21 +56,51 @@ public CachingContentProviderBase(ContentFactory<C> contentFactory, ContentProvi
protected abstract Collection<String> getCacheUrls(String key);

/**
 * Calculate the cache key for the given web content URL.
 * <p>
 * A leading {@code https} is replaced by {@code http} and every non-word character (anything outside
 * {@code [a-zA-Z_0-9]}) is replaced by an underscore. If the resulting string is longer than
 * {@link #MAX_KEY_LENGTH} it is shortened to its first 40 characters, two underscores, the hex encoded SHA-256 hash
 * of the complete (too long) string, two underscores and its last 40 characters.
 *
 * @param url the URL of the web content
 * @return the cache key
 */
public String getKey(String url) {

  // Normalize the scheme so that the http and https variants of a URL yield the same key. Only the leading "https"
  // is rewritten; occurrences further inside the URL (e.g. in a query parameter) must stay untouched, otherwise
  // different URLs could be mapped to the same key.
  if (url.startsWith("https")) {
    url = "http" + url.substring("https".length());
  }
  String result = url.replaceAll("\\W", "_");

  // Short enough to be used as a filename as is.
  if (result.length() <= MAX_KEY_LENGTH) {
    return result;
  }

  // Too long: keep a readable head and tail and put a hash of the complete name in between so that the shortened
  // key still identifies the original URL.
  final int keptLength = 40;
  String prefix = result.substring(0, keptLength);
  String suffix = result.substring(result.length() - keptLength);
  String hash = generateHash(result);
  return prefix + "__" + hash + "__" + suffix;
}

/**
 * Generates a SHA-256 hash of the input string.
 *
 * @param input the input string to be hashed; it is encoded as UTF-8 before hashing
 * @return the lowercase hexadecimal string representation of the SHA-256 hash
 * @throws SolicitorRuntimeException if the SHA-256 algorithm is not available in the runtime
 */
private String generateHash(String input) {

  try {
    MessageDigest digest = MessageDigest.getInstance("SHA-256");
    // Encode with an explicit charset: the no-arg getBytes() uses the platform default charset, which would make
    // the hash (and thus the cache key) depend on the machine the code runs on.
    byte[] hash = digest.digest(input.getBytes(StandardCharsets.UTF_8));
    // Two hex digits per byte.
    StringBuilder hexString = new StringBuilder(hash.length * 2);
    for (byte b : hash) {
      hexString.append(String.format("%02x", b));
    }
    return hexString.toString();
  } catch (NoSuchAlgorithmException e) {
    throw new SolicitorRuntimeException("SHA-256 hashing algorithm not available.", e);
  }
}

/**
Expand Down Expand Up @@ -111,4 +145,4 @@ protected C loadFromNext(String url) {
return this.nextContentProvider.getContentForUri(url);
}

}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package com.devonfw.tools.solicitor.common.content;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.util.Collection;

import org.junit.jupiter.api.Test;

/**
* Unit test for the {@link CachingContentProviderBase#getKey(String)} method.
*/
/**
 * Unit tests for {@link CachingContentProviderBase#getKey(String)}, covering the boundary between keys which are used
 * as is (encoded URL of at most 250 characters) and keys which are shortened (longer encoded URLs).
 */
class CachingContentProviderBaseTest {

  /**
   * Minimal concrete subclass of {@link CachingContentProviderBase} so that {@code getKey} can be called.
   */
  static class TestCachingContentProvider extends CachingContentProviderBase<Content> {
    public TestCachingContentProvider() {

      super(null, null); // ContentFactory and ContentProvider not needed for this test
    }

    @Override
    protected Collection<String> getCacheUrls(String key) {

      return null; // not used by getKey
    }
  }

  /**
   * A URL of exactly 250 characters must only be encoded, not shortened.
   */
  @Test
  void shouldGenerateCorrectKeyForUrlOfLength250() {

    TestCachingContentProvider cachingContentProvider = new TestCachingContentProvider();

    // Create a URL of length 250 (classical logic should be used)
    String longUrl250 = "http://example.com/clear/and/concise/url/for/testing/purposes/with/exactly/250/characters/in/total/including/letters/numbers/special/characters/as/appropriate/for/clarity/this/is/a/very/long/url/the/maximum/filename/length/just/to/reach/to/length/250";
    // Guard the fixture itself: the test is only meaningful if the URL really sits on the boundary.
    assertEquals(250, longUrl250.length(), "Test URL is expected to have exactly 250 characters");

    String longResult250 = cachingContentProvider.getKey(longUrl250);

    assertEquals(250, longResult250.length(), "Key for URL of length 250 should keep the length of the URL");
    assertEquals(longUrl250.replaceAll("\\W", "_"), longResult250,
        "Key for URL of length 250 should be the URL with non-word characters replaced by underscores");
  }

  /**
   * A URL of 251 characters exceeds the limit and must be shortened to prefix, hash and suffix.
   */
  @Test
  void shouldGenerateCorrectKeyForUrlOfLength251() {

    TestCachingContentProvider cachingContentProvider = new TestCachingContentProvider();

    // Create a URL of length 251 (new approach should be used)
    String longUrl251 = "http://example.com/clear/and/concise/url/for/testing/purposes/with/exactly/251/characters/in/total/including/letters/numbers/special/characters/as/appropriate/for/clarity/this/is/a/very/long/url/the/maximum/filenames/length/just/to/reach/to/length/251";
    // Guard the fixture itself: the test is only meaningful if the URL is just beyond the boundary.
    assertEquals(251, longUrl251.length(), "Test URL is expected to have exactly 251 characters");

    String longResult251 = cachingContentProvider.getKey(longUrl251);

    // 40 chars prefix + "__" + 64 hex chars of SHA-256 + "__" + 40 chars suffix
    assertEquals(40 + 2 + 64 + 2 + 40, longResult251.length(),
        "Modified filename length is incorrect for URL of length 251");

    String encodedUrl = longUrl251.replaceAll("\\W", "_");
    assertTrue(longResult251.startsWith(encodedUrl.substring(0, 40) + "__"),
        "Shortened key should start with the first 40 characters of the encoded URL followed by two underscores");
    assertTrue(longResult251.endsWith("__" + encodedUrl.substring(encodedUrl.length() - 40)),
        "Shortened key should end with two underscores followed by the last 40 characters of the encoded URL");
  }

}
10 changes: 9 additions & 1 deletion documentation/master-solicitor.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -1261,7 +1261,14 @@ The determined content is available as `NormalizedLicense.effectiveNormalizedLic
=== Encoding of URLs
When creating the resource or filename for given URLs in the above steps the following encoding scheme will be applied to ensure that a valid name can always be created:

All "non-word" characters (i.e. characters outside the set `[a-zA-Z_0-9]`) are replaced by underscores ("`_`").
* If the scheme is `https` it will be replaced with `http`.
* All "non-word" characters (i.e. characters outside the set `[a-zA-Z_0-9]`) are replaced by underscores ("`_`").
* If the resulting filename is longer than 250 characters it will be replaced by a new name which is the concatenation of
** the first 40 characters of the (too) long filename
** two underscores
** the SHA-256 hash (hex encoded) of the (too) long filename
** two underscores
** the last 40 characters of the (too) long filename

== Guessing of license URLs
Fetching the license content `NormalizedLicense.effectiveNormalizedLicenseContent` based on the URL in `NormalizedLicense.effectiveNormalizedLicenseUrl` will often result in content which is in HTML format instead of plain text and is not properly rendered when included in reports.
Expand Down Expand Up @@ -1615,6 +1622,7 @@ Spring beans implementing this interface will be called at certain points in the
== Release Notes
Changes in 1.16.0::
* https://github.com/devonfw/solicitor/pull/212: Improvement in determining License-URL within NpmLicenseCheckerReader.
* https://github.com/devonfw/solicitor/issues/213: Avoid (too) long filenames when caching license texts or license URLs. See <<Encoding of URLs>>.

Changes in 1.15.0::
* https://github.com/devonfw/solicitor/issues/208: Add two new lifecycle methods to `SolicitorLifecycleListener`.
Expand Down

0 comments on commit a89af38

Please sign in to comment.