diff --git a/src/Microsoft.ComponentDetection.Detectors/pip/IPyPiClient.cs b/src/Microsoft.ComponentDetection.Detectors/pip/IPyPiClient.cs index 59c5ef4d2..819dd247e 100644 --- a/src/Microsoft.ComponentDetection.Detectors/pip/IPyPiClient.cs +++ b/src/Microsoft.ComponentDetection.Detectors/pip/IPyPiClient.cs @@ -158,7 +158,7 @@ public async Task GetProjectAsync(PipDependencySpecification spec using var r = new PypiRetryTelemetryRecord { Name = spec.Name, DependencySpecifiers = spec.DependencySpecifiers?.ToArray(), StatusCode = result.Result.StatusCode }; this.logger.LogWarning( - "Received {StatusCode} {ReasonPhrase} from {RequestUri}. Waiting {TimeSpan} before retry attempt {RetryCount}", + "Received status:{StatusCode} with reason:{ReasonPhrase} from {RequestUri}. Waiting {TimeSpan} before retry attempt {RetryCount}", result.Result.StatusCode, result.Result.ReasonPhrase, requestUri, @@ -190,7 +190,7 @@ public async Task GetProjectAsync(PipDependencySpecification spec { using var r = new PypiFailureTelemetryRecord { Name = spec.Name, DependencySpecifiers = spec.DependencySpecifiers?.ToArray(), StatusCode = request.StatusCode }; - this.logger.LogWarning("Received {StatusCode} {ReasonPhrase} from {RequestUri}", request.StatusCode, request.ReasonPhrase, requestUri); + this.logger.LogWarning("Received status:{StatusCode} with reason:{ReasonPhrase} from {RequestUri}", request.StatusCode, request.ReasonPhrase, requestUri); return new PythonProject(); } @@ -212,7 +212,7 @@ public async Task GetProjectAsync(PipDependencySpecification spec parsedVersion.Valid && parsedVersion.IsReleasedPackage && PythonVersionUtilities.VersionValidForSpec(release.Key, spec.DependencySpecifiers)) { - versions.Releases.Add(release.Key, release.Value); + versions.Releases[release.Key] = release.Value; } } catch (ArgumentException ae) diff --git a/src/Microsoft.ComponentDetection.Detectors/pip/PythonResolver.cs b/src/Microsoft.ComponentDetection.Detectors/pip/PythonResolver.cs index bf13fd01a..de7abf77b 100644 --- a/src/Microsoft.ComponentDetection.Detectors/pip/PythonResolver.cs +++ b/src/Microsoft.ComponentDetection.Detectors/pip/PythonResolver.cs @@ -7,8 +7,10 @@ namespace Microsoft.ComponentDetection.Detectors.Pip; using Microsoft.ComponentDetection.Contracts; using Microsoft.ComponentDetection.Contracts.TypedComponent; using Microsoft.Extensions.Logging; +using MoreLinq; +using Newtonsoft.Json; -public class PythonResolver : IPythonResolver +public class PythonResolver : PythonResolverBase, IPythonResolver { private readonly IPyPiClient pypiClient; private readonly ILogger logger; @@ -18,6 +20,7 @@ public class PythonResolver : IPythonResolver private readonly string classifierFieldLicensePrefix = "License"; public PythonResolver(IPyPiClient pypiClient, ILogger logger) + : base(logger) { this.pypiClient = pypiClient; this.logger = logger; @@ -43,7 +46,7 @@ public async Task> ResolveRootsAsync(ISingleFileComponentRec var result = project.Releases; - if (result.Keys.Any()) + if (result is not null && result.Keys.Any()) { state.ValidVersionMap[rootPackage.Name] = result; @@ -62,8 +65,9 @@ public async Task> ResolveRootsAsync(ISingleFileComponentRec else { this.logger.LogWarning( - "Root dependency {RootPackageName} not found on pypi. Skipping package.", - rootPackage.Name); + "Unable to resolve root dependency {PackageName} with version specifiers {PackageVersions} from pypi possibly due to computed version constraints. Skipping package.", + rootPackage.Name, + JsonConvert.SerializeObject(rootPackage.DependencySpecifiers)); singleFileComponentRecorder.RegisterPackageParseFailure(rootPackage.Name); } } @@ -113,7 +117,7 @@ private async Task> ProcessQueueAsync(ISingleFileComponentRe var result = project.Releases; - if (result.Keys.Any()) + if (result is not null && result.Keys.Any()) { state.ValidVersionMap[dependencyNode.Name] = result; var candidateVersion = state.ValidVersionMap[dependencyNode.Name].Keys.Any() @@ -126,8 +130,9 @@ private async Task> ProcessQueueAsync(ISingleFileComponentRe else { this.logger.LogWarning( - "Dependency Package {DependencyName} not found in Pypi. Skipping package", - dependencyNode.Name); + "Unable to resolve non-root dependency {PackageName} with version specifiers {PackageVersions} from pypi possibly due to computed version constraints. Skipping package.", + dependencyNode.Name, + JsonConvert.SerializeObject(dependencyNode.DependencySpecifiers)); singleFileComponentRecorder.RegisterPackageParseFailure(dependencyNode.Name); } } @@ -137,63 +142,7 @@ private async Task> ProcessQueueAsync(ISingleFileComponentRe return state.Roots; } - private async Task InvalidateAndReprocessAsync( - PythonResolverState state, - PipGraphNode node, - PipDependencySpecification newSpec) - { - var pipComponent = node.Value; - - var oldVersions = state.ValidVersionMap[pipComponent.Name].Keys.ToList(); - var currentSelectedVersion = node.Value.Version; - var currentReleases = state.ValidVersionMap[pipComponent.Name][currentSelectedVersion]; - foreach (var version in oldVersions) - { - if (!PythonVersionUtilities.VersionValidForSpec(version, newSpec.DependencySpecifiers)) - { - state.ValidVersionMap[pipComponent.Name].Remove(version); - } - } - - if (state.ValidVersionMap[pipComponent.Name].Count == 0) - { - state.ValidVersionMap[pipComponent.Name][currentSelectedVersion] = currentReleases; - return false; - } - - var candidateVersion = state.ValidVersionMap[pipComponent.Name].Keys.Any() ? state.ValidVersionMap[pipComponent.Name].Keys.Last() : null; - - node.Value = new PipComponent(pipComponent.Name, candidateVersion, license: pipComponent.License, author: pipComponent.Author); - - var dependencies = (await this.FetchPackageDependenciesAsync(state, newSpec)).ToDictionary(x => x.Name, x => x); - - var toRemove = new List(); - foreach (var child in node.Children) - { - var pipChild = child.Value; - - if (!dependencies.TryGetValue(pipChild.Name, out var newDependency)) - { - toRemove.Add(child); - } - else if (!PythonVersionUtilities.VersionValidForSpec(pipChild.Version, newDependency.DependencySpecifiers)) - { - if (!await this.InvalidateAndReprocessAsync(state, child, newDependency)) - { - return false; - } - } - } - - foreach (var remove in toRemove) - { - node.Children.Remove(remove); - } - - return true; - } - - private async Task> FetchPackageDependenciesAsync( + protected override async Task> FetchPackageDependenciesAsync( PythonResolverState state, PipDependencySpecification spec) { diff --git a/src/Microsoft.ComponentDetection.Detectors/pip/PythonResolverBase.cs b/src/Microsoft.ComponentDetection.Detectors/pip/PythonResolverBase.cs new file mode 100644 index 000000000..e501d8ff2 --- /dev/null +++ b/src/Microsoft.ComponentDetection.Detectors/pip/PythonResolverBase.cs @@ -0,0 +1,107 @@ +namespace Microsoft.ComponentDetection.Detectors.Pip; + +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +using Microsoft.ComponentDetection.Contracts.TypedComponent; +using Microsoft.Extensions.Logging; +using MoreLinq; +using Newtonsoft.Json; + +public abstract class PythonResolverBase +{ + private readonly ILogger logger; + + internal PythonResolverBase(ILogger logger) => this.logger = logger; + + /// + /// Given a state, node, and new spec, will reprocess a new valid version for the node. + /// + /// The PythonResolverState. + /// The PipGraphNode. + /// The PipDependencySpecification. + /// Returns true if the node can be reprocessed else false. + protected async Task InvalidateAndReprocessAsync( + PythonResolverState state, + PipGraphNode node, + PipDependencySpecification newSpec) + { + var pipComponent = node.Value; + + var oldVersions = state.ValidVersionMap[pipComponent.Name].Keys.ToList(); + var currentSelectedVersion = node.Value.Version; + var currentReleases = state.ValidVersionMap[pipComponent.Name][currentSelectedVersion]; + foreach (var version in oldVersions.Where(version => !PythonVersionUtilities.VersionValidForSpec(version, newSpec.DependencySpecifiers))) + { + state.ValidVersionMap[pipComponent.Name].Remove(version); + } + + if (state.ValidVersionMap[pipComponent.Name].Count == 0) + { + state.ValidVersionMap[pipComponent.Name][currentSelectedVersion] = currentReleases; + return false; + } + + var candidateVersion = state.ValidVersionMap[pipComponent.Name].Keys.Any() ? state.ValidVersionMap[pipComponent.Name].Keys.Last() : null; + + node.Value = new PipComponent(pipComponent.Name, candidateVersion, author: pipComponent.Author, license: pipComponent.License); + + var fetchedDependences = await this.FetchPackageDependenciesAsync(state, newSpec); + var dependencies = this.ResolveDependencySpecifications(pipComponent, fetchedDependences); + + var toRemove = new List(); + foreach (var child in node.Children) + { + var pipChild = child.Value; + + if (!dependencies.TryGetValue(pipChild.Name, out var newDependency)) + { + toRemove.Add(child); + } + else if (!PythonVersionUtilities.VersionValidForSpec(pipChild.Version, newDependency.DependencySpecifiers)) + { + if (!await this.InvalidateAndReprocessAsync(state, child, newDependency)) + { + return false; + } + } + } + + foreach (var remove in toRemove) + { + node.Children.Remove(remove); + } + + return true; + } + + /// + /// Multiple dependency specification versions can be given for a single package name. + /// Until a better method is devised, choose the latest entry. + /// See https://github.com/microsoft/component-detection/issues/963. + /// + /// Dictionary of package names to dependency version specifiers. + public Dictionary ResolveDependencySpecifications(PipComponent component, IList fetchedDependences) + { + var dependencies = new Dictionary(); + fetchedDependences.ForEach(d => + { + if (!dependencies.TryAdd(d.Name, d)) + { + this.logger.LogWarning( + "Duplicate package dependencies entry for component:{ComponentName} with dependency:{DependencyName}. Existing dependency specifiers: {ExistingSpecifiers}. New dependency specifiers: {NewSpecifiers}.", + component.Name, + d.Name, + JsonConvert.SerializeObject(dependencies[d.Name].DependencySpecifiers), + JsonConvert.SerializeObject(d.DependencySpecifiers)); + dependencies[d.Name] = d; + } + }); + + return dependencies; + } + + protected abstract Task> FetchPackageDependenciesAsync( + PythonResolverState state, + PipDependencySpecification spec); +} diff --git a/src/Microsoft.ComponentDetection.Detectors/pip/SimplePythonResolver.cs b/src/Microsoft.ComponentDetection.Detectors/pip/SimplePythonResolver.cs index d70241c5f..71fa03c89 100644 --- a/src/Microsoft.ComponentDetection.Detectors/pip/SimplePythonResolver.cs +++ b/src/Microsoft.ComponentDetection.Detectors/pip/SimplePythonResolver.cs @@ -10,9 +10,10 @@ namespace Microsoft.ComponentDetection.Detectors.Pip; using Microsoft.ComponentDetection.Contracts; using Microsoft.ComponentDetection.Contracts.TypedComponent; using Microsoft.Extensions.Logging; +using MoreLinq; using Newtonsoft.Json; -public class SimplePythonResolver : ISimplePythonResolver +public class SimplePythonResolver : PythonResolverBase, ISimplePythonResolver { private static readonly Regex VersionRegex = new(@"-((\d+)((\.)\w+((\+|\.)\w*)*)*)(.tar|-)", RegexOptions.Compiled); @@ -25,6 +26,7 @@ public class SimplePythonResolver : ISimplePythonResolver /// The simple PyPi client. /// The logger. public SimplePythonResolver(ISimplePyPiClient simplePypiClient, ILogger logger) + : base(logger) { this.simplePypiClient = simplePypiClient; this.logger = logger; @@ -136,8 +138,9 @@ await Parallel.ForEachAsync(initialPackages, async (rootPackage, ct) => else { this.logger.LogWarning( - "Unable to resolve package: {RootPackageName} gotten from pypi possibly due to invalid versions. Skipping package.", - rootPackage.Name); + "Unable to resolve root dependency {PackageName} with version specifiers {PackageVersions} from pypi possibly due to computed version constraints. Skipping package.", + rootPackage.Name, + JsonConvert.SerializeObject(rootPackage.DependencySpecifiers)); singleFileComponentRecorder.RegisterPackageParseFailure(rootPackage.Name); } } @@ -207,8 +210,9 @@ private async Task> ProcessQueueAsync(ISingleFileComponentRe else { this.logger.LogWarning( - "Unable to resolve dependency package {DependencyName} gotten from pypi possibly due to invalid versions. Skipping package", - dependencyNode.Name); + "Unable to resolve non-root dependency {PackageName} with version specifiers {PackageVersions} from pypi possibly due to computed version constraints. Skipping package.", + dependencyNode.Name, + JsonConvert.SerializeObject(dependencyNode.DependencySpecifiers)); singleFileComponentRecorder.RegisterPackageParseFailure(dependencyNode.Name); } } @@ -227,6 +231,11 @@ private async Task> ProcessQueueAsync(ISingleFileComponentRe private SortedDictionary> ConvertSimplePypiProjectToSortedDictionary(SimplePypiProject simplePypiProject, PipDependencySpecification spec) { var sortedProjectVersions = new SortedDictionary>(new PythonVersionComparer()); + if (simplePypiProject is null) + { + return sortedProjectVersions; + } + foreach (var file in simplePypiProject.Files) { try @@ -267,7 +276,7 @@ private SortedDictionary> ConvertSimplePypiP /// The PythonResolverState. /// The PipDependencySpecification. /// Returns a list of PipDependencySpecification. - private async Task> FetchPackageDependenciesAsync( + protected override async Task> FetchPackageDependenciesAsync( PythonResolverState state, PipDependencySpecification spec) { @@ -333,64 +342,4 @@ private async Task> FetchDependenciesFromPacka return dependencies; } - - /// - /// Given a state, node, and new spec, will reprocess a new valid version for the node. - /// - /// The PythonResolverState. - /// The PipGraphNode. - /// The PipDependencySpecification. - /// Returns true if the node can be reprocessed else false. - private async Task InvalidateAndReprocessAsync( - PythonResolverState state, - PipGraphNode node, - PipDependencySpecification newSpec) - { - var pipComponent = node.Value; - - var oldVersions = state.ValidVersionMap[pipComponent.Name].Keys.ToList(); - var currentSelectedVersion = node.Value.Version; - var currentReleases = state.ValidVersionMap[pipComponent.Name][currentSelectedVersion]; - foreach (var version in oldVersions.Where(version => !PythonVersionUtilities.VersionValidForSpec(version, newSpec.DependencySpecifiers))) - { - state.ValidVersionMap[pipComponent.Name].Remove(version); - } - - if (state.ValidVersionMap[pipComponent.Name].Count == 0) - { - state.ValidVersionMap[pipComponent.Name][currentSelectedVersion] = currentReleases; - return false; - } - - var candidateVersion = state.ValidVersionMap[pipComponent.Name].Keys.Any() ? state.ValidVersionMap[pipComponent.Name].Keys.Last() : null; - - node.Value = new PipComponent(pipComponent.Name, candidateVersion); - - var dependencies = (await this.FetchPackageDependenciesAsync(state, newSpec)).ToDictionary(x => x.Name, x => x); - - var toRemove = new List(); - foreach (var child in node.Children) - { - var pipChild = child.Value; - - if (!dependencies.TryGetValue(pipChild.Name, out var newDependency)) - { - toRemove.Add(child); - } - else if (!PythonVersionUtilities.VersionValidForSpec(pipChild.Version, newDependency.DependencySpecifiers)) - { - if (!await this.InvalidateAndReprocessAsync(state, child, newDependency)) - { - return false; - } - } - } - - foreach (var remove in toRemove) - { - node.Children.Remove(remove); - } - - return true; - } } diff --git a/test/Microsoft.ComponentDetection.Detectors.Tests/PipResolverTests.cs b/test/Microsoft.ComponentDetection.Detectors.Tests/PipResolverTests.cs index ca8760e2f..4276f663f 100644 --- a/test/Microsoft.ComponentDetection.Detectors.Tests/PipResolverTests.cs +++ b/test/Microsoft.ComponentDetection.Detectors.Tests/PipResolverTests.cs @@ -167,6 +167,80 @@ public async Task TestPipResolverNonExistantRootAsync() this.CompareGraphs(resolveResult.First(), expectedA).Should().BeTrue(); } + [TestMethod] + public async Task TestPipResolverInvalidSpecAsync() + { + var a = new PipDependencySpecification("a==1.0"); + var b = new PipDependencySpecification("b==1.0"); + var c = new PipDependencySpecification("c==1.0"); + var doesNotExist = new PipDependencySpecification("dne==1.0"); + + var versions = new List { "1.0" }; + + var aReleases = this.CreateReleasesDictionary(versions); + var bReleases = this.CreateReleasesDictionary(versions); + var cReleases = this.CreateReleasesDictionary(versions); + + var aProject = new PythonProject + { + Releases = aReleases, + Info = new PythonProjectInfo + { + MaintainerEmail = "Microsoft", + }, + }; + + var bProject = new PythonProject + { + Releases = bReleases, + }; + + var cProject = new PythonProject + { + Releases = cReleases, + }; + + var dneProject = new PythonProject + { + Releases = new SortedDictionary>(), + }; + + this.pyPiClient.Setup(x => x.GetProjectAsync(a)).ReturnsAsync(aProject); + this.pyPiClient.Setup(x => x.GetProjectAsync(b)).ReturnsAsync(bProject); + this.pyPiClient.Setup(x => x.GetProjectAsync(c)).ReturnsAsync(cProject); + this.pyPiClient.Setup(x => x.GetProjectAsync(doesNotExist)).ReturnsAsync(dneProject); + + this.pyPiClient.Setup(x => x.FetchPackageDependenciesAsync("a", "1.0", aReleases["1.0"].First())).ReturnsAsync(new List { b }); + this.pyPiClient.Setup(x => x.FetchPackageDependenciesAsync("b", "1.0", bReleases["1.0"].First())).ReturnsAsync(new List { c }); + this.pyPiClient.Setup(x => x.FetchPackageDependenciesAsync("c", "1.0", cReleases["1.0"].First())).ReturnsAsync(new List { }); + + var dependencies = new List { a, doesNotExist }; + + var resolver = new PythonResolver(this.pyPiClient.Object, this.loggerMock.Object); + + var resolveResult = await resolver.ResolveRootsAsync(this.recorderMock.Object, dependencies); + + resolveResult.Should().NotBeNull(); + + var expectedA = new PipGraphNode(new PipComponent("a", "1.0", "Microsoft", null)); + var expectedB = new PipGraphNode(new PipComponent("b", "1.0")); + var expectedC = new PipGraphNode(new PipComponent("c", "1.0")); + + expectedA.Children.Add(expectedB); + expectedB.Parents.Add(expectedA); + expectedB.Children.Add(expectedC); + expectedC.Parents.Add(expectedB); + + this.CompareGraphs(resolveResult.First(), expectedA).Should().BeTrue(); + + this.loggerMock.Verify(x => x.Log( + LogLevel.Warning, + It.IsAny(), + It.Is((o, t) => string.Equals("Unable to resolve root dependency dne with version specifiers [\"==1.0\"] from pypi possibly due to computed version constraints. Skipping package.", o.ToString(), StringComparison.Ordinal)), + It.IsAny(), + (Func)It.IsAny())); + } + [TestMethod] public async Task TestPipResolverNonExistantLeafAsync() { diff --git a/test/Microsoft.ComponentDetection.Detectors.Tests/SimplePythonResolverTests.cs b/test/Microsoft.ComponentDetection.Detectors.Tests/SimplePythonResolverTests.cs index 20c39ff5a..bf6c5c48d 100644 --- a/test/Microsoft.ComponentDetection.Detectors.Tests/SimplePythonResolverTests.cs +++ b/test/Microsoft.ComponentDetection.Detectors.Tests/SimplePythonResolverTests.cs @@ -207,6 +207,55 @@ public async Task TestPipResolverBacktrackAsync() this.simplePyPiClient.Verify(x => x.FetchPackageFileStreamAsync(It.IsAny()), Times.Exactly(4)); } + [TestMethod] + public async Task TestPipResolverInvalidVersionSpecAsync() + { + var a = "a==1.0"; + var b = "b==1.0"; + var c = "c<=1.1"; + var cAlt = "c==1.0"; + + var specA = new PipDependencySpecification(a); + var specB = new PipDependencySpecification(b); + var specC = new PipDependencySpecification(c); + var specCAlt = new PipDependencySpecification(cAlt); + + var aReleases = this.CreateSimplePypiProject(new List<(string, string)> { ("1.0", "bdist_wheel") }); + var bReleases = this.CreateSimplePypiProject(new List<(string, string)> { ("1.0", "bdist_wheel") }); + var cReleases = this.CreateSimplePypiProject(new List<(string, string)> { ("1.2", "bdist_wheel") }); + + this.simplePyPiClient.Setup(x => x.GetSimplePypiProjectAsync(It.Is(x => x.Name.Equals("a")))).ReturnsAsync(aReleases); + this.simplePyPiClient.Setup(x => x.GetSimplePypiProjectAsync(It.Is(x => x.Name.Equals("b")))).ReturnsAsync(bReleases); + this.simplePyPiClient.Setup(x => x.GetSimplePypiProjectAsync(It.Is(x => x.Name.Equals("c") && x.DependencySpecifiers.First().Equals("<=1.1")))).ReturnsAsync(cReleases); + + this.simplePyPiClient.Setup(x => x.FetchPackageFileStreamAsync(aReleases.Files.First().Url)).ReturnsAsync(this.CreatePypiZip("a", "1.0", this.CreateMetadataString(new List() { b, c }))); + this.simplePyPiClient.Setup(x => x.FetchPackageFileStreamAsync(bReleases.Files.First().Url)).ReturnsAsync(this.CreatePypiZip("b", "1.0", this.CreateMetadataString(new List() { cAlt }))); + + var dependencies = new List { specA }; + + var resolver = new SimplePythonResolver(this.simplePyPiClient.Object, this.loggerMock.Object); + + var resolveResult = await resolver.ResolveRootsAsync(this.recorderMock.Object, dependencies); + + resolveResult.Should().NotBeNull(); + + var expectedA = new PipGraphNode(new PipComponent("a", "1.0")); + var expectedB = new PipGraphNode(new PipComponent("b", "1.0")); + + expectedA.Children.Add(expectedB); + expectedB.Parents.Add(expectedA); + + this.CompareGraphs(resolveResult.First(), expectedA).Should().BeTrue(); + this.simplePyPiClient.Verify(x => x.FetchPackageFileStreamAsync(It.IsAny()), Times.Exactly(2)); + + this.loggerMock.Verify(x => x.Log( + LogLevel.Warning, + It.IsAny(), + It.Is((o, t) => string.Equals("Unable to resolve non-root dependency c with version specifiers [\"<=1.1\"] from pypi possibly due to computed version constraints. Skipping package.", o.ToString(), StringComparison.Ordinal)), + It.IsAny(), + (Func)It.IsAny())); + } + [TestMethod] public async Task TestPipResolverVersionExtractionWithDifferentVersionFormatsAsync() {