Skip to content

Commit

Permalink
Catch version spec failures in Python detection (#1006)
Browse files Browse the repository at this point in the history
* Catch version spec failures in Python detection

* Bump versions

---------

Co-authored-by: Coby Allred <coallred@microsoft.com>
  • Loading branch information
cobya and Coby Allred authored Feb 9, 2024
1 parent 349ef7a commit ca32600
Show file tree
Hide file tree
Showing 6 changed files with 238 additions and 87 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ public PipComponentDetector(

public override IEnumerable<ComponentType> SupportedComponentTypes { get; } = new[] { ComponentType.Pip };

public override int Version { get; } = 6;
public override int Version { get; } = 7;

protected override async Task<IObservable<ProcessRequest>> OnPrepareDetectionAsync(IObservable<ProcessRequest> processRequests, IDictionary<string, string> detectorArgs)
{
Expand Down
88 changes: 49 additions & 39 deletions src/Microsoft.ComponentDetection.Detectors/pip/PythonResolver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -88,54 +88,64 @@ private async Task<IList<PipGraphNode>> ProcessQueueAsync(ISingleFileComponentRe

foreach (var dependencyNode in dependencies)
{
// if we have already seen the dependency and the version we have is valid, just add the dependency to the graph
if (state.NodeReferences.TryGetValue(dependencyNode.Name, out var node) &&
PythonVersionUtilities.VersionValidForSpec(node.Value.Version, dependencyNode.DependencySpecifiers))
try
{
state.NodeReferences[currentNode.Name].Children.Add(node);
node.Parents.Add(state.NodeReferences[currentNode.Name]);
}
else if (node != null)
{
this.logger.LogWarning("Candidate version ({NodeValueId}) for {DependencyName} already exists in map and the version is NOT valid.", node.Value.Id, dependencyNode.Name);
this.logger.LogWarning("Specifiers: {DependencySpecifiers} for package {CurrentNodeName} caused this.", string.Join(',', dependencyNode.DependencySpecifiers), currentNode.Name);

// The currently selected version is invalid, try to see if there is another valid version available
if (!await this.InvalidateAndReprocessAsync(state, node, dependencyNode))
// if we have already seen the dependency and the version we have is valid, just add the dependency to the graph
if (state.NodeReferences.TryGetValue(dependencyNode.Name, out var node) &&
PythonVersionUtilities.VersionValidForSpec(node.Value.Version, dependencyNode.DependencySpecifiers))
{
this.logger.LogWarning(
"Version Resolution for {DependencyName} failed, assuming last valid version is used.",
dependencyNode.Name);

// there is no valid version available for the node, dependencies are incompatible,
state.NodeReferences[currentNode.Name].Children.Add(node);
node.Parents.Add(state.NodeReferences[currentNode.Name]);
}
}
else
{
// We haven't encountered this package before, so let's fetch it and find a candidate
var project = await this.pypiClient.GetProjectAsync(dependencyNode);

var result = project.Releases;

if (result is not null && result.Keys.Any())
else if (node != null)
{
state.ValidVersionMap[dependencyNode.Name] = result;
var candidateVersion = state.ValidVersionMap[dependencyNode.Name].Keys.Any()
? state.ValidVersionMap[dependencyNode.Name].Keys.Last() : null;

this.AddGraphNode(state, state.NodeReferences[currentNode.Name], dependencyNode.Name, candidateVersion, license: this.GetLicenseFromProject(project), author: this.GetSupplierFromProject(project));

state.ProcessingQueue.Enqueue((root, dependencyNode));
this.logger.LogWarning("Candidate version ({NodeValueId}) for {DependencyName} already exists in map and the version is NOT valid.", node.Value.Id, dependencyNode.Name);
this.logger.LogWarning("Specifiers: {DependencySpecifiers} for package {CurrentNodeName} caused this.", string.Join(',', dependencyNode.DependencySpecifiers), currentNode.Name);

// The currently selected version is invalid, try to see if there is another valid version available
if (!await this.InvalidateAndReprocessAsync(state, node, dependencyNode))
{
this.logger.LogWarning(
"Version Resolution for {DependencyName} failed, assuming last valid version is used.",
dependencyNode.Name);

// there is no valid version available for the node, dependencies are incompatible,
}
}
else
{
this.logger.LogWarning(
"Unable to resolve non-root dependency {PackageName} with version specifiers {PackageVersions} from pypi possibly due to computed version constraints. Skipping package.",
dependencyNode.Name,
JsonConvert.SerializeObject(dependencyNode.DependencySpecifiers));
singleFileComponentRecorder.RegisterPackageParseFailure(dependencyNode.Name);
// We haven't encountered this package before, so let's fetch it and find a candidate
var project = await this.pypiClient.GetProjectAsync(dependencyNode);

var result = project.Releases;

if (result is not null && result.Keys.Any())
{
state.ValidVersionMap[dependencyNode.Name] = result;
var candidateVersion = state.ValidVersionMap[dependencyNode.Name].Keys.Any()
? state.ValidVersionMap[dependencyNode.Name].Keys.Last() : null;

this.AddGraphNode(state, state.NodeReferences[currentNode.Name], dependencyNode.Name, candidateVersion, license: this.GetLicenseFromProject(project), author: this.GetSupplierFromProject(project));

state.ProcessingQueue.Enqueue((root, dependencyNode));
}
else
{
this.logger.LogWarning(
"Unable to resolve non-root dependency {PackageName} with version specifiers {PackageVersions} from pypi possibly due to computed version constraints. Skipping package.",
dependencyNode.Name,
JsonConvert.SerializeObject(dependencyNode.DependencySpecifiers));
singleFileComponentRecorder.RegisterPackageParseFailure(dependencyNode.Name);
}
}
}
catch (ArgumentException ae)
{
// If version specifier parsing fails, don't attempt to reprocess because it would fail also.
// Log a package failure warning and continue.
this.logger.LogWarning("Failure resolving Python package {DependencyName} with message: {ExMessage}.", dependencyNode.Name, ae.Message);
singleFileComponentRecorder.RegisterPackageParseFailure(dependencyNode.Name);
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ public SimplePipComponentDetector(

public override IEnumerable<ComponentType> SupportedComponentTypes { get; } = new[] { ComponentType.Pip };

public override int Version { get; } = 1;
public override int Version { get; } = 2;

protected override async Task<IObservable<ProcessRequest>> OnPrepareDetectionAsync(IObservable<ProcessRequest> processRequests, IDictionary<string, string> detectorArgs)
{
Expand Down
102 changes: 56 additions & 46 deletions src/Microsoft.ComponentDetection.Detectors/pip/SimplePythonResolver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -161,61 +161,71 @@ private async Task<IList<PipGraphNode>> ProcessQueueAsync(ISingleFileComponentRe

foreach (var dependencyNode in dependencies)
{
// if we have already seen the dependency and the version we have is valid, just add the dependency to the graph
if (state.NodeReferences.TryGetValue(dependencyNode.Name, out var node) &&
PythonVersionUtilities.VersionValidForSpec(node.Value.Version, dependencyNode.DependencySpecifiers))
try
{
state.NodeReferences[currentNode.Name].Children.Add(node);
node.Parents.Add(state.NodeReferences[currentNode.Name]);
}
else if (node != null)
{
this.logger.LogWarning("Candidate version ({NodeValueId}) for {DependencyName} already exists in map and the version is NOT valid.", node.Value.Id, dependencyNode.Name);
this.logger.LogWarning("Specifiers: {DependencySpecifiers} for package {CurrentNodeName} caused this.", string.Join(',', dependencyNode.DependencySpecifiers), currentNode.Name);

// The currently selected version is invalid, try to see if there is another valid version available
if (!await this.InvalidateAndReprocessAsync(state, node, dependencyNode))
// if we have already seen the dependency and the version we have is valid, just add the dependency to the graph
if (state.NodeReferences.TryGetValue(dependencyNode.Name, out var node) &&
PythonVersionUtilities.VersionValidForSpec(node.Value.Version, dependencyNode.DependencySpecifiers))
{
this.logger.LogWarning(
"Version Resolution for {DependencyName} failed, assuming last valid version is used.",
dependencyNode.Name);

// there is no valid version available for the node, dependencies are incompatible,
state.NodeReferences[currentNode.Name].Children.Add(node);
node.Parents.Add(state.NodeReferences[currentNode.Name]);
}
}
else
{
// We haven't encountered this package before, so let's fetch it and find a candidate
var newProject = await this.simplePypiClient.GetSimplePypiProjectAsync(dependencyNode);

if (newProject == null || !newProject.Files.Any())
else if (node != null)
{
this.logger.LogWarning(
"Dependency Package {DependencyName} not found in Pypi. Skipping package",
dependencyNode.Name);
singleFileComponentRecorder.RegisterPackageParseFailure(dependencyNode.Name);
}

var result = this.ConvertSimplePypiProjectToSortedDictionary(newProject, dependencyNode);
if (result.Keys.Any())
{
state.ValidVersionMap[dependencyNode.Name] = result;
var candidateVersion = state.ValidVersionMap[dependencyNode.Name].Keys.Any()
? state.ValidVersionMap[dependencyNode.Name].Keys.Last() : null;

AddGraphNode(state, state.NodeReferences[currentNode.Name], dependencyNode.Name, candidateVersion);

state.ProcessingQueue.Enqueue((root, dependencyNode));
this.logger.LogWarning("Candidate version ({NodeValueId}) for {DependencyName} already exists in map and the version is NOT valid.", node.Value.Id, dependencyNode.Name);
this.logger.LogWarning("Specifiers: {DependencySpecifiers} for package {CurrentNodeName} caused this.", string.Join(',', dependencyNode.DependencySpecifiers), currentNode.Name);

// The currently selected version is invalid, try to see if there is another valid version available
if (!await this.InvalidateAndReprocessAsync(state, node, dependencyNode))
{
this.logger.LogWarning(
"Version Resolution for {DependencyName} failed, assuming last valid version is used.",
dependencyNode.Name);

// there is no valid version available for the node, dependencies are incompatible,
}
}
else
{
this.logger.LogWarning(
"Unable to resolve non-root dependency {PackageName} with version specifiers {PackageVersions} from pypi possibly due to computed version constraints. Skipping package.",
dependencyNode.Name,
JsonConvert.SerializeObject(dependencyNode.DependencySpecifiers));
singleFileComponentRecorder.RegisterPackageParseFailure(dependencyNode.Name);
// We haven't encountered this package before, so let's fetch it and find a candidate
var newProject = await this.simplePypiClient.GetSimplePypiProjectAsync(dependencyNode);

if (newProject == null || !newProject.Files.Any())
{
this.logger.LogWarning(
"Dependency Package {DependencyName} not found in Pypi. Skipping package",
dependencyNode.Name);
singleFileComponentRecorder.RegisterPackageParseFailure(dependencyNode.Name);
}

var result = this.ConvertSimplePypiProjectToSortedDictionary(newProject, dependencyNode);
if (result.Keys.Any())
{
state.ValidVersionMap[dependencyNode.Name] = result;
var candidateVersion = state.ValidVersionMap[dependencyNode.Name].Keys.Any()
? state.ValidVersionMap[dependencyNode.Name].Keys.Last() : null;

AddGraphNode(state, state.NodeReferences[currentNode.Name], dependencyNode.Name, candidateVersion);

state.ProcessingQueue.Enqueue((root, dependencyNode));
}
else
{
this.logger.LogWarning(
"Unable to resolve non-root dependency {PackageName} with version specifiers {PackageVersions} from pypi possibly due to computed version constraints. Skipping package.",
dependencyNode.Name,
JsonConvert.SerializeObject(dependencyNode.DependencySpecifiers));
singleFileComponentRecorder.RegisterPackageParseFailure(dependencyNode.Name);
}
}
}
catch (ArgumentException ae)
{
// If version specifier parsing fails, don't attempt to reprocess because it would fail also.
// Log a package failure warning and continue.
this.logger.LogWarning("Failure resolving Python package {DependencyName} with message: {ExMessage}.", dependencyNode.Name, ae.Message);
singleFileComponentRecorder.RegisterPackageParseFailure(dependencyNode.Name);
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,85 @@ public async Task TestPipResolverBacktrackAsync()
this.pyPiClient.Verify(x => x.FetchPackageDependenciesAsync(It.IsAny<string>(), It.IsAny<string>(), It.IsAny<PythonProjectRelease>()), Times.Exactly(4));
}

[TestMethod]
public async Task TestInvalidVersionSpecThrowsAsync()
{
    // Arrange: dependency chain a -> b -> c, where b lists c twice:
    // once with a valid pin and once with the specifier ">dev".
    var specA = new PipDependencySpecification("a==1.0");
    var specB = new PipDependencySpecification("b==1.0");
    var specC = new PipDependencySpecification("c==1.0");
    var invalidSpecC = new PipDependencySpecification("Requires-Dist: c (>dev)", true);

    var availableVersions = new List<string> { "1.0" };

    var releasesOfA = this.CreateReleasesDictionary(availableVersions);
    var releasesOfB = this.CreateReleasesDictionary(availableVersions);
    var releasesOfC = this.CreateReleasesDictionary(availableVersions);

    // Each project carries different author/license metadata fields.
    var projectA = new PythonProject
    {
        Releases = releasesOfA,
        Info = new PythonProjectInfo { Author = "Microsoft", License = "MIT" },
    };

    var projectB = new PythonProject
    {
        Releases = releasesOfB,
        Info = new PythonProjectInfo
        {
            AuthorEmail = "Microsoft <sample@microsoft.com>",
            Classifiers = new List<string> { "License :: OSI Approved :: MIT License" },
        },
    };

    var projectC = new PythonProject
    {
        Releases = releasesOfC,
        Info = new PythonProjectInfo
        {
            Maintainer = "Microsoft",
            Classifiers = new List<string>
            {
                "License :: OSI Approved :: MIT License",
                "License :: OSI Approved :: BSD License",
            },
        },
    };

    // Project lookups.
    this.pyPiClient.Setup(x => x.GetProjectAsync(specA)).ReturnsAsync(projectA);
    this.pyPiClient.Setup(x => x.GetProjectAsync(specB)).ReturnsAsync(projectB);
    this.pyPiClient.Setup(x => x.GetProjectAsync(specC)).ReturnsAsync(projectC);

    // Dependency lookups: a requires b; b requires c (valid pin) and c (">dev"); c has no dependencies.
    this.pyPiClient
        .Setup(x => x.FetchPackageDependenciesAsync("a", "1.0", releasesOfA["1.0"].First()))
        .ReturnsAsync(new List<PipDependencySpecification> { specB });
    this.pyPiClient
        .Setup(x => x.FetchPackageDependenciesAsync("b", "1.0", releasesOfB["1.0"].First()))
        .ReturnsAsync(new List<PipDependencySpecification> { specC, invalidSpecC });
    this.pyPiClient
        .Setup(x => x.FetchPackageDependenciesAsync("c", "1.0", releasesOfC["1.0"].First()))
        .ReturnsAsync(new List<PipDependencySpecification>());

    var roots = new List<PipDependencySpecification> { specA };
    var sut = new PythonResolver(this.pyPiClient.Object, this.loggerMock.Object);

    // Act
    var resolveResult = await sut.ResolveRootsAsync(this.recorderMock.Object, roots);

    // Assert: resolution still yields the a -> b -> c graph.
    resolveResult.Should().NotBeNull();

    var nodeA = new PipGraphNode(new PipComponent("a", "1.0", "Microsoft", "MIT"));
    var nodeB = new PipGraphNode(new PipComponent("b", "1.0", "Microsoft <sample@microsoft.com>", "MIT License"));
    var nodeC = new PipGraphNode(new PipComponent("c", "1.0", "Microsoft", "MIT License, BSD License"));

    nodeA.Children.Add(nodeB);
    nodeB.Parents.Add(nodeA);
    nodeB.Children.Add(nodeC);
    nodeC.Parents.Add(nodeB);

    this.CompareGraphs(resolveResult.First(), nodeA).Should().BeTrue();

    // Assert: a warning naming package c and the "dev" specification was logged.
    const string expectedWarning = "Failure resolving Python package c with message: The version specification dev is not a valid python version.";

    this.loggerMock.Verify(x => x.Log(
        LogLevel.Warning,
        It.IsAny<EventId>(),
        It.Is<It.IsAnyType>((o, t) => string.Equals(expectedWarning, o.ToString(), StringComparison.Ordinal)),
        It.IsAny<Exception>(),
        (Func<It.IsAnyType, Exception, string>)It.IsAny<object>()));
}

private bool CompareGraphs(PipGraphNode a, PipGraphNode b)
{
var componentA = a.Value;
Expand Down
Loading

0 comments on commit ca32600

Please sign in to comment.