Skip to content

Commit

Permalink
Make Python detection more resilient to unexpected failure cases (#962)
Browse files Browse the repository at this point in the history
* Make Python detection more resilient to unexpected cases

* Add Pip tests

* PR comments

---------

Co-authored-by: Coby Allred <coallred@microsoft.com>
  • Loading branch information
cobya and Coby Allred authored Jan 19, 2024
1 parent b65fa8a commit 9ea2b3c
Show file tree
Hide file tree
Showing 6 changed files with 261 additions and 133 deletions.
6 changes: 3 additions & 3 deletions src/Microsoft.ComponentDetection.Detectors/pip/IPyPiClient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ public async Task<PythonProject> GetProjectAsync(PipDependencySpecification spec
using var r = new PypiRetryTelemetryRecord { Name = spec.Name, DependencySpecifiers = spec.DependencySpecifiers?.ToArray(), StatusCode = result.Result.StatusCode };

this.logger.LogWarning(
"Received {StatusCode} {ReasonPhrase} from {RequestUri}. Waiting {TimeSpan} before retry attempt {RetryCount}",
"Received status:{StatusCode} with reason:{ReasonPhrase} from {RequestUri}. Waiting {TimeSpan} before retry attempt {RetryCount}",
result.Result.StatusCode,
result.Result.ReasonPhrase,
requestUri,
Expand Down Expand Up @@ -190,7 +190,7 @@ public async Task<PythonProject> GetProjectAsync(PipDependencySpecification spec
{
using var r = new PypiFailureTelemetryRecord { Name = spec.Name, DependencySpecifiers = spec.DependencySpecifiers?.ToArray(), StatusCode = request.StatusCode };

this.logger.LogWarning("Received {StatusCode} {ReasonPhrase} from {RequestUri}", request.StatusCode, request.ReasonPhrase, requestUri);
this.logger.LogWarning("Received status:{StatusCode} with reason:{ReasonPhrase} from {RequestUri}", request.StatusCode, request.ReasonPhrase, requestUri);

return new PythonProject();
}
Expand All @@ -212,7 +212,7 @@ public async Task<PythonProject> GetProjectAsync(PipDependencySpecification spec
parsedVersion.Valid && parsedVersion.IsReleasedPackage &&
PythonVersionUtilities.VersionValidForSpec(release.Key, spec.DependencySpecifiers))
{
versions.Releases.Add(release.Key, release.Value);
versions.Releases[release.Key] = release.Value;
}
}
catch (ArgumentException ae)
Expand Down
77 changes: 13 additions & 64 deletions src/Microsoft.ComponentDetection.Detectors/pip/PythonResolver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@ namespace Microsoft.ComponentDetection.Detectors.Pip;
using Microsoft.ComponentDetection.Contracts;
using Microsoft.ComponentDetection.Contracts.TypedComponent;
using Microsoft.Extensions.Logging;
using MoreLinq;
using Newtonsoft.Json;

public class PythonResolver : IPythonResolver
public class PythonResolver : PythonResolverBase, IPythonResolver
{
private readonly IPyPiClient pypiClient;
private readonly ILogger<PythonResolver> logger;
Expand All @@ -18,6 +20,7 @@ public class PythonResolver : IPythonResolver
private readonly string classifierFieldLicensePrefix = "License";

public PythonResolver(IPyPiClient pypiClient, ILogger<PythonResolver> logger)
: base(logger)
{
this.pypiClient = pypiClient;
this.logger = logger;
Expand All @@ -43,7 +46,7 @@ public async Task<IList<PipGraphNode>> ResolveRootsAsync(ISingleFileComponentRec

var result = project.Releases;

if (result.Keys.Any())
if (result is not null && result.Keys.Any())
{
state.ValidVersionMap[rootPackage.Name] = result;

Expand All @@ -62,8 +65,9 @@ public async Task<IList<PipGraphNode>> ResolveRootsAsync(ISingleFileComponentRec
else
{
this.logger.LogWarning(
"Root dependency {RootPackageName} not found on pypi. Skipping package.",
rootPackage.Name);
"Unable to resolve root dependency {PackageName} with version specifiers {PackageVersions} from pypi possibly due to computed version constraints. Skipping package.",
rootPackage.Name,
JsonConvert.SerializeObject(rootPackage.DependencySpecifiers));
singleFileComponentRecorder.RegisterPackageParseFailure(rootPackage.Name);
}
}
Expand Down Expand Up @@ -113,7 +117,7 @@ private async Task<IList<PipGraphNode>> ProcessQueueAsync(ISingleFileComponentRe

var result = project.Releases;

if (result.Keys.Any())
if (result is not null && result.Keys.Any())
{
state.ValidVersionMap[dependencyNode.Name] = result;
var candidateVersion = state.ValidVersionMap[dependencyNode.Name].Keys.Any()
Expand All @@ -126,8 +130,9 @@ private async Task<IList<PipGraphNode>> ProcessQueueAsync(ISingleFileComponentRe
else
{
this.logger.LogWarning(
"Dependency Package {DependencyName} not found in Pypi. Skipping package",
dependencyNode.Name);
"Unable to resolve non-root dependency {PackageName} with version specifiers {PackageVersions} from pypi possibly due to computed version constraints. Skipping package.",
dependencyNode.Name,
JsonConvert.SerializeObject(dependencyNode.DependencySpecifiers));
singleFileComponentRecorder.RegisterPackageParseFailure(dependencyNode.Name);
}
}
Expand All @@ -137,63 +142,7 @@ private async Task<IList<PipGraphNode>> ProcessQueueAsync(ISingleFileComponentRe
return state.Roots;
}

private async Task<bool> InvalidateAndReprocessAsync(
PythonResolverState state,
PipGraphNode node,
PipDependencySpecification newSpec)
{
var pipComponent = node.Value;

var oldVersions = state.ValidVersionMap[pipComponent.Name].Keys.ToList();
var currentSelectedVersion = node.Value.Version;
var currentReleases = state.ValidVersionMap[pipComponent.Name][currentSelectedVersion];
foreach (var version in oldVersions)
{
if (!PythonVersionUtilities.VersionValidForSpec(version, newSpec.DependencySpecifiers))
{
state.ValidVersionMap[pipComponent.Name].Remove(version);
}
}

if (state.ValidVersionMap[pipComponent.Name].Count == 0)
{
state.ValidVersionMap[pipComponent.Name][currentSelectedVersion] = currentReleases;
return false;
}

var candidateVersion = state.ValidVersionMap[pipComponent.Name].Keys.Any() ? state.ValidVersionMap[pipComponent.Name].Keys.Last() : null;

node.Value = new PipComponent(pipComponent.Name, candidateVersion, license: pipComponent.License, author: pipComponent.Author);

var dependencies = (await this.FetchPackageDependenciesAsync(state, newSpec)).ToDictionary(x => x.Name, x => x);

var toRemove = new List<PipGraphNode>();
foreach (var child in node.Children)
{
var pipChild = child.Value;

if (!dependencies.TryGetValue(pipChild.Name, out var newDependency))
{
toRemove.Add(child);
}
else if (!PythonVersionUtilities.VersionValidForSpec(pipChild.Version, newDependency.DependencySpecifiers))
{
if (!await this.InvalidateAndReprocessAsync(state, child, newDependency))
{
return false;
}
}
}

foreach (var remove in toRemove)
{
node.Children.Remove(remove);
}

return true;
}

private async Task<IList<PipDependencySpecification>> FetchPackageDependenciesAsync(
protected override async Task<IList<PipDependencySpecification>> FetchPackageDependenciesAsync(
PythonResolverState state,
PipDependencySpecification spec)
{
Expand Down
107 changes: 107 additions & 0 deletions src/Microsoft.ComponentDetection.Detectors/pip/PythonResolverBase.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
namespace Microsoft.ComponentDetection.Detectors.Pip;

using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using Microsoft.ComponentDetection.Contracts.TypedComponent;
using Microsoft.Extensions.Logging;
using MoreLinq;
using Newtonsoft.Json;

public abstract class PythonResolverBase
{
private readonly ILogger logger;

internal PythonResolverBase(ILogger logger) => this.logger = logger;

/// <summary>
/// Given a state, node, and new spec, will reprocess a new valid version for the node.
/// </summary>
/// <param name="state">The PythonResolverState.</param>
/// <param name="node">The PipGraphNode.</param>
/// <param name="newSpec">The PipDependencySpecification.</param>
/// <returns>Returns true if the node can be reprocessed else false.</returns>
protected async Task<bool> InvalidateAndReprocessAsync(
PythonResolverState state,
PipGraphNode node,
PipDependencySpecification newSpec)
{
var pipComponent = node.Value;

var oldVersions = state.ValidVersionMap[pipComponent.Name].Keys.ToList();
var currentSelectedVersion = node.Value.Version;
var currentReleases = state.ValidVersionMap[pipComponent.Name][currentSelectedVersion];
foreach (var version in oldVersions.Where(version => !PythonVersionUtilities.VersionValidForSpec(version, newSpec.DependencySpecifiers)))
{
state.ValidVersionMap[pipComponent.Name].Remove(version);
}

if (state.ValidVersionMap[pipComponent.Name].Count == 0)
{
state.ValidVersionMap[pipComponent.Name][currentSelectedVersion] = currentReleases;
return false;
}

var candidateVersion = state.ValidVersionMap[pipComponent.Name].Keys.Any() ? state.ValidVersionMap[pipComponent.Name].Keys.Last() : null;

node.Value = new PipComponent(pipComponent.Name, candidateVersion, author: pipComponent.Author, license: pipComponent.License);

var fetchedDependences = await this.FetchPackageDependenciesAsync(state, newSpec);
var dependencies = this.ResolveDependencySpecifications(pipComponent, fetchedDependences);

var toRemove = new List<PipGraphNode>();
foreach (var child in node.Children)
{
var pipChild = child.Value;

if (!dependencies.TryGetValue(pipChild.Name, out var newDependency))
{
toRemove.Add(child);
}
else if (!PythonVersionUtilities.VersionValidForSpec(pipChild.Version, newDependency.DependencySpecifiers))
{
if (!await this.InvalidateAndReprocessAsync(state, child, newDependency))
{
return false;
}
}
}

foreach (var remove in toRemove)
{
node.Children.Remove(remove);
}

return true;
}

/// <summary>
/// Multiple dependency specification versions can be given for a single package name.
/// Until a better method is devised, choose the latest entry.
/// See https://github.com/microsoft/component-detection/issues/963.
/// </summary>
/// <returns>Dictionary of package names to dependency version specifiers.</returns>
public Dictionary<string, PipDependencySpecification> ResolveDependencySpecifications(PipComponent component, IList<PipDependencySpecification> fetchedDependences)
{
var dependencies = new Dictionary<string, PipDependencySpecification>();
fetchedDependences.ForEach(d =>
{
if (!dependencies.TryAdd(d.Name, d))
{
this.logger.LogWarning(
"Duplicate package dependencies entry for component:{ComponentName} with dependency:{DependencyName}. Existing dependency specifiers: {ExistingSpecifiers}. New dependency specifiers: {NewSpecifiers}.",
component.Name,
d.Name,
JsonConvert.SerializeObject(dependencies[d.Name].DependencySpecifiers),
JsonConvert.SerializeObject(d.DependencySpecifiers));
dependencies[d.Name] = d;
}
});

return dependencies;
}

protected abstract Task<IList<PipDependencySpecification>> FetchPackageDependenciesAsync(
PythonResolverState state,
PipDependencySpecification spec);
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@ namespace Microsoft.ComponentDetection.Detectors.Pip;
using Microsoft.ComponentDetection.Contracts;
using Microsoft.ComponentDetection.Contracts.TypedComponent;
using Microsoft.Extensions.Logging;
using MoreLinq;
using Newtonsoft.Json;

public class SimplePythonResolver : ISimplePythonResolver
public class SimplePythonResolver : PythonResolverBase, ISimplePythonResolver
{
private static readonly Regex VersionRegex = new(@"-((\d+)((\.)\w+((\+|\.)\w*)*)*)(.tar|-)", RegexOptions.Compiled);

Expand All @@ -25,6 +26,7 @@ public class SimplePythonResolver : ISimplePythonResolver
/// <param name="simplePypiClient">The simple PyPi client.</param>
/// <param name="logger">The logger.</param>
public SimplePythonResolver(ISimplePyPiClient simplePypiClient, ILogger<SimplePythonResolver> logger)
: base(logger)
{
this.simplePypiClient = simplePypiClient;
this.logger = logger;
Expand Down Expand Up @@ -136,8 +138,9 @@ await Parallel.ForEachAsync(initialPackages, async (rootPackage, ct) =>
else
{
this.logger.LogWarning(
"Unable to resolve package: {RootPackageName} gotten from pypi possibly due to invalid versions. Skipping package.",
rootPackage.Name);
"Unable to resolve root dependency {PackageName} with version specifiers {PackageVersions} from pypi possibly due to computed version constraints. Skipping package.",
rootPackage.Name,
JsonConvert.SerializeObject(rootPackage.DependencySpecifiers));
singleFileComponentRecorder.RegisterPackageParseFailure(rootPackage.Name);
}
}
Expand Down Expand Up @@ -207,8 +210,9 @@ private async Task<IList<PipGraphNode>> ProcessQueueAsync(ISingleFileComponentRe
else
{
this.logger.LogWarning(
"Unable to resolve dependency package {DependencyName} gotten from pypi possibly due to invalid versions. Skipping package",
dependencyNode.Name);
"Unable to resolve non-root dependency {PackageName} with version specifiers {PackageVersions} from pypi possibly due to computed version constraints. Skipping package.",
dependencyNode.Name,
JsonConvert.SerializeObject(dependencyNode.DependencySpecifiers));
singleFileComponentRecorder.RegisterPackageParseFailure(dependencyNode.Name);
}
}
Expand All @@ -227,6 +231,11 @@ private async Task<IList<PipGraphNode>> ProcessQueueAsync(ISingleFileComponentRe
private SortedDictionary<string, IList<PythonProjectRelease>> ConvertSimplePypiProjectToSortedDictionary(SimplePypiProject simplePypiProject, PipDependencySpecification spec)
{
var sortedProjectVersions = new SortedDictionary<string, IList<PythonProjectRelease>>(new PythonVersionComparer());
if (simplePypiProject is null)
{
return sortedProjectVersions;
}

foreach (var file in simplePypiProject.Files)
{
try
Expand Down Expand Up @@ -267,7 +276,7 @@ private SortedDictionary<string, IList<PythonProjectRelease>> ConvertSimplePypiP
/// <param name="state"> The PythonResolverState. </param>
/// <param name="spec"> The PipDependencySpecification. </param>
/// <returns> Returns a list of PipDependencySpecification. </returns>
private async Task<IList<PipDependencySpecification>> FetchPackageDependenciesAsync(
protected override async Task<IList<PipDependencySpecification>> FetchPackageDependenciesAsync(
PythonResolverState state,
PipDependencySpecification spec)
{
Expand Down Expand Up @@ -333,64 +342,4 @@ private async Task<IList<PipDependencySpecification>> FetchDependenciesFromPacka

return dependencies;
}

/// <summary>
/// Given a state, node, and new spec, will reprocess a new valid version for the node.
/// </summary>
/// <param name="state"> The PythonResolverState. </param>
/// <param name="node"> The PipGraphNode. </param>
/// <param name="newSpec"> The PipDependencySpecification. </param>
/// <returns> Returns true if the node can be reprocessed else false. </returns>
private async Task<bool> InvalidateAndReprocessAsync(
PythonResolverState state,
PipGraphNode node,
PipDependencySpecification newSpec)
{
var pipComponent = node.Value;

var oldVersions = state.ValidVersionMap[pipComponent.Name].Keys.ToList();
var currentSelectedVersion = node.Value.Version;
var currentReleases = state.ValidVersionMap[pipComponent.Name][currentSelectedVersion];
foreach (var version in oldVersions.Where(version => !PythonVersionUtilities.VersionValidForSpec(version, newSpec.DependencySpecifiers)))
{
state.ValidVersionMap[pipComponent.Name].Remove(version);
}

if (state.ValidVersionMap[pipComponent.Name].Count == 0)
{
state.ValidVersionMap[pipComponent.Name][currentSelectedVersion] = currentReleases;
return false;
}

var candidateVersion = state.ValidVersionMap[pipComponent.Name].Keys.Any() ? state.ValidVersionMap[pipComponent.Name].Keys.Last() : null;

node.Value = new PipComponent(pipComponent.Name, candidateVersion);

var dependencies = (await this.FetchPackageDependenciesAsync(state, newSpec)).ToDictionary(x => x.Name, x => x);

var toRemove = new List<PipGraphNode>();
foreach (var child in node.Children)
{
var pipChild = child.Value;

if (!dependencies.TryGetValue(pipChild.Name, out var newDependency))
{
toRemove.Add(child);
}
else if (!PythonVersionUtilities.VersionValidForSpec(pipChild.Version, newDependency.DependencySpecifiers))
{
if (!await this.InvalidateAndReprocessAsync(state, child, newDependency))
{
return false;
}
}
}

foreach (var remove in toRemove)
{
node.Children.Remove(remove);
}

return true;
}
}
Loading

0 comments on commit 9ea2b3c

Please sign in to comment.