Skip to content

Commit

Permalink
Smarter Chains: check taskrun level results for Subjects
Browse files Browse the repository at this point in the history
Step 1/2 of tektoncd#850

Previously, Chains only looked at pipeline results to understand what
artifacts were generated in a pipeline. That meant pipeline authors needed
to name pipeline results following the type hinting rules and propagate
their values from individual TaskRun results.

Now, Chains is able to dive into individual TaskRun results to understand
what artifacts were generated throughout a pipeline. This way, pipeline
authors no longer need to worry about the rules when writing a pipeline
as long as they pull in the right tasks that produce type hinting results.

That said, the old behaviour - observing pipeline level results - is preserved
by introducing a configmap field `artifacts.pipelinerun.observe-mode`
which allows configuring how Chains observes the outputs.

Signed-off-by: Chuang Wang <chuangw@google.com>
  • Loading branch information
chuangw6 committed Aug 2, 2023
1 parent 25e7a6c commit c58bf20
Show file tree
Hide file tree
Showing 24 changed files with 322 additions and 51 deletions.
6 changes: 4 additions & 2 deletions docs/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,11 @@ Supported keys include:
| `artifacts.pipelinerun.format` | The format to store `PipelineRun` payloads in. | `in-toto`, `slsa/v1`| `in-toto` |
| `artifacts.pipelinerun.storage` | The storage backend to store `PipelineRun` signatures in. Multiple backends can be specified with comma-separated list ("tekton,oci"). To disable the `PipelineRun` artifact input an empty string (""). | `tekton`, `oci`, `gcs`, `docdb`, `grafeas` | `tekton` |
| `artifacts.pipelinerun.signer` | The signature backend to sign `PipelineRun` payloads with. | `x509`, `kms` | `x509` |
| `artifacts.pipelinerun.observe-mode` | The way that Chains observes inputs & outputs of a PipelineRun. The default option `pr` configures Chains to only inspect Pipeline level params/results, whereas the option `tr` configures Chains to dive into child TaskRuns. | `pr`, `tr` | `pr` |

> NOTE: For grafeas storage backend, currently we only support Container Analysis. We will make grafeas server address configurable within a short time.
> NOTE: `slsa/v1` is an alias of `in-toto` for backwards compatibility.
> NOTE:
> - For grafeas storage backend, currently we only support Container Analysis. We will make grafeas server address configurable within a short time.
> - `slsa/v1` is an alias of `in-toto` for backwards compatibility.
### OCI Configuration

Expand Down
98 changes: 91 additions & 7 deletions pkg/chains/formats/slsa/extract/extract.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,105 @@ import (
"github.com/in-toto/in-toto-golang/in_toto/slsa_provenance/common"
"github.com/tektoncd/chains/internal/backport"
"github.com/tektoncd/chains/pkg/artifacts"
"github.com/tektoncd/chains/pkg/chains/formats/slsa/internal/slsaconfig"
"github.com/tektoncd/chains/pkg/chains/objects"
"github.com/tektoncd/pipeline/pkg/apis/pipeline/v1beta1"
"knative.dev/pkg/logging"
)

// SubjectDigests returns software artifacts produced from the TaskRun/PipelineRun object
// in the form of standard subject field of intoto statement.
// The type hinting fields expected in results help identify the generated software artifacts.
// The type hinting fields expected in TaskRun results help identify the generated software artifacts in a TaskRun/PipelineRun.
// Valid type hinting fields must:
// - have suffix `IMAGE_URL` & `IMAGE_DIGEST` or `ARTIFACT_URI` & `ARTIFACT_DIGEST` pair.
// - the `*_DIGEST` field must be in the format of "<algorithm>:<actual-sha>" where the algorithm must be "sha256" and actual sha must be valid per https://github.com/opencontainers/image-spec/blob/main/descriptor.md#sha-256.
// - the `*_URL` or `*_URI` fields cannot be empty.
func SubjectDigests(ctx context.Context, obj objects.TektonObject) []intoto.Subject {
func SubjectDigests(ctx context.Context, obj objects.TektonObject, slsaconfig *slsaconfig.SlsaConfig) []intoto.Subject {
	// Choose the extraction strategy from the concrete Tekton object wrapped by
	// obj: PipelineRuns go through the observe-mode aware path, TaskRuns are
	// parsed directly, and any other kind yields no subjects.
	var found []intoto.Subject
	wrapped := obj.GetObject()
	if _, isPipelineRun := wrapped.(*v1beta1.PipelineRun); isPipelineRun {
		found = subjectsFromPipelineRun(ctx, obj, slsaconfig)
	} else if _, isTaskRun := wrapped.(*v1beta1.TaskRun); isTaskRun {
		found = subjectsFromTektonObject(ctx, obj)
	}

	// Order the subjects by name before returning them.
	// NOTE(review): the comparator uses `<=`, which is not a strict "less"
	// function. It is kept as-is because switching to `<` would change the
	// relative order of subjects that share a name - confirm with callers
	// and golden files before tightening it.
	byName := func(a, b int) bool {
		return found[a].Name <= found[b].Name
	}
	sort.Slice(found, byName)

	return found
}

// subjectsFromPipelineRun extracts the subjects of a PipelineRun according to
// the configured observe mode:
//
//   - "pr": only the PipelineRun's own results are inspected.
//   - any other value: every child TaskRun that completed is inspected and
//     its subjects are merged, de-duplicated through addSubject.
//
// Outside of "pr" mode obj must be a *objects.PipelineRunObject; the type
// assertion below panics otherwise. A nil subject slice is returned when the
// PipelineRun status carries no PipelineSpec.
func subjectsFromPipelineRun(ctx context.Context, obj objects.TektonObject, slsaconfig *slsaconfig.SlsaConfig) []intoto.Subject {
	logger := logging.FromContext(ctx)
	// If the configured input/output observation mode is pipeline level, then
	// call the generic function to parse the subject.
	if slsaconfig.PrObserveMode == "pr" {
		return subjectsFromTektonObject(ctx, obj)
	}

	// If the configured input/output observation mode is task level, then dive into
	// individual taskruns and collect subjects.
	var result []intoto.Subject

	pro := obj.(*objects.PipelineRunObject)

	pSpec := pro.Status.PipelineSpec
	if pSpec == nil {
		return result
	}

	// Build a fresh slice instead of appending onto pSpec.Tasks: appending
	// directly could write the Finally tasks into spare capacity of the
	// Tasks backing array, which is owned by the PipelineRun status.
	pipelineTasks := make([]v1beta1.PipelineTask, 0, len(pSpec.Tasks)+len(pSpec.Finally))
	pipelineTasks = append(pipelineTasks, pSpec.Tasks...)
	pipelineTasks = append(pipelineTasks, pSpec.Finally...)

	for _, t := range pipelineTasks {
		tr := pro.GetTaskRunFromTask(t.Name)
		// Ignore Tasks that did not execute during the PipelineRun.
		if tr == nil || tr.Status.CompletionTime == nil {
			logger.Infof("taskrun status not found for task %s", t.Name)
			continue
		}

		trSubjects := subjectsFromTektonObject(ctx, objects.NewTaskRunObject(tr))
		for _, s := range trSubjects {
			result = addSubject(result, s)
		}
	}

	return result
}

// addSubject merges item into original and returns the resulting slice.
//
// When original already holds a subject that subjectEqual considers equivalent,
// no new entry is appended; the existing entry is overwritten in place only if
// item carries a larger digest set. Otherwise item is appended at the end.
func addSubject(original []intoto.Subject, item intoto.Subject) []intoto.Subject {
	for idx := range original {
		if !subjectEqual(original[idx], item) {
			continue
		}
		// First equivalent entry found: keep whichever has the richer digest set.
		if len(item.Digest) > len(original[idx].Digest) {
			original[idx] = item
		}
		return original
	}

	return append(original, item)
}

// subjectEqual reports whether x and y describe the same artifact: they must
// have the same name and share at least one (algorithm, hex value) digest pair.
func subjectEqual(x, y intoto.Subject) bool {
	if x.Name == y.Name {
		for alg, want := range x.Digest {
			got, present := y.Digest[alg]
			if present && got == want {
				return true
			}
		}
	}
	return false
}

func subjectsFromTektonObject(ctx context.Context, obj objects.TektonObject) []intoto.Subject {
logger := logging.FromContext(ctx)
var subjects []intoto.Subject

Expand Down Expand Up @@ -121,19 +207,17 @@ func SubjectDigests(ctx context.Context, obj objects.TektonObject) []intoto.Subj
})
}
}
sort.Slice(subjects, func(i, j int) bool {
return subjects[i].Name <= subjects[j].Name
})

return subjects
}

// RetrieveAllArtifactURIs returns all the URIs of the software artifacts produced from the run object.
// - It first extracts intoto subjects from run object results and converts the subjects
// to a slice of string URIs in the format of "NAME" + "@" + "ALGORITHM" + ":" + "DIGEST".
// - If no subjects could be extracted from results, then an empty slice is returned.
func RetrieveAllArtifactURIs(ctx context.Context, obj objects.TektonObject) []string {
func RetrieveAllArtifactURIs(ctx context.Context, obj objects.TektonObject, observeMode string) []string {
result := []string{}
subjects := SubjectDigests(ctx, obj)
subjects := SubjectDigests(ctx, obj, &slsaconfig.SlsaConfig{PrObserveMode: observeMode})

for _, s := range subjects {
for algo, digest := range s.Digest {
Expand Down
156 changes: 150 additions & 6 deletions pkg/chains/formats/slsa/extract/extract_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,16 @@ package extract_test
import (
"fmt"
"testing"
"time"

"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
intoto "github.com/in-toto/in-toto-golang/in_toto"
"github.com/tektoncd/chains/pkg/chains/formats/slsa/extract"
"github.com/tektoncd/chains/pkg/chains/formats/slsa/internal/slsaconfig"
"github.com/tektoncd/chains/pkg/chains/objects"
"github.com/tektoncd/pipeline/pkg/apis/pipeline/v1beta1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
logtesting "knative.dev/pkg/logging/testing"
)

Expand Down Expand Up @@ -102,16 +105,15 @@ func TestSubjectDigestsAndRetrieveAllArtifactURIs(t *testing.T) {
// test both taskrun object and pipelinerun object
runObjects := []objects.TektonObject{
createTaskRunObjectWithResults(tc.results),
createPipelineRunObjectWithResults(tc.results),
createPipelineRunObjectWithPipelineResults(tc.results),
}

for _, o := range runObjects {
gotSubjects := extract.SubjectDigests(ctx, o)
gotSubjects := extract.SubjectDigests(ctx, o, &slsaconfig.SlsaConfig{PrObserveMode: "pr"})
if diff := cmp.Diff(tc.wantSubjects, gotSubjects, cmpopts.SortSlices(func(x, y intoto.Subject) bool { return x.Name < y.Name })); diff != "" {
t.Errorf("Wrong subjects extracted, diff=%s", diff)
t.Errorf("Wrong subjects extracted, diff=%s, %s", diff, gotSubjects)
}

gotURIs := extract.RetrieveAllArtifactURIs(ctx, o)
gotURIs := extract.RetrieveAllArtifactURIs(ctx, o, "pr")
if diff := cmp.Diff(tc.wantFullURLs, gotURIs, cmpopts.SortSlices(func(x, y string) bool { return x < y })); diff != "" {
t.Errorf("Wrong URIs extracted, diff=%s", diff)
}
Expand All @@ -121,6 +123,107 @@ func TestSubjectDigestsAndRetrieveAllArtifactURIs(t *testing.T) {
}
}

// TestPipelineRunObserveModeForSubjects checks that SubjectDigests and
// RetrieveAllArtifactURIs return the expected artifacts for a PipelineRun under
// both observe modes ("pr" and "tr"), including de-duplication of identical
// subjects reported by different child TaskRuns.
func TestPipelineRunObserveModeForSubjects(t *testing.T) {
	var tests = []struct {
		name         string
		pro          objects.TektonObject
		observeMode  string
		wantSubjects []intoto.Subject
		wantFullURLs []string
	}{
		{
			name:        "observe mode: pr",
			pro:         createPipelineRunObjectWithPipelineResults(map[string]string{artifactURL1: "sha256:" + artifactDigest1}),
			observeMode: "pr",
			wantSubjects: []intoto.Subject{
				{
					Name: artifactURL1,
					Digest: map[string]string{
						"sha256": artifactDigest1,
					},
				},
			},
			wantFullURLs: []string{fmt.Sprintf("%s@sha256:%s", artifactURL1, artifactDigest1)},
		},
		{
			name:        "observe mode: tr, no duplication",
			pro:         createPipelineRunObjectWithTaskRunResults([]artifact{{uri: artifactURL2, digest: "sha256:" + artifactDigest2}}),
			observeMode: "tr",
			wantSubjects: []intoto.Subject{
				{
					Name: artifactURL2,
					Digest: map[string]string{
						"sha256": artifactDigest2,
					},
				},
			},
			wantFullURLs: []string{fmt.Sprintf("%s@sha256:%s", artifactURL2, artifactDigest2)},
		},
		{
			name: "observe mode: tr - same uri with different sha256 digests",
			pro: createPipelineRunObjectWithTaskRunResults([]artifact{
				{uri: artifactURL2, digest: "sha256:" + artifactDigest1},
				{uri: artifactURL2, digest: "sha256:" + artifactDigest2},
			}),
			observeMode: "tr",
			wantSubjects: []intoto.Subject{
				{
					Name: artifactURL2,
					Digest: map[string]string{
						"sha256": artifactDigest2,
					},
				},
				{
					Name: artifactURL2,
					Digest: map[string]string{
						"sha256": artifactDigest1,
					},
				},
			},
			wantFullURLs: []string{
				fmt.Sprintf("%s@sha256:%s", artifactURL2, artifactDigest1),
				fmt.Sprintf("%s@sha256:%s", artifactURL2, artifactDigest2),
			},
		},
		{
			name: "observe mode: tr - same uri with same sha256 digests",
			pro: createPipelineRunObjectWithTaskRunResults([]artifact{
				{uri: artifactURL2, digest: "sha256:" + artifactDigest2},
				{uri: artifactURL2, digest: "sha256:" + artifactDigest2},
			}),
			observeMode: "tr",
			wantSubjects: []intoto.Subject{
				{
					Name: artifactURL2,
					Digest: map[string]string{
						"sha256": artifactDigest2,
					},
				},
			},
			wantFullURLs: []string{
				fmt.Sprintf("%s@sha256:%s", artifactURL2, artifactDigest2),
			},
		},
	}

	// lessSubject orders subjects by name and breaks ties on the printed
	// digest set (fmt prints maps in sorted key order). Comparing names alone
	// leaves subjects that share a name unordered, which would make the
	// comparison depend on the order in which they were produced.
	lessSubject := func(x, y intoto.Subject) bool {
		if x.Name != y.Name {
			return x.Name < y.Name
		}
		return fmt.Sprint(x.Digest) < fmt.Sprint(y.Digest)
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			ctx := logtesting.TestContextWithLogger(t)

			// Only PipelineRun objects are exercised here; the observe mode
			// comes from the test case.
			gotSubjects := extract.SubjectDigests(ctx, tc.pro, &slsaconfig.SlsaConfig{PrObserveMode: tc.observeMode})
			if diff := cmp.Diff(tc.wantSubjects, gotSubjects, cmpopts.SortSlices(lessSubject)); diff != "" {
				t.Errorf("Wrong subjects extracted, diff=%s, %s", diff, gotSubjects)
			}

			gotURIs := extract.RetrieveAllArtifactURIs(ctx, tc.pro, tc.observeMode)
			if diff := cmp.Diff(tc.wantFullURLs, gotURIs, cmpopts.SortSlices(func(x, y string) bool { return x < y })); diff != "" {
				t.Errorf("Wrong URIs extracted, diff=%s", diff)
			}
		})
	}
}

func createTaskRunObjectWithResults(results map[string]string) objects.TektonObject {
trResults := []v1beta1.TaskRunResult{}
prefix := 0
Expand All @@ -143,7 +246,7 @@ func createTaskRunObjectWithResults(results map[string]string) objects.TektonObj
)
}

func createPipelineRunObjectWithResults(results map[string]string) objects.TektonObject {
func createPipelineRunObjectWithPipelineResults(results map[string]string) objects.TektonObject {
prResults := []v1beta1.PipelineRunResult{}
prefix := 0
for url, digest := range results {
Expand All @@ -164,3 +267,44 @@ func createPipelineRunObjectWithResults(results map[string]string) objects.Tekto
},
)
}

// artifact pairs the URI of a produced artifact with its digest string
// (the tests in this file pass digests in "sha256:<hex>" form).
type artifact struct {
	uri, digest string
}

// createPipelineRunObjectWithTaskRunResults builds a PipelineRun object with
// one child TaskRun per entry in results. For the i-th entry it registers a
// PipelineTask named "task-<i>" and appends a TaskRun labelled with that task
// name, carrying a completion time and an "<i>_IMAGE_DIGEST" / "<i>_IMAGE_URL"
// result pair.
func createPipelineRunObjectWithTaskRunResults(results []artifact) objects.TektonObject {
	status := v1beta1.PipelineRunStatus{
		PipelineRunStatusFields: v1beta1.PipelineRunStatusFields{
			PipelineSpec: &v1beta1.PipelineSpec{},
		},
	}
	pro := objects.NewPipelineRunObject(&v1beta1.PipelineRun{Status: status})

	for idx, res := range results {
		taskName := fmt.Sprintf("task-%d", idx)

		// Type-hinted results of the simulated child TaskRun.
		trResults := []v1beta1.TaskRunResult{
			{Name: fmt.Sprintf("%v_IMAGE_DIGEST", idx), Value: *v1beta1.NewStructuredValues(res.digest)},
			{Name: fmt.Sprintf("%v_IMAGE_URL", idx), Value: *v1beta1.NewStructuredValues(res.uri)},
		}
		finishedAt := &metav1.Time{Time: time.Date(1995, time.December, 24, 6, 12, 12, 24, time.UTC)}

		child := &v1beta1.TaskRun{
			ObjectMeta: metav1.ObjectMeta{
				Labels: map[string]string{objects.PipelineTaskLabel: taskName},
			},
			Status: v1beta1.TaskRunStatus{
				TaskRunStatusFields: v1beta1.TaskRunStatusFields{
					CompletionTime: finishedAt,
					TaskRunResults: trResults,
				},
			},
		}

		// Attach the TaskRun first, then register the matching PipelineTask.
		pro.AppendTaskRun(child)
		spec := pro.Status.PipelineSpec
		spec.Tasks = append(spec.Tasks, v1beta1.PipelineTask{Name: taskName})
	}

	return pro
}
2 changes: 2 additions & 0 deletions pkg/chains/formats/slsa/internal/slsaconfig/slsaconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,6 @@ package slsaconfig
// SlsaConfig carries the configuration values used when building SLSA
// provenance payloads.
type SlsaConfig struct {
// BuilderID is the URI of the trusted build platform.
BuilderID string
// PrObserveMode configures whether to observe the pipeline level or task level inputs/outputs for a given pipelinerun.
// The documented values are "pr" (pipeline level) and "tr" (task level).
PrObserveMode string
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
"taskResults": [
{
"name": "IMAGES",
"value": "gcr.io/myimage@sha256:d4b63d3e24d6eef04a6dc0795cf8a73470688803d97c52cffa3c8d4efd3397b6,gcr.io/myimage@sha256:daa1a56e13c85cf164e7d9e595006649e3a04c47fe4a8261320e18a0bf3b0367"
"value": "gcr.io/myimage1@sha256:d4b63d3e24d6eef04a6dc0795cf8a73470688803d97c52cffa3c8d4efd3397b6,gcr.io/myimage2@sha256:daa1a56e13c85cf164e7d9e595006649e3a04c47fe4a8261320e18a0bf3b0367"
}
],
"taskSpec": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
"taskResults": [
{
"name": "IMAGES",
"value": "gcr.io/myimage@sha256:d4b63d3e24d6eef04a6dc0795cf8a73470688803d97c52cffa3c8d4efd3397b6,gcr.io/myimage@sha256:daa1a56e13c85cf164e7d9e595006649e3a04c47fe4a8261320e18a0bf3b0367"
"value": "gcr.io/myimage1@sha256:d4b63d3e24d6eef04a6dc0795cf8a73470688803d97c52cffa3c8d4efd3397b6,gcr.io/myimage2@sha256:daa1a56e13c85cf164e7d9e595006649e3a04c47fe4a8261320e18a0bf3b0367"
}
],
"taskSpec": {
Expand Down
3 changes: 2 additions & 1 deletion pkg/chains/formats/slsa/v1/intotoite6.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ type InTotoIte6 struct {
func NewFormatter(cfg config.Config) (formats.Payloader, error) {
return &InTotoIte6{
slsaConfig: &slsaconfig.SlsaConfig{
BuilderID: cfg.Builder.ID,
BuilderID: cfg.Builder.ID,
PrObserveMode: cfg.Artifacts.PipelineRuns.ObserveMode,
},
}, nil
}
Expand Down
Loading

0 comments on commit c58bf20

Please sign in to comment.