Skip to content

Commit

Permalink
feat(gitparse): track ref sources
Browse files Browse the repository at this point in the history
'Hidden' refs, such as 'refs/pull/1004/head' may cause confusion if reported upon. GitHub, for example, will display a banner saying that the commit doesn't belong to the repository.
This parse the output of 'git log --source' and converts it to a human-readable format, IF the ref is 'hidden'.
  • Loading branch information
rgmz committed Dec 31, 2024
1 parent 58289a6 commit bef8f77
Show file tree
Hide file tree
Showing 12 changed files with 628 additions and 488 deletions.
13 changes: 7 additions & 6 deletions hack/snifftest/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,15 +197,16 @@ func main() {
SkipBinaries: true,
SkipArchives: false,
Concurrency: runtime.NumCPU(),
SourceMetadataFunc: func(file, email, commit, timestamp, repository string, line int64) *source_metadatapb.MetaData {
SourceMetadataFunc: func(repository, commit, commitSource, email, timestamp, file string, line int64) *source_metadatapb.MetaData {
return &source_metadatapb.MetaData{
Data: &source_metadatapb.MetaData_Git{
Git: &source_metadatapb.Git{
Commit: commit,
File: file,
Email: email,
Repository: repository,
Timestamp: timestamp,
Repository: repository,
Commit: commit,
CommitSource: commitSource,
Email: email,
Timestamp: timestamp,
File: file,
},
},
}
Expand Down
68 changes: 61 additions & 7 deletions pkg/gitparse/gitparse.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,9 @@ func (d *Diff) finalize() error { return d.contentWriter.CloseForWriting() }

// Commit contains commit header info and diffs.
type Commit struct {
Hash string
Hash string
// The source of a commit, if it doesn't exist in the repository's history.
Source string
Author string
Committer string
Date time.Time
Expand Down Expand Up @@ -231,19 +233,23 @@ func (c *Parser) RepoPath(
) (chan *Diff, error) {
args := []string{
"-C", source,
"--no-replace-objects",
"log",
"--patch", // https://git-scm.com/docs/git-log#Documentation/git-log.txt---patch
"--full-history",
"--date=format:%a %b %d %H:%M:%S %Y %z",
"--pretty=fuller", // https://git-scm.com/docs/git-log#_pretty_formats
"--notes", // https://git-scm.com/docs/git-log#Documentation/git-log.txt---notesltrefgt
"--source", // https://git-scm.com/docs/git-log#Documentation/git-log.txt---source
}
if abbreviatedLog {
// https://git-scm.com/docs/git-log#Documentation/git-log.txt---diff-filterACDMRTUXB82308203
args = append(args, "--diff-filter=AM")
}
if head != "" {
args = append(args, head)
} else {
// https://git-scm.com/docs/git-log#Documentation/git-log.txt---all
args = append(args, "--all")
}
for _, glob := range excludedGlobs {
Expand Down Expand Up @@ -332,10 +338,9 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, diffChan chan
outReader := bufio.NewReader(stdOut)
var (
currentCommit *Commit

totalLogSize int
totalLogSize int
latestState = Initial
)
var latestState = Initial

diff := func(c *Commit, opts ...diffOption) *Diff {
opts = append(opts, withCustomContentWriter(bufferwriter.New()))
Expand Down Expand Up @@ -395,10 +400,18 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, diffChan chan
// Create a new currentDiff and currentCommit
currentCommit = &Commit{Message: strings.Builder{}}
currentDiff = diff(currentCommit)
// Check that the commit line contains a hash and set it.
if len(line) >= 47 {
currentCommit.Hash = string(line[7:47])

hash, ref := parseCommitLine(line)
if hash == nil || ref == nil {
ctx.Logger().Error(
fmt.Errorf(`expected line to match 'commit <hash> <ref>', got "%s"`, line),
"Failed to parse CommitLine")
latestState = ParseFailure
continue
}

currentCommit.Hash = string(hash)
currentCommit.Source = parseSourceRef(ref)
case isMergeLine(isStaged, latestState, line):
latestState = MergeLine
case isAuthorLine(isStaged, latestState, line):
Expand Down Expand Up @@ -614,6 +627,47 @@ func isCommitLine(isStaged bool, latestState ParseState, line []byte) bool {
return false
}

func parseCommitLine(line []byte) (hash []byte, ref []byte) {
// Check that the commit line contains a 40-character hash and set it.
// `commit e5575cd6f2d21d3a1a604287c7bf4a7eab2266e0\n`
if len(line) >= 47 {
hash = line[7:47]
}

// Check if the commit line includes branch references.
// `commit 2dbbb28727c7c2954438666dafba57bb8c714d3b refs/heads/fix/github-enterprise-gist\n`
if len(line) > 48 {
ref = line[48 : len(line)-1]
}

return
}

// ParseCommitSource s
// https://git-scm.com/docs/git-log#Documentation/git-log.txt---source
func parseSourceRef(ref []byte) string {
// We don't care about 'normal' refs.
if bytes.HasPrefix(ref, []byte("refs/heads/")) || bytes.HasPrefix(ref, []byte("refs/tags/")) {
return ""
}

// Handle GitHub pull requests.
// e.g., `refs/pull/238/head` or `refs/pull/1234/merge`
if after, ok := bytes.CutPrefix(ref, []byte("refs/pull/")); ok {
prNumber := after[:bytes.Index(after, []byte("/"))]
return "Pull request #" + string(prNumber)
}

// Handle GitLab merge requests
// e.g., `refs/merge-requests/238/head` or `refs/merge-requests/1234/merge`
if after, ok := bytes.CutPrefix(ref, []byte("refs/merge-requests/")); ok {
mrNumber := after[:bytes.Index(after, []byte("/"))]
return "Merge request #" + string(mrNumber)
}

return fmt.Sprintf("%s (hidden ref)", string(ref))
}

// Author: Bill Rich <bill.rich@trufflesec.com>
func isAuthorLine(isStaged bool, latestState ParseState, line []byte) bool {
if isStaged || !(latestState == CommitLine || latestState == MergeLine) {
Expand Down
39 changes: 39 additions & 0 deletions pkg/gitparse/gitparse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -704,6 +704,45 @@ func TestLineChecksNoStaged(t *testing.T) {
}
}

func Test_parseCommitLine(t *testing.T) {
cases := map[string][]string{
"commit 198c63cb8212a99cc4352bc72f25e5444a786291 refs/heads/main\n": {"198c63cb8212a99cc4352bc72f25e5444a786291", "refs/heads/main"},
"commit e76dfb98ab9001daa869191b6aebe8cf4cd3b22a refs/remotes/origin/debug/aws-logging\n": {"e76dfb98ab9001daa869191b6aebe8cf4cd3b22a", "refs/remotes/origin/debug/aws-logging"},
}

for line, expected := range cases {
hash, ref := parseCommitLine([]byte(line))
if string(hash) != expected[0] {
t.Errorf("Expected: %s, Got: %s", expected[0], hash)
}
if string(ref) != expected[1] {
t.Errorf("Expected: %s, Got: %s", expected[1], ref)
}
}
}

func Test_parseSourceRef(t *testing.T) {
cases := map[string]string{
"refs/heads/master": "",
"refs/tags/v3.0.5": "",
// refs/merge-requests/33/head
"refs/heads/thog/mr/33/head": "Merge request #33",
// refs/merge-requests/19/merge
"refs/heads/thog/mr/19/merge": "Merge request #19",
// refs/pull/980/head
"refs/heads/thog/pr/980/head": "Pull request #980",
// refs/pull/1644/merge
"refs/heads/thog/pr/1644/merge": "Pull request #1644",
}

for line, expected := range cases {
source := parseSourceRef([]byte(line))
if source != expected {
t.Errorf("Expected: %s, Got: %s", expected, source)
}
}
}

func TestBinaryPathParse(t *testing.T) {
cases := map[string]string{
"Binary files a/trufflehog_3.42.0_linux_arm64.tar.gz and /dev/null differ\n": "",
Expand Down
Loading

0 comments on commit bef8f77

Please sign in to comment.