Skip to content

Commit

Permalink
scoring: use repo freshness as tiebreaker (#832)
Browse files Browse the repository at this point in the history
We ignore priority and instead use the latest commit date as repo rank.
This has a big impact for Sourcegraph because it means we switch from
star count to repo freshness as tiebreaker.

As a minor tweak, we also separate query based scores from tiebreakers.
To achieve this we reserve the last 7 digits of a score for tiebreakers:
- 5 digits (maxUint16) for repo rank
- 2 digits ([0,10]) for file order.

Example:

Before:
score: 8775.35 <- atom(2):200, fragment:8550.00, repo-rank: 19, doc-order:6.35

After:
score: 8750_00019_06.35 <- atom(2):200, fragment:8550.00, repo-rank: 19, doc-order:6.35
  • Loading branch information
stefanhengl authored Oct 1, 2024
1 parent 282a251 commit d15aa28
Show file tree
Hide file tree
Showing 16 changed files with 238 additions and 144 deletions.
29 changes: 26 additions & 3 deletions api.go
Original file line number Diff line number Diff line change
Expand Up @@ -635,7 +635,16 @@ func (r *Repository) UnmarshalJSON(data []byte) error {
r.ID = uint32(id)
}

if v, ok := repo.RawConfig["priority"]; ok {
// Sourcegraph indexserver doesn't set repo.Rank, so we set it here. Setting it
// on read instead of during indexing allows us to avoid a complete reindex.
//
// Prefer "latest_commit_date" over "priority" for ranking. We keep priority for
// backwards compatibility.
if _, ok := repo.RawConfig["latest_commit_date"]; ok {
// We use the number of months since 1970 as a simple measure of repo freshness.
// It is monotonically increasing and stable across re-indexes and restarts.
r.Rank = monthsSince1970(repo.LatestCommitDate)
} else if v, ok := repo.RawConfig["priority"]; ok {
r.priority, err = strconv.ParseFloat(v, 64)
if err != nil {
r.priority = 0
Expand All @@ -645,14 +654,28 @@ func (r *Repository) UnmarshalJSON(data []byte) error {
// based on priority. Setting it on read instead of during indexing
// allows us to avoid a complete reindex.
if r.Rank == 0 && r.priority > 0 {
// Normalize the repo score within [0, 1), with the midpoint at 5,000. This means popular
// repos (roughly ones with over 5,000 stars) see diminishing returns from more stars.
// Normalize the repo score within [0, maxUint16), with the midpoint at 5,000.
// This means popular repos (roughly ones with over 5,000 stars) see diminishing
// returns from more stars.
r.Rank = uint16(r.priority / (5000.0 + r.priority) * maxUInt16)
}
}

return nil
}

// monthsSince1970 returns the number of calendar months between January 1970
// and t, computed from the year and month of t in its own location. The result
// is clamped to the range [0, maxUInt16]: every date before 1970 maps to 0, and
// the upper bound is reached in April 7431.
func monthsSince1970(t time.Time) uint16 {
	months := (t.Year()-1970)*12 + int(t.Month()-1)
	// Clamp on both sides before converting. A time whose instant is after the
	// Unix epoch can still fall in 1969 in its own location (for example
	// 1969-12-31 20:00 -05:00), which makes months negative. Converting a
	// negative int to uint16 wraps around to a huge rank instead of 0.
	return uint16(min(max(months, 0), maxUInt16))
}

// MergeMutable will merge x into r. mutated will be true if it made any
// changes. err is non-nil if we needed to mutate an immutable field.
//
Expand Down
26 changes: 26 additions & 0 deletions api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -368,3 +368,29 @@ func TestRepositoryMergeMutable(t *testing.T) {
}
})
}

// TestMonthsSince1970 checks monthsSince1970 against a table of dates covering
// the lower clamp (dates before 1970 and the epoch itself), ordinary dates, and
// the upper clamp at maxUInt16.
func TestMonthsSince1970(t *testing.T) {
	// midnightUTC builds a UTC timestamp at 00:00:00 on the given date.
	midnightUTC := func(year int, month time.Month, day int) time.Time {
		return time.Date(year, month, day, 0, 0, 0, 0, time.UTC)
	}

	type testCase struct {
		label string
		when  time.Time
		want  uint16
	}

	cases := []testCase{
		{label: "Before 1970", when: midnightUTC(1950, 12, 31), want: 0},
		{label: "Unix 0", when: midnightUTC(1970, 1, 1), want: 0},
		{label: "Feb 1970", when: midnightUTC(1970, 2, 1), want: 1},
		{label: "Year 1989", when: midnightUTC(1989, 12, 13), want: 239},
		{label: "Sep 2024", when: midnightUTC(2024, 9, 20), want: 656},
		{label: "Oct 2024", when: midnightUTC(2024, 10, 20), want: 657},
		{label: "Apr 7431", when: midnightUTC(7431, 4, 1), want: 65535},
		// Month 0 and day 0 are passed through to time.Date unchanged.
		{label: "9999", when: midnightUTC(9999, 0, 0), want: 65535},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.label, func(t *testing.T) {
			if got := monthsSince1970(tc.when); got != tc.want {
				t.Errorf("expected %d, got %d", tc.want, got)
			}
		})
	}
}
Loading

0 comments on commit d15aa28

Please sign in to comment.