Skip to content

Commit

Permalink
Fix parse until more (#22)
Browse files Browse the repository at this point in the history
* add more failing test scenarios

* restore truncation
  • Loading branch information
facundoolano committed Aug 17, 2024
1 parent cf6507b commit 2ae0716
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 5 deletions.
28 changes: 25 additions & 3 deletions main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -293,9 +293,10 @@ func TestUpdatedLog(t *testing.T) {
os.Args = []string{"ngtop"}
_, spec := querySpecFromCLI()

previousOffset, err := logFile.Write([]byte(`xx.xx.xx.xx - - [24/Jul/2024:00:00:28 +0000] "GET /feed HTTP/1.1" 301 169 "-" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"
bytesWritten, err := logFile.Write([]byte(`xx.xx.xx.xx - - [24/Jul/2024:00:00:28 +0000] "GET /feed HTTP/1.1" 301 169 "-" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"
xx.xx.xx.xx - - [24/Jul/2024:00:00:30 +0000] "GET /feed HTTP/1.1" 301 169 "-" "feedi/0.1.0 (+https://github.com/facundoolano/feedi)"`))
assertEqual(t, err, nil)
previousOffset := bytesWritten

err = loadLogs(parser, logFile.Name(), dbs)
assertEqual(t, err, nil)
Expand All @@ -304,16 +305,37 @@ xx.xx.xx.xx - - [24/Jul/2024:00:00:30 +0000] "GET /feed HTTP/1.1" 301 169 "-" "f
assertEqual(t, rows[0][0], "2")

// append more logs to file
_, err = logFile.WriteAt([]byte(`xx.xx.xx.xx - - [24/Jul/2024:00:00:56 +0000] "GET /blog/deconstructing-the-role-playing-videogame/ HTTP/1.1" 200 14224 "-" "feedi/0.1.0 (+https://github.com/facundoolano/feedi)"
bytesWritten, err = logFile.WriteAt([]byte(`
xx.xx.xx.xx - - [24/Jul/2024:00:00:56 +0000] "GET /blog/deconstructing-the-role-playing-videogame/ HTTP/1.1" 200 14224 "-" "feedi/0.1.0 (+https://github.com/facundoolano/feedi)"
xx.xx.xx.xx - - [24/Jul/2024:00:01:18 +0000] "GET /feed.xml HTTP/1.1" 200 9641 "https://olano.dev/feed.xml" "FreshRSS/1.24.0 (Linux; https://freshrss.org)"`), int64(previousOffset))
assertEqual(t, err, nil)
previousOffset += bytesWritten

// run again with more entries and expect to see new requests
err = loadLogs(parser, logFile.Name(), dbs)
assertEqual(t, err, nil)
_, rows, err = dbs.QueryTop(spec)
assertEqual(t, err, nil)
assertEqual(t, rows[0][0], "4")

// run again and expect to see new requests
// run again without more entries, count should be the same
err = loadLogs(parser, logFile.Name(), dbs)
assertEqual(t, err, nil)
_, rows, err = dbs.QueryTop(spec)
assertEqual(t, err, nil)
assertEqual(t, rows[0][0], "4")

// append another one with the same date as the previous last one
_, err = logFile.WriteAt([]byte(`
xx.xx.xx.xx - - [24/Jul/2024:00:01:18 +0000] "GET /blog/deconstructing-the-role-playing-videogame/ HTTP/1.1" 200 14224 "-" "feedi/0.1.0 (+https://github.com/facundoolano/feedi)"`), int64(previousOffset))
assertEqual(t, err, nil)

// check that the new request is added even though it has the same date as the cut out one
err = loadLogs(parser, logFile.Name(), dbs)
assertEqual(t, err, nil)
_, rows, err = dbs.QueryTop(spec)
assertEqual(t, err, nil)
assertEqual(t, rows[0][0], "5")
}

// ------ HELPERS --------
Expand Down
13 changes: 13 additions & 0 deletions ngtop/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,23 @@ func (dbs *DBSession) Close() {

// Prepare a transaction to insert a new batch of log entries, returning the time of the last seen log entry.
func (dbs *DBSession) PrepareForUpdate() (*time.Time, error) {
// we want to avoid re-processing files that were already fully processed in the past. but we still want to add new log entries
// from the most recent files, which may have been extended since we last saw them.
// Since there is no "uniqueness" in logs (even the same ip can make the same request at the same second ---I checked),
// I remove the entries with the highest timestamp, and load everything up to and including that timestamp, but nothing older.
// The assumption is that any processing was completely finished, not interrupted.

var lastSeenTimeStr string
var lastSeemTime *time.Time
// this query error is acceptable in case of db not exists or empty
if err := dbs.db.QueryRow("SELECT max(time) FROM access_logs").Scan(&lastSeenTimeStr); err == nil {
query := "DELETE FROM access_logs WHERE time = ?"
_, err := dbs.db.Exec(query, lastSeenTimeStr)
log.Printf("query: %s %s\n", query, lastSeenTimeStr)
if err != nil {
return nil, err
}

t, _ := time.Parse(DB_DATE_LAYOUT, lastSeenTimeStr)
lastSeemTime = &t
}
Expand Down
4 changes: 2 additions & 2 deletions ngtop/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,8 @@ func (parser LogParser) Parse(
}

if untilStr != "" && values["time"] < untilStr {
// if this file contains entries older than the untilStr, it means we already parsed part of it before
// since the files contains oldest entries at the beginning, we need to keep parsing until the end to get
// If this file contains entries older than the untilStr, it means we already parsed part of it before.
// Since the file contains the oldest entries at the beginning, we need to keep parsing until the end to get
// all the updates, but we flag it as already seen so we skip parsing newer ones
alreadySeenFile = true
continue
Expand Down

0 comments on commit 2ae0716

Please sign in to comment.