Fix parse until more #22

Merged 2 commits on Aug 17, 2024
28 changes: 25 additions & 3 deletions main_test.go
```diff
@@ -293,9 +293,10 @@ func TestUpdatedLog(t *testing.T) {
 	os.Args = []string{"ngtop"}
 	_, spec := querySpecFromCLI()
 
-	previousOffset, err := logFile.Write([]byte(`xx.xx.xx.xx - - [24/Jul/2024:00:00:28 +0000] "GET /feed HTTP/1.1" 301 169 "-" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"
+	bytesWritten, err := logFile.Write([]byte(`xx.xx.xx.xx - - [24/Jul/2024:00:00:28 +0000] "GET /feed HTTP/1.1" 301 169 "-" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"
 xx.xx.xx.xx - - [24/Jul/2024:00:00:30 +0000] "GET /feed HTTP/1.1" 301 169 "-" "feedi/0.1.0 (+https://github.com/facundoolano/feedi)"`))
 	assertEqual(t, err, nil)
+	previousOffset := bytesWritten
 
 	err = loadLogs(parser, logFile.Name(), dbs)
 	assertEqual(t, err, nil)
@@ -304,16 +304,37 @@ xx.xx.xx.xx - - [24/Jul/2024:00:00:30 +0000] "GET /feed HTTP/1.1" 301 169 "-" "f
 	assertEqual(t, rows[0][0], "2")
 
 	// append more logs to file
-	_, err = logFile.WriteAt([]byte(`xx.xx.xx.xx - - [24/Jul/2024:00:00:56 +0000] "GET /blog/deconstructing-the-role-playing-videogame/ HTTP/1.1" 200 14224 "-" "feedi/0.1.0 (+https://github.com/facundoolano/feedi)"
+	bytesWritten, err = logFile.WriteAt([]byte(`
+xx.xx.xx.xx - - [24/Jul/2024:00:00:56 +0000] "GET /blog/deconstructing-the-role-playing-videogame/ HTTP/1.1" 200 14224 "-" "feedi/0.1.0 (+https://github.com/facundoolano/feedi)"
 xx.xx.xx.xx - - [24/Jul/2024:00:01:18 +0000] "GET /feed.xml HTTP/1.1" 200 9641 "https://olano.dev/feed.xml" "FreshRSS/1.24.0 (Linux; https://freshrss.org)"`), int64(previousOffset))
 	assertEqual(t, err, nil)
+	previousOffset += bytesWritten
 
+	// run again with more entries and expect to see new requests
+	err = loadLogs(parser, logFile.Name(), dbs)
+	assertEqual(t, err, nil)
+	_, rows, err = dbs.QueryTop(spec)
+	assertEqual(t, err, nil)
+	assertEqual(t, rows[0][0], "4")
+
-	// run again and expect to see new requests
+	// run again without more entries, count should be the same
 	err = loadLogs(parser, logFile.Name(), dbs)
 	assertEqual(t, err, nil)
 	_, rows, err = dbs.QueryTop(spec)
 	assertEqual(t, err, nil)
 	assertEqual(t, rows[0][0], "4")
+
+	// append another one with the same date as the previous last one
+	_, err = logFile.WriteAt([]byte(`
+xx.xx.xx.xx - - [24/Jul/2024:00:01:18 +0000] "GET /blog/deconstructing-the-role-playing-videogame/ HTTP/1.1" 200 14224 "-" "feedi/0.1.0 (+https://github.com/facundoolano/feedi)"`), int64(previousOffset))
+	assertEqual(t, err, nil)
+
+	// check that the new request is added even though it has the same date as the cut out one
+	err = loadLogs(parser, logFile.Name(), dbs)
+	assertEqual(t, err, nil)
+	_, rows, err = dbs.QueryTop(spec)
+	assertEqual(t, err, nil)
+	assertEqual(t, rows[0][0], "5")
 }
 
 // ------ HELPERS --------
```
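The mechanical change in this test is the offset bookkeeping: each `WriteAt` must land past everything written so far, so the test accumulates `bytesWritten` into `previousOffset` instead of reusing the first write's offset. A minimal standalone sketch of that pattern (file name and contents are placeholders, not the project's code):

```go
package main

import (
	"log"
	"os"
)

func main() {
	f, err := os.CreateTemp("", "access-*.log")
	if err != nil {
		log.Fatal(err)
	}
	defer os.Remove(f.Name())
	defer f.Close()

	// accumulate the offset so each batch is appended, not overwritten
	offset := 0
	for _, batch := range []string{"first batch\n", "second batch\n"} {
		n, err := f.WriteAt([]byte(batch), int64(offset))
		if err != nil {
			log.Fatal(err)
		}
		offset += n
	}
}
```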
13 changes: 13 additions & 0 deletions ngtop/db.go
```diff
@@ -64,10 +64,23 @@ func (dbs *DBSession) Close() {
 
 // Prepare a transaction to insert a new batch of log entries, returning the time of the last seen log entry.
 func (dbs *DBSession) PrepareForUpdate() (*time.Time, error) {
+	// we want to avoid processed files that were already processed in the past. but we still want to add new log entries
+	// from the most recent files, which may have been extended since we last saw them.
+	// Since there is no "uniqueness" in logs (even the same ip can make the same request at the same second ---I checked),
+	// I remove the entries with the highest timestamp, and load everything up until including that timestamp but not older.
+	// The assumption is that any processing was completely finished, not interrupted.
+
 	var lastSeenTimeStr string
 	var lastSeemTime *time.Time
 	// this query error is acceptable in case of db not exists or empty
 	if err := dbs.db.QueryRow("SELECT max(time) FROM access_logs").Scan(&lastSeenTimeStr); err == nil {
+		query := "DELETE FROM access_logs WHERE time = ?"
+		_, err := dbs.db.Exec(query, lastSeenTimeStr)
+		log.Printf("query: %s %s\n", query, lastSeenTimeStr)
+		if err != nil {
+			return nil, err
+		}
+
 		t, _ := time.Parse(DB_DATE_LAYOUT, lastSeenTimeStr)
 		lastSeemTime = &t
 	}
```
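The comment block describes the resume strategy: since log lines carry no unique key, entries sharing the newest timestamp are ambiguous, so the safe move is to delete every row holding the max timestamp and re-ingest from that timestamp inclusive. A compilable sketch of that idiom, assuming a SQLite `access_logs` table as in the diff (the helper name and date layout here are made up):

```go
package ngtop

import (
	"database/sql"
	"time"
)

const dateLayout = "2006-01-02 15:04:05" // stand-in for DB_DATE_LAYOUT

// trimLastSeen deletes the rows sharing the newest timestamp and returns
// that timestamp; the caller then re-parses entries where time >= result.
func trimLastSeen(db *sql.DB) (*time.Time, error) {
	var maxStr string
	// a scan error here just means the table is missing or empty: parse everything
	if err := db.QueryRow("SELECT max(time) FROM access_logs").Scan(&maxStr); err != nil {
		return nil, nil
	}
	if _, err := db.Exec("DELETE FROM access_logs WHERE time = ?", maxStr); err != nil {
		return nil, err
	}
	t, err := time.Parse(dateLayout, maxStr)
	if err != nil {
		return nil, err
	}
	return &t, nil
}
```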
4 changes: 2 additions & 2 deletions ngtop/parser.go
```diff
@@ -101,8 +101,8 @@ func (parser LogParser) Parse(
 		}
 
 		if untilStr != "" && values["time"] < untilStr {
-			// if this file contains entries older than the untilStr, it means we already parsed part of it before
-			// since the files contains oldest entries at the beginning, we need to keep parsing until the end to get
+			// If this file contains entries older than the untilStr, it means we already parsed part of it before.
+			// Since the files contains oldest entries at the beginning, we need to keep parsing until the end to get
 			// all the updates, but we flag it as already seen so we skip parsing newer ones
 			alreadySeenFile = true
 			continue
```
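A self-contained rendering of the branch above: entries older than the cutoff are skipped because an earlier run already ingested them, but scanning continues to the end of the file, since newly appended entries sit past the already-seen region. The helper and types here are hypothetical, not the project's API; the string comparison on `values["time"]` mirrors the diff, which works because the timestamp layout sorts lexicographically:

```go
package ngtop

// handleEntry stands in for inserting a parsed entry into the database.
func handleEntry(values map[string]string) {}

// parseSince scans parsed entries against a cutoff timestamp and reports
// whether part of the file had already been seen by a previous run.
func parseSince(entries []map[string]string, untilStr string) bool {
	alreadySeenFile := false
	for _, values := range entries {
		if untilStr != "" && values["time"] < untilStr {
			// already ingested before; skip it, but keep scanning because
			// appended entries live at the end of the file
			alreadySeenFile = true
			continue
		}
		handleEntry(values)
	}
	return alreadySeenFile
}
```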