diff --git a/main_test.go b/main_test.go index ac9c866..fa30a35 100644 --- a/main_test.go +++ b/main_test.go @@ -293,9 +293,10 @@ func TestUpdatedLog(t *testing.T) { os.Args = []string{"ngtop"} _, spec := querySpecFromCLI() - previousOffset, err := logFile.Write([]byte(`xx.xx.xx.xx - - [24/Jul/2024:00:00:28 +0000] "GET /feed HTTP/1.1" 301 169 "-" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36" + bytesWritten, err := logFile.Write([]byte(`xx.xx.xx.xx - - [24/Jul/2024:00:00:28 +0000] "GET /feed HTTP/1.1" 301 169 "-" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36" xx.xx.xx.xx - - [24/Jul/2024:00:00:30 +0000] "GET /feed HTTP/1.1" 301 169 "-" "feedi/0.1.0 (+https://github.com/facundoolano/feedi)"`)) assertEqual(t, err, nil) + previousOffset := bytesWritten err = loadLogs(parser, logFile.Name(), dbs) assertEqual(t, err, nil) @@ -304,16 +305,37 @@ xx.xx.xx.xx - - [24/Jul/2024:00:00:30 +0000] "GET /feed HTTP/1.1" 301 169 "-" "f assertEqual(t, rows[0][0], "2") // append more logs to file - _, err = logFile.WriteAt([]byte(`xx.xx.xx.xx - - [24/Jul/2024:00:00:56 +0000] "GET /blog/deconstructing-the-role-playing-videogame/ HTTP/1.1" 200 14224 "-" "feedi/0.1.0 (+https://github.com/facundoolano/feedi)" + bytesWritten, err = logFile.WriteAt([]byte(` +xx.xx.xx.xx - - [24/Jul/2024:00:00:56 +0000] "GET /blog/deconstructing-the-role-playing-videogame/ HTTP/1.1" 200 14224 "-" "feedi/0.1.0 (+https://github.com/facundoolano/feedi)" xx.xx.xx.xx - - [24/Jul/2024:00:01:18 +0000] "GET /feed.xml HTTP/1.1" 200 9641 "https://olano.dev/feed.xml" "FreshRSS/1.24.0 (Linux; https://freshrss.org)"`), int64(previousOffset)) assertEqual(t, err, nil) + previousOffset += bytesWritten + + // run again with more entries and expect to see new requests + err = loadLogs(parser, logFile.Name(), dbs) + assertEqual(t, err, nil) + _, rows, err = dbs.QueryTop(spec) + assertEqual(t, 
err, nil) + assertEqual(t, rows[0][0], "4") - // run again and expect to see new requests + // run again without more entries, count should be the same err = loadLogs(parser, logFile.Name(), dbs) assertEqual(t, err, nil) _, rows, err = dbs.QueryTop(spec) assertEqual(t, err, nil) assertEqual(t, rows[0][0], "4") + + // append another one with the same date as the previous last one + _, err = logFile.WriteAt([]byte(` +xx.xx.xx.xx - - [24/Jul/2024:00:01:18 +0000] "GET /blog/deconstructing-the-role-playing-videogame/ HTTP/1.1" 200 14224 "-" "feedi/0.1.0 (+https://github.com/facundoolano/feedi)"`), int64(previousOffset)) + assertEqual(t, err, nil) + + // check that the new request is added even though it has the same date as the deleted one + err = loadLogs(parser, logFile.Name(), dbs) + assertEqual(t, err, nil) + _, rows, err = dbs.QueryTop(spec) + assertEqual(t, err, nil) + assertEqual(t, rows[0][0], "5") } // ------ HELPERS -------- diff --git a/ngtop/db.go b/ngtop/db.go index fb293cc..24bac45 100644 --- a/ngtop/db.go +++ b/ngtop/db.go @@ -64,10 +64,23 @@ func (dbs *DBSession) Close() { // Prepare a transaction to insert a new batch of log entries, returning the time of the last seen log entry. func (dbs *DBSession) PrepareForUpdate() (*time.Time, error) { + // We want to avoid reprocessing files that were already fully processed in the past, but we still want to add new log entries + // from the most recent files, which may have been extended since we last saw them. + // Since there is no "uniqueness" in logs (even the same ip can make the same request at the same second ---I checked), + // I remove the entries with the highest timestamp, and load everything down to and including that timestamp, but nothing older. + // The assumption is that any processing was completely finished, not interrupted. 
+ var lastSeenTimeStr string var lastSeemTime *time.Time // this query error is acceptable in case of db not exists or empty if err := dbs.db.QueryRow("SELECT max(time) FROM access_logs").Scan(&lastSeenTimeStr); err == nil { + query := "DELETE FROM access_logs WHERE time = ?" + _, err := dbs.db.Exec(query, lastSeenTimeStr) + log.Printf("query: %s %s\n", query, lastSeenTimeStr) + if err != nil { + return nil, err + } + t, _ := time.Parse(DB_DATE_LAYOUT, lastSeenTimeStr) lastSeemTime = &t } diff --git a/ngtop/parser.go b/ngtop/parser.go index f463696..cd83062 100644 --- a/ngtop/parser.go +++ b/ngtop/parser.go @@ -101,8 +101,8 @@ func (parser LogParser) Parse( } if untilStr != "" && values["time"] < untilStr { - // if this file contains entries older than the untilStr, it means we already parsed part of it before - // since the files contains oldest entries at the beginning, we need to keep parsing until the end to get + // If this file contains entries older than the untilStr, it means we already parsed part of it before. + // Since the files contain the oldest entries at the beginning, we need to keep parsing until the end to get // all the updates, but we flag it as already seen so we skip parsing newer ones alreadySeenFile = true continue