diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..75ee041 --- /dev/null +++ b/.env.example @@ -0,0 +1 @@ +DB_PATH=data/ \ No newline at end of file diff --git a/.gitignore b/.gitignore index adbb97d..43c96a6 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ -data/ \ No newline at end of file +data/ +.env \ No newline at end of file diff --git a/README.md b/README.md index eb48a96..d79a82e 100644 --- a/README.md +++ b/README.md @@ -5,13 +5,14 @@ Go-CaskDB is a disk-based, embedded, persistent, key-value store based on the [R ## Tasks - [x] Get, Set KV using disk as store - [x] Loading data from disk onto memory +- [ ] Support for generic keys and values (right now only for strings) - [ ] Proper logging - [ ] need better way of handling bytes (very bad rn) +- [ ] Testing and benchmarks - [ ] Crash Safety - [ ] Key Deletion -- [ ] Support for generic key and values (right now only for strings) - [ ] RB-tree to support range scans -- [ ] Split db file into several small files (implement merging compaction) +- [ ] Split db file into several small files (implement merging compaction using goroutines) - [ ] Cache - [ ] Garbage Collector for removing old deleted keys - [ ] Distributed using Paxos or consistent hashing diff --git a/format/decoder.go b/format/decoder.go index aa83c46..9fd2456 100644 --- a/format/decoder.go +++ b/format/decoder.go @@ -11,9 +11,9 @@ func DecodeHeader(buf []byte) (int64, int32, int32) { return int64(timestamp), int32(key_size), int32(value_size) } -func DecodeKeyValue(buf []byte) (int, string, string) { +func DecodeKeyValue(buf []byte) (int64, string, string) { timestamp, key_size, value_size := DecodeHeader(buf[:HEADER_SIZE]) key := string(buf[HEADER_SIZE : HEADER_SIZE+key_size]) value := string(buf[HEADER_SIZE+key_size : HEADER_SIZE+key_size+value_size]) - return int(timestamp), key, value + return timestamp, key, value } diff --git a/format/format_test.go b/format/format_test.go new file mode 100644 index 
0000000..9651dde --- /dev/null +++ b/format/format_test.go @@ -0,0 +1,32 @@ +package format + +import ( + "math/rand" + "testing" + + "github.com/stretchr/testify/assert" +) + +func generateRandomHeader() (int64, int32, int32) { + return rand.Int63(), rand.Int31(), rand.Int31() +} + +func TestEncodeAndDecodeHeader(t *testing.T) { + timestamp, key_size, value_size := generateRandomHeader() + encodedHeader := encodeHeader(timestamp, key_size, value_size) + d_timestamp, d_key_size, d_value_size := DecodeHeader(encodedHeader.Bytes()) + assert.Equal(t, timestamp, d_timestamp, "Timestamps are not equal!") + assert.Equal(t, key_size, d_key_size, "Key sizes are not equal!") + assert.Equal(t, value_size, d_value_size, "Value sizes are not equal!") +} + +func TestEncodeAndDecodeKeyValue(t *testing.T) { + timestamp := int64(rand.Int63()) + key := "name" + value := "abeshek" + _, buf := EncodeKeyValue(timestamp, key, value) + d_timestamp, d_key, d_value := DecodeKeyValue(buf) + assert.Equal(t, timestamp, d_timestamp, "Timestamps are not equal!") + assert.Equal(t, key, d_key, "Keys are not equal!") + assert.Equal(t, value, d_value, "Values are not equal!") +} diff --git a/go.mod b/go.mod index 2887099..3f1d0f4 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,11 @@ module github.com/abesheknarayan/go-caskdb go 1.18 + +require ( + github.com/davecgh/go-spew v1.1.0 // indirect + github.com/joho/godotenv v1.4.0 + github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/stretchr/testify v1.7.1 + gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..da72bf5 --- /dev/null +++ b/go.sum @@ -0,0 +1,12 @@ +github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/joho/godotenv v1.4.0 h1:3l4+N6zfMWnkbPEXKng2o2/MR5mSwTrBih4ZEkkz1lg= +github.com/joho/godotenv 
v1.4.0/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.7.1 h1:5TQK59W5E3v0r2duFAb7P95B6hEeOyEnHRa8MjYSMTY= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/main.go b/main.go index 18de634..04ea185 100644 --- a/main.go +++ b/main.go @@ -2,19 +2,27 @@ package main import ( "fmt" + "log" + "os" "github.com/abesheknarayan/go-caskdb/stores" + "github.com/joho/godotenv" ) func main() { - booksDb, err := stores.InitDb("books") + err := godotenv.Load(".env") + if err != nil { + log.Fatalf("failed to load env file") + } + path := os.Getenv("DB_PATH") + booksDb, err := stores.InitDb("test", path) if err != nil { fmt.Println(err) } booksDb.Set("name", "abeshek") fmt.Println(booksDb.Get("name")) - booksDb.Set("movie", "top gun") + booksDb.Set("movie", "top gun maverick") fmt.Println(booksDb.Get("movie")) - booksDb.CloseDB() + // booksDb.Cleanup() } diff --git a/run_tests.sh b/run_tests.sh new file mode 100644 index 0000000..7f41414 --- /dev/null +++ b/run_tests.sh @@ -0,0 +1 @@ +go test ./format ./stores \ No newline at end of file diff --git a/stores/disk_store.go b/stores/disk_store.go index 6cfdc5a..ef6cb5b 100644 --- a/stores/disk_store.go +++ b/stores/disk_store.go @@ -29,14 +29,14 @@ type DiskStore struct { } // creates a new db and returns the object ref -func InitDb(dbName string) (*DiskStore, error) { 
+func InitDb(dbName string, path string) (*DiskStore, error) { // if db is already present load it or else create new db - fileName := fmt.Sprintf("data/%s.db", dbName) + fileName := fmt.Sprintf("%s/%s.db", path, dbName) if _, err := os.Stat(fileName); errors.Is(err, os.ErrNotExist) { fmt.Println("file doesn't exist !!") - return createDB(dbName) + return createDB(dbName, path) } // open file in binary + append mode @@ -58,12 +58,14 @@ func InitDb(dbName string) (*DiskStore, error) { } // create new file -func createDB(dbName string) (*DiskStore, error) { - filename := fmt.Sprintf("data/%s.db", dbName) +func createDB(dbName string, path string) (*DiskStore, error) { + // build the db file path as <path>/<dbName>.db + filename := fmt.Sprintf("%s/%s.db", path, dbName) fmt.Printf("creating new file %s\n", filename) f, err := os.Create(filename) if err != nil { + fmt.Println("here") return nil, err } @@ -155,6 +157,15 @@ func (d *DiskStore) Get(key string) string { return value } +// Cleanup resets the current byte offset to 0, empties the hash index and removes the db file from disk. +func (d *DiskStore) Cleanup() { + d.currentByteOffsetPosition = 0 + for k := range d.hashIndex { + delete(d.hashIndex, k) + } + os.Remove(d.filename) +} + func (d *DiskStore) CloseDB() { d.file.Sync() d.file.Close() diff --git a/stores/disk_store_test.go b/stores/disk_store_test.go new file mode 100644 index 0000000..bc3bb42 --- /dev/null +++ b/stores/disk_store_test.go @@ -0,0 +1,67 @@ +package stores + +import ( + "fmt" + "log" + "os" + "testing" + + "github.com/stretchr/testify/assert" +) + +var db *DiskStore +var tempDir string // directory that holds the test db file + +func TestGet(t *testing.T) { + value := "pro tester" + db.Set("name", value) + assert.Equal(t, value, db.Get("name"), "Values are not equal!!") +} + +func TestInvalidKey(t *testing.T) { + // a missing key is expected to yield an empty string for now; subject to change in future + assert.Equal(t, "", db.Get("random_key")) +} + +func TestPersistance(t *testing.T) { + db.Set("football", "cr7") + db.CloseDB() + var err error + db, err = InitDb("testdb", tempDir) + if err != nil { + t.Fatalf(err.Error()) + 
} + assert.Equal(t, "cr7", db.Get("football"), "Persistance failure!") +} + +func TestDbCleaup(t *testing.T) { + db.Set("name", "God") + db.CloseDB() + db.Cleanup() + assert.Equal(t, "", db.Get("name"), "Expected empty value") +} + +func setupTests(t *testing.T) { + fmt.Println("running setup") + tempDir = t.TempDir() + fmt.Println(tempDir) + var err error + db, err = InitDb("testdb", tempDir) + if err != nil { + t.Fatalf(err.Error()) + } +} + +func cleanupTests() { + log.Println("Cleaning up tests") + db.CloseDB() + db.Cleanup() +} + +// all setup is done here as this runs first, call all tests from here +func TestMain(m *testing.M) { + setupTests(&testing.T{}) + exit := m.Run() + cleanupTests() + os.Exit(exit) +}