From 4e2171de74326efa405126f544156d238777323d Mon Sep 17 00:00:00 2001 From: Eduardo Cuducos <4732915+cuducos@users.noreply.github.com> Date: Mon, 28 Oct 2024 08:46:07 -0400 Subject: [PATCH 1/3] Closes Badger DB properly --- transform/kv.go | 4 ++-- transform/transform.go | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/transform/kv.go b/transform/kv.go index c4976db..3f01635 100644 --- a/transform/kv.go +++ b/transform/kv.go @@ -162,9 +162,9 @@ func (kv *badgerStorage) enrichCompany(c *company) error { return nil } -func (b *badgerStorage) close() error { +func (b *badgerStorage) close(k bool) error { b.db.Close() - if b.path != "" { + if !k && b.path != "" { if err := os.RemoveAll(b.path); err != nil { return fmt.Errorf("error cleaning up badger storage directory: %w", err) } diff --git a/transform/transform.go b/transform/transform.go index 9f9ccfa..227e12d 100644 --- a/transform/transform.go +++ b/transform/transform.go @@ -84,6 +84,7 @@ func runStepOne(dir string, l lookups, isolated bool) (string, error) { if err != nil { return "", fmt.Errorf("could not create badger storage: %w", err) } + defer kv.close(isolated) if err := kv.load(dir, &l); err != nil { return "", fmt.Errorf("error loading data to badger: %w", err) } @@ -98,9 +99,7 @@ func runStepTwo(dir string, tmp string, db database, l lookups, maxParallelDBQue if err != nil { return fmt.Errorf("could not create badger storage: %w", err) } - if !isolated { - defer kv.close() - } + defer kv.close(isolated) j, err := createJSONRecordsTask(dir, db, &l, kv, batchSize, privacy) if err != nil { return fmt.Errorf("error creating new task for venues in %s: %w", dir, err) From fc2e4cc89e162086b16f0abad6be1ec187b4e9a6 Mon Sep 17 00:00:00 2001 From: Eduardo Cuducos <4732915+cuducos@users.noreply.github.com> Date: Mon, 28 Oct 2024 09:38:35 -0400 Subject: [PATCH 2/3] Fixes key-value storage interface --- transform/company_test.go | 4 ++-- transform/kv.go | 2 +- transform/kv_test.go | 6 +++--- transform/transform.go | 2 +- transform/venues_test.go | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/transform/company_test.go b/transform/company_test.go index b3f8ce5..7969e6d 100644 --- a/transform/company_test.go +++ b/transform/company_test.go @@ -117,7 +117,7 @@ func TestNewCompany(t *testing.T) { if err != nil { t.Errorf("expected no error creating badger, got %s", err) } - defer kv.close() + defer kv.close(false) lookups, err := newLookups(testdata) if err != nil { t.Errorf("expected no errors creating look up tables, got %v", err) @@ -273,7 +273,7 @@ func TestNewCompany(t *testing.T) { if err != nil { t.Errorf("expected no error creating badger, got %s", err) } - defer kv.close() + defer kv.close(false) lookups, err := newLookups(testdata) if err != nil { t.Errorf("expected no errors creating look up tables, got %v", err) diff --git a/transform/kv.go b/transform/kv.go index 3f01635..096289d 100644 --- a/transform/kv.go +++ b/transform/kv.go @@ -164,7 +164,7 @@ func (kv *badgerStorage) enrichCompany(c *company) error { func (b *badgerStorage) close(k bool) error { b.db.Close() - if !k && b.path != "" { + if !k { if err := os.RemoveAll(b.path); err != nil { return fmt.Errorf("error cleaning up badger storage directory: %w", err) } diff --git a/transform/kv_test.go b/transform/kv_test.go index 9a62432..642b676 100644 --- a/transform/kv_test.go +++ b/transform/kv_test.go @@ -19,7 +19,7 @@ func TestBadgerStorageClose(t *testing.T) { if err != nil { t.Errorf("expected no error creating badger storage, got %s", err) } - if err := kv.close(); err != nil { + if err := kv.close(false); err != nil { t.Errorf("expected no error closing badger storage, got %s", err) } if _, err := os.Stat(kv.path); err == nil || !os.IsNotExist(err) { @@ -74,7 +74,7 @@ func TestLoad(t *testing.T) { if err != nil { t.Fatalf("could not create badger storage: %s", err) } - defer kv.close() + defer kv.close(false) if err := kv.load(testdata, &l); err != nil { t.Errorf("expected no error loading data, got %s", err) } @@ -103,7 +103,7 @@ func TestEnrichCompany(t *testing.T) { if err != nil { t.Fatalf("could not create badger storage: %s", err) } - defer kv.close() + defer kv.close(false) if err := kv.load(testdata, &l); err != nil { t.Errorf("expected no error loading data, got %s", err) } diff --git a/transform/transform.go b/transform/transform.go index 227e12d..4c9606a 100644 --- a/transform/transform.go +++ b/transform/transform.go @@ -33,7 +33,7 @@ type database interface { type kvStorage interface { load(string, *lookups) error enrichCompany(*company) error - close() error + close(bool) error } type mode int diff --git a/transform/venues_test.go b/transform/venues_test.go index eee0ef1..b0d1a10 100644 --- a/transform/venues_test.go +++ b/transform/venues_test.go @@ -18,7 +18,7 @@ func TestTaskRun(t *testing.T) { if err != nil { t.Errorf("expected no error creating badger, got %s", err) } - defer kv.close() + defer kv.close(false) lookups, err := newLookups(testdata) if err != nil { t.Errorf("expected no errors creating look up tables, got %v", err) From 6628490d25751a98de2974545f24932d54c8a6bf Mon Sep 17 00:00:00 2001 From: Eduardo Cuducos <4732915+cuducos@users.noreply.github.com> Date: Tue, 29 Oct 2024 15:30:46 -0400 Subject: [PATCH 3/3] Refactors Badger usage for `transform` in separated steps --- go.mod | 2 -- transform/company_test.go | 8 ++--- transform/kv.go | 28 +++++++----------- transform/kv_test.go | 15 ++++------ transform/transform.go | 62 ++++++++++++++++++++------------------- transform/venues_test.go | 4 +-- 6 files changed, 55 insertions(+), 64 deletions(-) diff --git a/go.mod b/go.mod index daadcc6..2df1814 100644 --- a/go.mod +++ b/go.mod @@ -49,5 +49,3 @@ require ( google.golang.org/grpc v1.65.0 // indirect google.golang.org/protobuf v1.34.2 // indirect ) - -// +heroku goVersion go1.22 diff --git a/transform/company_test.go b/transform/company_test.go index 7969e6d..ed56083 100644 --- a/transform/company_test.go +++ b/transform/company_test.go @@ -108,7 +108,7 @@ func TestNewCompany(t *testing.T) { } t.Run("with privacy", func(t *testing.T) { - tmp, err := os.MkdirTemp("", fmt.Sprintf("%s-%s", badgerFilePrefix, time.Now().Format("20060102150405"))) + tmp, err := os.MkdirTemp("", fmt.Sprintf("minha-receita-%s-*", time.Now().Format("20060102150405"))) if err != nil { t.Fatal("error creating temporary key-value storage: %w", err) } @@ -117,7 +117,7 @@ func TestNewCompany(t *testing.T) { if err != nil { t.Errorf("expected no error creating badger, got %s", err) } - defer kv.close(false) + defer kv.close() lookups, err := newLookups(testdata) if err != nil { t.Errorf("expected no errors creating look up tables, got %v", err) @@ -264,7 +264,7 @@ func TestNewCompany(t *testing.T) { } }) t.Run("without privacy", func(t *testing.T) { - tmp, err := os.MkdirTemp("", fmt.Sprintf("%s-%s", badgerFilePrefix, time.Now().Format("20060102150405"))) + tmp, err := os.MkdirTemp("", fmt.Sprintf("minha-receita-%s-*", time.Now().Format("20060102150405"))) if err != nil { t.Fatal("error creating temporary key-value storage: %w", err) } @@ -273,7 +273,7 @@ func TestNewCompany(t *testing.T) { if err != nil { t.Errorf("expected no error creating badger, got %s", err) } - defer kv.close(false) + defer kv.close() lookups, err := newLookups(testdata) if err != nil { t.Errorf("expected no errors creating look up tables, got %v", err) diff --git a/transform/kv.go b/transform/kv.go index 096289d..fc1a5b4 100644 --- a/transform/kv.go +++ b/transform/kv.go @@ -162,31 +162,25 @@ func (kv *badgerStorage) enrichCompany(c *company) error { return nil } -func (b *badgerStorage) close(k bool) error { - b.db.Close() - if !k { - if err := os.RemoveAll(b.path); err != nil { - return fmt.Errorf("error cleaning up badger storage directory: %w", err) - } - } - return nil +func (b *badgerStorage) close() error { + return b.db.Close() } -type badgerLogger struct{} +type noLogger struct{} -func (*badgerLogger) Errorf(string, ...interface{}) {} -func (*badgerLogger) Warningf(string, ...interface{}) {} -func (*badgerLogger) Infof(string, ...interface{}) {} -func (*badgerLogger) Debugf(string, ...interface{}) {} +func (*noLogger) Errorf(string, ...interface{}) {} +func (*noLogger) Warningf(string, ...interface{}) {} +func (*noLogger) Infof(string, ...interface{}) {} +func (*noLogger) Debugf(string, ...interface{}) {} func newBadgerStorage(dir string) (*badgerStorage, error) { - var err error - var opt badger.Options + opt := badger.DefaultOptions(dir) if os.Getenv("DEBUG") != "" { log.Output(1, fmt.Sprintf("Creating temporary key-value storage at %s", dir)) + } else { + opt = opt.WithLogger(&noLogger{}) } - opt = badger.DefaultOptions(dir) - db, err := badger.Open(opt.WithLogger(&badgerLogger{})) + db, err := badger.Open(opt) if err != nil { return nil, fmt.Errorf("error creating badger key-value object: %w", err) } diff --git a/transform/kv_test.go b/transform/kv_test.go index 642b676..44e90d7 100644 --- a/transform/kv_test.go +++ b/transform/kv_test.go @@ -10,7 +10,7 @@ import ( ) func TestBadgerStorageClose(t *testing.T) { - tmp, err := os.MkdirTemp("", fmt.Sprintf("%s-%s", badgerFilePrefix, time.Now().Format("20060102150405"))) + tmp, err := os.MkdirTemp("", fmt.Sprintf("minha-receita-%s-*", time.Now().Format("20060102150405"))) if err != nil { t.Fatal("error creating temporary key-value storage: %w", err) } @@ -19,12 +19,9 @@ func TestBadgerStorageClose(t *testing.T) { if err != nil { t.Errorf("expected no error creating badger storage, got %s", err) } - if err := kv.close(false); err != nil { + if err := kv.close(); err != nil { t.Errorf("expected no error closing badger storage, got %s", err) } - if _, err := os.Stat(kv.path); err == nil || !os.IsNotExist(err) { - t.Errorf("expected %s to be gone, but got %s when opening it", kv.path, err) - } } func TestNewItem(t *testing.T) { @@ -65,7 +62,7 @@ func TestLoad(t *testing.T) { if err != nil { t.Fatalf("could not create lookups: %s", err) } - tmp, err := os.MkdirTemp("", fmt.Sprintf("%s-%s", badgerFilePrefix, time.Now().Format("20060102150405"))) + tmp, err := os.MkdirTemp("", fmt.Sprintf("minha-receita-%s-*", time.Now().Format("20060102150405"))) if err != nil { t.Fatal("error creating temporary key-value storage: %w", err) } @@ -74,7 +71,7 @@ func TestLoad(t *testing.T) { if err != nil { t.Fatalf("could not create badger storage: %s", err) } - defer kv.close(false) + defer kv.close() if err := kv.load(testdata, &l); err != nil { t.Errorf("expected no error loading data, got %s", err) } @@ -94,7 +91,7 @@ func TestEnrichCompany(t *testing.T) { if err != nil { t.Fatalf("could not create lookups: %s", err) } - tmp, err := os.MkdirTemp("", fmt.Sprintf("%s-%s", badgerFilePrefix, time.Now().Format("20060102150405"))) + tmp, err := os.MkdirTemp("", fmt.Sprintf("minha-receita-%s-*", time.Now().Format("20060102150405"))) if err != nil { t.Fatal("error creating temporary key-value storage: %w", err) } @@ -103,7 +100,7 @@ func TestEnrichCompany(t *testing.T) { if err != nil { t.Fatalf("could not create badger storage: %s", err) } - defer kv.close(false) + defer kv.close() if err := kv.load(testdata, &l); err != nil { t.Errorf("expected no error loading data, got %s", err) } diff --git a/transform/transform.go b/transform/transform.go index 4c9606a..9044bc7 100644 --- a/transform/transform.go +++ b/transform/transform.go @@ -19,8 +19,6 @@ const ( // BatchSize determines the size of the batches used to create the initial JSON // data in the database. BatchSize = 8192 - - badgerFilePrefix = "minha-receita-badger-" ) type database interface { @@ -33,7 +31,7 @@ type database interface { type kvStorage interface { load(string, *lookups) error enrichCompany(*company) error - close(bool) error + close() error } type mode int @@ -75,31 +73,24 @@ func saveUpdatedAt(db database, dir string) error { return db.MetaSave("updated-at", string(v)) } -func runStepOne(dir string, l lookups, isolated bool) (string, error) { - tmp, err := os.MkdirTemp("", fmt.Sprintf("%s-%s", badgerFilePrefix, time.Now().Format("20060102150405"))) - if err != nil { - return "", fmt.Errorf("error creating temporary key-value storage: %w", err) - } - kv, err := newBadgerStorage(tmp) +func runStepOne(dir string, pth string, l lookups) error { + kv, err := newBadgerStorage(pth) if err != nil { - return "", fmt.Errorf("could not create badger storage: %w", err) + return fmt.Errorf("could not create badger storage: %w", err) } - defer kv.close(isolated) + defer kv.close() if err := kv.load(dir, &l); err != nil { - return "", fmt.Errorf("error loading data to badger: %w", err) - } - if isolated { - fmt.Println(kv.path) + return fmt.Errorf("error loading data to badger: %w", err) } - return kv.path, nil + return nil } -func runStepTwo(dir string, tmp string, db database, l lookups, maxParallelDBQueries, batchSize int, privacy, isolated bool) error { - kv, err := newBadgerStorage(tmp) +func runStepTwo(dir string, pth string, db database, l lookups, maxParallelDBQueries, batchSize int, privacy bool) error { + kv, err := newBadgerStorage(pth) if err != nil { return fmt.Errorf("could not create badger storage: %w", err) } - defer kv.close(isolated) + defer kv.close() j, err := createJSONRecordsTask(dir, db, &l, kv, batchSize, privacy) if err != nil { return fmt.Errorf("error creating new task for venues in %s: %w", dir, err) @@ -112,27 +103,38 @@ func runStepTwo(dir string, tmp string, db database, l lookups, maxParallelDBQue // Transform the downloaded files for company venues creating a database record // per CNPJ -func Transform(dir string, db database, maxParallelDBQueries, batchSize int, privacy, s1 bool, s2 string) error { +func Transform(dir string, db database, max, s int, p, s1 bool, s2 string) error { m, err := transformMode(s1, s2) if err != nil { return fmt.Errorf("error determining transform mode: %w", err) } - var tmp string + var pth string + if m == stepTwo { + pth = s2 + } else { + pth, err = os.MkdirTemp("", fmt.Sprintf("minha-receita-%s-*", time.Now().Format("20060102150405"))) + } + if err != nil { + return fmt.Errorf("error creating temporary key-value storage: %w", err) + } + defer os.RemoveAll(pth) l, err := newLookups(dir) if err != nil { return fmt.Errorf("error creating look up tables from %s: %w", dir, err) } - if m != stepTwo { - tmp, err = runStepOne(dir, l, m == stepOne) - if err != nil { - return fmt.Errorf("error creating key-value storage: %w", err) + switch m { + case stepOne: + if err := runStepOne(dir, pth, l); err != nil { + return err } - } - if m != stepOne { - if s2 != "" { - tmp = s2 + fmt.Println(pth) + case stepTwo: + return runStepTwo(dir, pth, db, l, max, s, p) + case both: + if err := runStepOne(dir, pth, l); err != nil { + return err } - return runStepTwo(dir, tmp, db, l, maxParallelDBQueries, batchSize, privacy, m == stepTwo) + return runStepTwo(dir, pth, db, l, max, s, p) } return nil } diff --git a/transform/venues_test.go b/transform/venues_test.go index b0d1a10..3b216b9 100644 --- a/transform/venues_test.go +++ b/transform/venues_test.go @@ -9,7 +9,7 @@ import ( func TestTaskRun(t *testing.T) { db := newTestDB(t) - tmp, err := os.MkdirTemp("", fmt.Sprintf("%s-%s", badgerFilePrefix, time.Now().Format("20060102150405"))) + tmp, err := os.MkdirTemp("", fmt.Sprintf("minha-receita-%s-*", time.Now().Format("20060102150405"))) if err != nil { t.Fatal("error creating temporary key-value storage: %w", err) } @@ -18,7 +18,7 @@ func TestTaskRun(t *testing.T) { if err != nil { t.Errorf("expected no error creating badger, got %s", err) } - defer kv.close(false) + defer kv.close() lookups, err := newLookups(testdata) if err != nil { t.Errorf("expected no errors creating look up tables, got %v", err)