Skip to content

Commit

Permalink
Merge pull request #60 from cedadev/text-file-performance
Browse files Browse the repository at this point in the history
Improve text_file performance
  • Loading branch information
rhysrevans3 authored Jan 8, 2024
2 parents 4efc9ea + 6273a0f commit b8c3556
Showing 1 changed file with 15 additions and 3 deletions.
18 changes: 15 additions & 3 deletions stac_generator/plugins/inputs/text_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@


import json
from datetime import datetime
from os import listdir
from os.path import isdir, isfile, join

Expand Down Expand Up @@ -55,8 +56,19 @@ def __init__(self, **kwargs):

def run(self, generator: BaseGenerator):
    """Feed every distinct JSON record in ``self.file_list`` to *generator*.

    Each input file is read line by line as UTF-8 text. Every non-blank
    line is parsed as a JSON object and its members are passed as keyword
    arguments to ``generator.process``. A line that has already been seen
    (in this or an earlier file of the same run) is skipped, so each
    distinct record is processed once.

    :param generator: object exposing ``process(**kwargs)``; called once
        per distinct record.
    :raises json.JSONDecodeError: if a non-blank line is not valid JSON.
    :raises OSError: if one of the files cannot be opened.
    """
    start = datetime.now()

    # Records already handed to the generator, keyed by their stripped text.
    # NOTE(review): this set grows with the number of distinct lines; for
    # very large inputs a hash of each line would bound memory better.
    seen_records = set()

    for file in self.file_list:
        with open(file, "r", encoding="utf-8") as f:
            for line in f:
                # Strip the line terminator so that a final line without a
                # trailing newline still matches an identical earlier line.
                record = line.strip()

                # Blank lines carry no record and would make json.loads fail.
                if not record:
                    continue

                if record in seen_records:
                    continue
                seen_records.add(record)

                data = json.loads(record)
                generator.process(**data)

    end = datetime.now()
    total_generated = len(seen_records)
    print(f"Processed {total_generated} elasticsearch records in {end-start}")

0 comments on commit b8c3556

Please sign in to comment.