-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathutils.py
40 lines (35 loc) · 996 Bytes
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import gc
import json
import string
import orjson
import torch
import pickle
import shutil
import time
from tqdm import tqdm
import multiprocessing
from pathlib import Path
from termcolor import colored
class IO:
    """Base interface for file-backed loaders/dumpers.

    Provides the shared ``is_valid_file`` check and declares the ``load`` /
    ``dump`` entry points that concrete formats override. All three are
    static methods, so they work both on the class and on an instance.
    """

    @staticmethod
    def is_valid_file(filepath):
        """Return True if *filepath* exists and its size is greater than zero.

        Args:
            filepath: A path string or ``Path``; it is wrapped in ``Path``.
        """
        filepath = Path(filepath)
        return filepath.exists() and filepath.stat().st_size > 0

    # NOTE: declared static so that an instance call (``IO().load(p)``) reaches
    # NotImplementedError instead of failing earlier with a TypeError caused
    # by the implicit ``self`` argument.
    @staticmethod
    def load(path):
        """Load data from *path*; must be overridden.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    @staticmethod
    def dump(data, path):
        """Write *data* to *path*; must be overridden.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError
class JsonLine(IO):
    """Reader/writer for JSON Lines files (one JSON document per line)."""

    @staticmethod
    def load(path, use_tqdm=False):
        """Parse every non-blank line of *path* as JSON and return a list.

        Args:
            path: File to read; decoded as UTF-8.
            use_tqdm: When true, wrap the line list in a ``tqdm`` progress
                bar labelled 'Load JsonLine'.

        Returns:
            A list with one decoded object per non-blank line, in file order.

        Raises:
            json.JSONDecodeError: If a non-blank line is not valid JSON.
        """
        with open(path, encoding='utf-8') as rf:
            # readlines() breaks only on real newlines. str.splitlines() also
            # breaks on U+0085/U+2028/U+2029, which dump() can emit unescaped
            # inside a string (ensure_ascii=False), tearing a record in two.
            lines = rf.readlines()
        if use_tqdm:
            lines = tqdm(lines, ncols=100, desc='Load JsonLine')
        # Blank lines (e.g. a trailing newline or an empty file) carry no
        # record, so they are skipped rather than passed to json.loads.
        return [json.loads(line) for line in lines if line.strip()]

    @staticmethod
    def dump(instances, path):
        """Serialize *instances* to *path*, one JSON document per line.

        Non-ASCII characters are written as-is (``ensure_ascii=False``), so
        the file is always encoded as UTF-8 regardless of the platform
        locale. No trailing newline is written after the last record.

        Args:
            instances: A list of JSON-serializable objects.
            path: Destination file; overwritten if it exists.

        Raises:
            AssertionError: If *instances* is not a list.
            TypeError: If an element is not JSON-serializable.
        """
        assert isinstance(instances, list), 'instances must be a list'
        # Serialize everything before opening the file so that a failing
        # element does not leave a truncated file behind.
        lines = [json.dumps(d, ensure_ascii=False) for d in instances]
        with open(path, 'w', encoding='utf-8') as wf:
            wf.write('\n'.join(lines))