-
Notifications
You must be signed in to change notification settings - Fork 5
/
convert.py
77 lines (67 loc) · 3.15 KB
/
convert.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import json
import os
import argparse
from datetime import datetime
# Characters removed from titles: the punctuation that common file systems
# reject in file names, plus every ASCII control character (this covers the
# "\n" and "\t" that were always stripped, and also "\r", NUL, etc.).
_INVALID_FILENAME_CHARS = '<>:"/\\|?*' + ''.join(map(chr, range(32)))
_FILENAME_TRANSLATION = str.maketrans('', '', _INVALID_FILENAME_CHARS)


def sanitize_filename(filename):
    """Return ``filename`` with characters unsafe for file names removed.

    Args:
        filename: Proposed file name (typically a conversation title). May be
            ``None``; non-string values are converted with ``str()``.

    Returns:
        The cleaned name, or ``"noname"`` when the input is ``None``, blank,
        or becomes blank once the invalid characters are removed (for
        example ``"???"``). Leading/trailing whitespace of a non-blank name
        is preserved.
    """
    if filename is None:
        return "noname"

    # One C-level pass instead of one ``replace`` call per character.
    cleaned = str(filename).translate(_FILENAME_TRANSLATION)

    # Re-check *after* cleaning: a title made only of invalid characters
    # would otherwise yield an empty file name.
    if cleaned.strip() == "":
        return "noname"
    return cleaned
def get_conversation(node_id, mapping, list, last_author=None):
    """Append the conversation rooted at ``node_id`` to ``list`` as Markdown.

    The node tree is walked depth-first in pre-order, visiting each node's
    children in the order they are listed. The walk uses an explicit stack
    rather than recursion so that long conversations (one nesting level per
    message) cannot exceed Python's recursion limit.

    For every node whose message has ``content.parts``:
      * string parts are kept as-is, dict parts are rendered with ``str()``,
        any other part type is ignored;
      * if at least one part was kept and the author role is not
        ``"system"``, the joined text is appended to ``list`` - prefixed
        with a ``## <role>`` heading when the role differs from the role of
        the previous message on the same branch;
      * the node's role (including ``"system"``) becomes the "previous role"
        handed to its children.

    Args:
        node_id: Key in ``mapping`` of the node to start from.
        mapping: Dict mapping node ids to node dicts. A node may contain a
            ``message`` dict and a ``children`` list of node ids.
        list: Output list, extended in place. The name shadows the builtin
            but is kept because it is part of the function's signature.
        last_author: Role of the message preceding ``node_id``, if any.

    Returns:
        None. Results are delivered through ``list``.

    Raises:
        KeyError: If a referenced node id is missing from ``mapping`` or a
            message with kept parts has no ``author.role``.
    """
    output = list

    # Each entry pairs a node with the role that preceded it on its branch,
    # which is exactly what the recursive form passed down as ``last_author``.
    pending = [(node_id, last_author)]
    while pending:
        current_id, previous_role = pending.pop()
        node = mapping[current_id]

        message = node.get('message')
        if message and 'content' in message and 'parts' in message['content']:
            parts_text = [
                part if isinstance(part, str) else str(part)
                for part in message['content']['parts']
                if isinstance(part, (str, dict))
            ]
            if parts_text:
                author_role = message['author']['role']
                if author_role != "system":
                    text = ''.join(parts_text)
                    if author_role != previous_role:
                        output.append(f"## {author_role}\n{text}")
                    else:
                        output.append(text)
                previous_role = author_role

        # Push in reverse so the first child is popped (and emitted) first,
        # preserving the original pre-order traversal.
        for child_id in reversed(node.get('children') or []):
            pending.append((child_id, previous_role))
def generate_unique_filename(base_path, title):
    """Return a ``.md`` path under ``base_path`` that does not exist yet.

    The first candidate is ``<title>.md``; if that path exists, the
    candidates ``<title>_v1.md``, ``<title>_v2.md``, ... are tried in order
    until a free one is found. A blank ``title`` is replaced by ``"noname"``.

    Args:
        base_path: Directory in which the file is to be created.
        title: Base file name without extension.

    Returns:
        The first candidate path for which ``os.path.exists`` is false.
    """
    stem = "noname" if title.strip() == "" else title
    candidate = os.path.join(base_path, f"{stem}.md")

    # ``next_suffix`` always holds the suffix to try if ``candidate`` is taken.
    next_suffix = 1
    while os.path.exists(candidate):
        candidate = os.path.join(base_path, f"{stem}_v{next_suffix}.md")
        next_suffix += 1
    return candidate
def main(input_file, output_dir, use_date_folders):
    """Convert every conversation in a JSON export into a Markdown file.

    The input file must contain a JSON array of conversation objects. Each
    object is read for its ``title``, its ``mapping`` of message nodes and,
    when date folders are requested, its ``create_time`` timestamp.

    Args:
        input_file: Path to the UTF-8 encoded JSON file.
        output_dir: Directory for the generated ``.md`` files; created if
            it does not exist.
        use_date_folders: When true, each file is written to a sub-folder of
            ``output_dir`` named after the conversation's creation date
            (ISO format, local time). Conversations without a
            ``create_time`` are written directly to ``output_dir``.

    Returns:
        None. One Markdown file is written per convertible conversation and
        its path is printed beforehand.
    """
    os.makedirs(output_dir, exist_ok=True)

    with open(input_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    for item in data:
        title = sanitize_filename(item.get("title"))
        mapping = item['mapping']

        # The root is the node without a parent. Supplying a default keeps a
        # malformed conversation from aborting the export with StopIteration.
        root_node_id = next(
            (node_id for node_id, node in mapping.items() if node.get('parent') is None),
            None,
        )
        if root_node_id is None:
            print(f"Skipping conversation without a root node: {title}")
            continue

        lines = []
        get_conversation(root_node_id, mapping, lines)

        target_dir = output_dir
        if use_date_folders:
            create_time = item.get("create_time")
            # A missing/null timestamp falls back to ``output_dir`` instead
            # of crashing the whole run.
            if create_time is not None:
                date_iso = datetime.fromtimestamp(create_time).date().isoformat()
                target_dir = os.path.join(output_dir, date_iso)
                os.makedirs(target_dir, exist_ok=True)

        file_path = generate_unique_filename(target_dir, title)
        print(f"Attempting to write to: {file_path}")
        with open(file_path, 'w', encoding='utf-8') as outfile:
            outfile.write('\n'.join(lines))
if __name__ == '__main__':
    # Command-line entry point: two positional paths plus an optional flag,
    # forwarded unchanged to main().
    cli = argparse.ArgumentParser(description='Process conversation data.')
    cli.add_argument(
        'input_file',
        help='JSON file containing conversations',
    )
    cli.add_argument(
        'output_dir',
        help='Directory to save output Markdown files',
    )
    cli.add_argument(
        '--use-date-folders',
        action='store_true',
        help='Store files under date-based folders',
    )
    options = cli.parse_args()
    main(options.input_file, options.output_dir, options.use_date_folders)