-
Notifications
You must be signed in to change notification settings - Fork 2
/
awesome_list_generator.py
132 lines (110 loc) · 5.44 KB
/
awesome_list_generator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
from dotenv import load_dotenv
from section_data_extractor import SectionDataExtractor, GithubMode
from section_markdown_generator import SectionMarkdownGenerator
from utils import save_markdown
# Runs at import time, before any class in this module is used.
# NOTE(review): presumably this loads the API credentials needed by the
# data extractor / markdown generator from a local .env file — confirm
# which variables are expected.
load_dotenv()
class AwesomeListGenerator:
    """
    A class used to generate a markdown awesome list for a specific keyword.

    Attributes
    ----------
    keyword : str
        the keyword for which the awesome list will be generated. It is passed
        to the section data extractor and is also used for the title of the
        awesome list and for the name of the saved markdown file
    description : str
        a description related to the keyword; passed to the section data
        extractor and written under the main title
    model : str
        the OpenAI model to be used for generating the markdown
        (default is "gpt-3.5-turbo-16k")
    data_extraction_batch_size : int
        the number of data items to process in each batch (default is 10).
        It is forwarded as ``batch_size`` to the markdown generator
    number_of_results : int
        the number of results to fetch from each data source (default is 20)
    github_mode : GithubMode
        the mode to use for fetching data from GitHub
        (default is GithubMode.REPO)
    section_data_extractor : SectionDataExtractor
        an object of SectionDataExtractor to extract the data for each section
    section_generator : SectionMarkdownGenerator
        an object of SectionMarkdownGenerator to generate the markdown for
        each section from the extracted data

    Methods
    -------
    save_and_return_awesome_list():
        Generates and saves the awesome list into a markdown file, and returns
        the markdown content together with usage information
    """

    def __init__(
        self,
        keyword: str,
        description: str,
        model: str = "gpt-3.5-turbo-16k",
        data_extraction_batch_size: int = 10,
        number_of_results: int = 20,
        github_mode: GithubMode = GithubMode.REPO,
    ):
        """
        Constructs all the necessary attributes for the AwesomeListGenerator object.

        Parameters
        ----------
        keyword : str
            the keyword for which the awesome list will be generated
        description : str
            a description related to the keyword
        model : str
            the OpenAI model to be used for generating the markdown
            (default is "gpt-3.5-turbo-16k")
        data_extraction_batch_size : int
            the number of data items to process in each batch (default is 10)
        number_of_results : int
            the number of results to fetch from each data source (default is 20)
        github_mode : GithubMode
            the mode to use for fetching data from GitHub
            (default is GithubMode.REPO)
        """
        self.keyword = keyword
        self.description = description
        self.model = model
        self.data_extraction_batch_size = data_extraction_batch_size
        # Kept on the instance so the documented attributes actually exist and
        # the configuration used to build the extractor can be inspected later.
        self.number_of_results = number_of_results
        self.github_mode = github_mode
        self.section_data_extractor = SectionDataExtractor(
            keyword=keyword,
            description=description,
            num_results=number_of_results,
            github_mode=github_mode,
        )
        self.section_generator = SectionMarkdownGenerator(model)

    def save_and_return_awesome_list(self) -> tuple[str, dict[str, float]]:
        """
        Generates and saves the awesome list into a markdown file, and returns
        the markdown content along with usage information.

        The file is written through ``save_markdown`` under the name
        ``"<keyword>.md"``.

        Returns
        -------
        tuple[str, dict[str, float]]
            a pair ``(markdown, usage_info)`` where ``markdown`` is the merged
            awesome list in markdown format and ``usage_info`` is a dictionary
            with the single key ``"total_tokens"`` holding the token count
            reported by the section generator
        """
        data_types_info = self.section_data_extractor.get_data()
        markdown_contents, total_tokens = self.section_generator.generate_markdown(
            data_types_info, batch_size=self.data_extraction_batch_size
        )
        merged_markdown = self._merge_markdown_contents(markdown_contents)
        save_markdown(f"{self.keyword}.md", merged_markdown)
        usage_info = {"total_tokens": total_tokens}
        return merged_markdown, usage_info

    def _merge_markdown_contents(self, markdown_contents: dict[str, str]) -> str:
        """
        Merges the markdown contents of all sections into one markdown, adds a
        main title, a description, a table of contents and a closing footer.

        Parameters
        ----------
        markdown_contents : dict[str, str]
            a dictionary mapping each section title to its corresponding
            markdown content; sections are emitted in the dictionary's
            iteration order

        Returns
        -------
        str
            a string representing the merged markdown content
        """
        # Collect the pieces and join once instead of growing a string with +=.
        parts = [
            f"# Awesome {self.keyword}\n\n",
            f"{self.description}\n\n",
            "## Table of Contents\n\n",
        ]

        for title in markdown_contents:
            # NOTE(review): the anchor is built only by lower-casing and
            # replacing spaces; a title containing punctuation may not match
            # the anchor the markdown renderer generates for its heading.
            anchor = title.lower().replace(" ", "-")
            parts.append(f"- [{title}](#{anchor})\n")
        parts.append("\n")

        for title, content in markdown_contents.items():
            parts.append(f"## {title}\n\n")
            parts.append(content + "\n")

        # Advertisement for the project
        parts.append("---\n\n")
        parts.append(
            "This initial version of the Awesome List was generated with the help of the [Awesome List Generator](https://github.com/alialsaeedi19/GPT-Awesome-List-Maker). "
        )
        parts.append(
            "It's an open-source Python package that uses the power of GPT models to automatically curate and generate starting points for resource lists related to a specific topic. "
        )
        return "".join(parts)