diff --git a/.github/workflows/build-docker-image.yml b/.github/workflows/build_image.yml similarity index 100% rename from .github/workflows/build-docker-image.yml rename to .github/workflows/build_image.yml diff --git a/.github/workflows/publish-pypi-package.yml b/.github/workflows/publish_package.yml similarity index 100% rename from .github/workflows/publish-pypi-package.yml rename to .github/workflows/publish_package.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 1dad5f69..85b5e9b8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,17 @@ All notable changes to the *readme-ai* project will be documented in this file. --- +## [v0.1.2] - *2023-09-25* + +### 🔐 Security + +- Implement custom directory tree method using pure Python [#53](https://github.com/eli64s/readme-ai/pull/53) + - Removes dependency on the tree command line tool. + - Improves security by removing the subprocess module. + - More details on these risks can be found [here](https://bandit.readthedocs.io/en/latest/plugins/b607_start_process_with_partial_path.html#b607-start-process-with-partial-path). + +--- + ## [v0.1.1] - *2023-09-24* ### 🚀 Features diff --git a/README.md b/README.md index 97c4c2c2..2bce1ee1 100644 --- a/README.md +++ b/README.md @@ -399,6 +399,7 @@ To generate a *README.md* file, use the `readmeai` command in your terminal, alo | Short Flag | Long Flag | Description | Status | |------------|----------------|---------------------------------------------------|--------------| | `-k` | `--api-key` | Your OpenAI API secret key. | Optional | +| `-c` | `--encoding` | Encodings specify how text is converted into tokens.| Optional | | `-e` | `--engine` | OpenAI GPT language model engine (gpt-3.5-turbo) | Optional | | `-f` | `--offline-mode`| Run offline without calling the OpenAI API. | Optional | | `-o` | `--output` | The output path for your README.md file. 
| Optional | diff --git a/poetry.lock b/poetry.lock index f27b3d1e..c8818bf5 100644 --- a/poetry.lock +++ b/poetry.lock @@ -154,6 +154,19 @@ files = [ {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"}, ] +[[package]] +name = "asyncio" +version = "3.4.3" +description = "reference implementation of PEP 3156" +optional = false +python-versions = "*" +files = [ + {file = "asyncio-3.4.3-cp33-none-win32.whl", hash = "sha256:b62c9157d36187eca799c378e572c969f0da87cd5fc42ca372d92cdb06e7e1de"}, + {file = "asyncio-3.4.3-cp33-none-win_amd64.whl", hash = "sha256:c46a87b48213d7464f22d9a497b9eef8c1928b68320a2fa94240f969f6fec08c"}, + {file = "asyncio-3.4.3-py3-none-any.whl", hash = "sha256:c4d18b22701821de07bd6aea8b53d21449ec0ec5680645e5317062ea21817d2d"}, + {file = "asyncio-3.4.3.tar.gz", hash = "sha256:83360ff8bc97980e4ff25c964c7bd3923d333d177aa4f7fb736b019f26c7cb41"}, +] + [[package]] name = "attrs" version = "23.1.0" @@ -1581,4 +1594,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "^3.8.1" -content-hash = "c358cb0829781affc50e8638048a3210fc5308e4d55aab86774b3fda61c0d3cb" +content-hash = "8d6c464acf3f7bf05c14a53cb28ee12a5d6c522cfde5b1847165fbdc871e6e63" diff --git a/pyproject.toml b/pyproject.toml index a3898509..f7046197 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "readmeai" -version = "0.3.083" +version = "0.3.084" description = "🚀 Generate beautiful README.md files from the terminal. 
Powered by OpenAI's GPT LLMs 💫" authors = ["Eli <0x.eli.64s@gmail.com>"] license = "MIT" @@ -12,26 +12,26 @@ readme = "README.md" homepage = "https://github.com/eli64s/readme-ai" documentation = "https://github.com/eli64s/readme-ai/blob/main/README.md" keywords = [ - 'python', - 'markdown', - 'readme', - 'documentation', - 'ai', - 'readme-badges', - 'openai', - 'readme-template', - 'shieldsio', - 'readme-md', - 'awesome-readme', - 'readme-generator', - 'readme-generation', - 'gpt-3', - 'openai-api', - 'automated-readme', - 'auto-readme', - 'readme-md-generator', - 'gpt-4', - 'llm-agent' + "python", + "cli", + "readme", + "ai", + "readme-badges", + "openai", + "language-model", + "hacktoberfest", + "readme-md", + "readme-generator", + "readme-generation", + "gpt-3", + "openai-api", + "automated-readme", + "gpt-4", + "chatgpt", + "openaiapi", + "gpt-35-turbo", + "llm-agent", + "openai-api-python" ] include = ["readmeai", "readmeai.*"] @@ -55,6 +55,8 @@ toml = "^0.10.2" pydantic = "^1.10.9" click = "^8.1.6" tornado = "^6.3.3" +asyncio = "^3.4.3" +aiohttp = "^3.8.5" [tool.poetry.dev-dependencies] black = "*" @@ -65,6 +67,7 @@ pytest-cov = "*" pre-commit = "*" [tool.isort] +profile = "black" line_length = 88 known_third_party = ["tenacity"] multi_line_output = 3 diff --git a/readmeai/builder.py b/readmeai/builder.py index f449c99d..17389a59 100644 --- a/readmeai/builder.py +++ b/readmeai/builder.py @@ -12,14 +12,14 @@ logger = logger.Logger(__name__) -def build_markdown_file( +def build_readme_file( config: conf.AppConfig, helper: conf.ConfigHelper, packages: list, - summaries: tuple, + code_summary: tuple, ) -> None: """Builds the README Markdown file for your codebase.""" - readme_sections = create_markdown_sections(config, helper, packages, summaries) + readme_sections = build_markdown_sections(config, helper, packages, code_summary) readme_file = "\n".join(readme_sections) readme_path = Path(config.paths.readme) @@ -28,11 +28,11 @@ def build_markdown_file( 
logger.info(f"README file generated at: {readme_path}") -def create_markdown_sections( +def build_markdown_sections( config: conf.AppConfig, helper: conf.ConfigHelper, packages: list, - summaries: tuple, + code_summary: tuple, ) -> List[str]: """Constructs each section of the README file.""" name = config.git.name @@ -51,10 +51,10 @@ else markdown_badges ) - markdown_setup_guide = create_setup_guide(config, helper, summaries) + markdown_setup_guide = create_setup_guide(config, helper, code_summary) - if not config.api.offline_mode: - tables = create_markdown_tables(summaries) + if config.api.offline_mode is False: + tables = create_markdown_tables(config.md.default, code_summary) config.md.tables = create_tables(tables, config.md.dropdown, user_repo) markdown_sections = [ @@ -145,11 +145,17 @@ return (default_install_command, default_run_command, default_test_command) -def create_markdown_tables(summaries: Tuple[str, str]) -> List[Tuple[str, str]]: - """Formats the generated code summaries into a list.""" +def create_markdown_tables( + placeholder: str, code_summary: Tuple[str, str] +) -> List[Tuple[str, str]]: + """Formats the generated code summaries into a list.""" summary_list = [] - for module, summary in summaries: - summary_list.append((module, summary)) + for summary in code_summary: + if isinstance(summary, tuple) and len(summary) == 2: + module, summary_text = summary + else: + module, summary_text = summary, placeholder + summary_list.append((module, summary_text)) return summary_list @@ -203,7 +209,7 @@ def create_table(data: List[Tuple[str, str]], user_repo_name: str) -> str: return "\n".join(formatted_lines) -def generate_code_summary_table(base_url: str, directory: Path, level=0) -> str: +def build_recursive_tables(base_url: str, directory: Path, placeholder) -> str: """Creates a Markdown table structure for the given directory.""" markdown = "" markdown += "| File | Summary |\n" @@ -211,17 
+217,14 @@ def generate_code_summary_table(base_url: str, directory: Path, level=0) -> str: for item in sorted(directory.iterdir()): if item.is_file(): - relative_path = os.path.relpath(item, start=directory) - url_path = urllib.parse.quote(relative_path) - full_url = urllib.parse.urljoin(base_url, url_path) - markdown += f"| [{item.name}]({full_url}) | Summary of {item.name} |\n" + markdown += f"| [{item.name}]({item.name}) | {placeholder} |\n" for item in sorted(directory.iterdir()): if item.is_dir(): # If it is a sub-directory, create a collapsible section markdown += f"\n
{item.name}\n\n" # Recursive call for sub-directory - markdown += generate_code_summary_table(base_url, item, level + 1) + markdown += build_recursive_tables(base_url, item, placeholder) # Close the collapsible section markdown += "\n
\n\n" diff --git a/readmeai/conf/conf.toml b/readmeai/conf/conf.toml index 3fe44da2..2106dd44 100644 --- a/readmeai/conf/conf.toml +++ b/readmeai/conf/conf.toml @@ -26,7 +26,7 @@ readme = "readme-ai.md" # Prompts [prompts] code_summary = """Offer a comprehensive summary that encapsulates the core functionalities of the code: -\n{}\n Aim for precision and conciseness in your explanation, ensuring a fine balance between detail and brevity. +\nPath: {0}\nContents:\n{1}\n Aim for precision and conciseness in your explanation, ensuring a fine balance between detail and brevity. Limit your response to a maximum of 350 characters (including spaces). """ features = """Hello! Analyze the Git codebase {} and create a robust summary of the project's features. @@ -59,7 +59,7 @@ slogan = "Conceptualize a catchy and memorable slogan for the GitHub project: {} # Markdown Template Code [md] tables = "" -default = "▶︎ INSERT-DESCRIPTION" +default = "► INSERT-TEXT" dropdown = """
{}\n\n{}\n\n
\n""" header = """

diff --git a/readmeai/conf/ignore_files.toml b/readmeai/conf/ignore_files.toml index aef65d59..b4d34fdb 100644 --- a/readmeai/conf/ignore_files.toml +++ b/readmeai/conf/ignore_files.toml @@ -102,6 +102,7 @@ extensions = [ ".sample", ] files = [ + ".git", "AUTHORS", "CHANGELOG", "init.py", @@ -144,4 +145,5 @@ files = [ ".whitesource", ".prettierrc", ".pre-commit-config.yaml", + "poetry.lock", ] diff --git a/readmeai/main.py b/readmeai/main.py index 0d723620..9fdfabcd 100755 --- a/readmeai/main.py +++ b/readmeai/main.py @@ -19,17 +19,18 @@ config_helper = conf.load_config_helper(config_model) -async def main(repository: str, offline: bool) -> None: +async def main(repository: str) -> None: """Main entrypoint for the readme-ai application.""" config.git = conf.GitConfig(repository=repository) llm = model.OpenAIHandler(config) - await generate_readme(llm, offline) + await generate_readme(llm) -async def generate_readme(llm: model.OpenAIHandler, offline: bool) -> None: +async def generate_readme(llm: model.OpenAIHandler) -> None: """Orchestrates the README file generation process.""" name = config.git.name repository = config.git.repository + placeholder = config.md.default try: temp_dir = utils.clone_repo_to_temp_dir(repository) @@ -43,25 +44,26 @@ async def generate_readme(llm: model.OpenAIHandler, offline: bool) -> None: logger.info(f"Dependencies: {dependencies}") logger.info(f"Total files: {len(file_text)}") - if offline: - logger.warning("Skipping OpenAI API calls as offline mode is enabled.") - config.md.tables = builder.generate_code_summary_table(repository, temp_dir) - code_summary = config.md.tables - slogan, overview, features = ( - config.md.default, - config.md.default, - config.md.default, - ) - else: + if config.api.offline_mode is False: code_summary = await generate_code_to_text(llm, file_text) slogan, overview, features = await generate_markdown_text( llm, repository, code_summary ) await llm.close() + else: + config.md.tables = 
builder.build_recursive_tables( + repository, temp_dir, placeholder + ) + code_summary = placeholder + slogan, overview, features = ( + config.md.default, + config.md.default, + config.md.default, + ) config.md.header = config.md.header.format(name, slogan) config.md.intro = config.md.intro.format(overview, features) - builder.build_markdown_file(config, config_helper, dependencies, code_summary) + builder.build_readme_file(config, config_helper, dependencies, code_summary) except Exception as excinfo: logger.error(f"Exception: {excinfo}") @@ -99,6 +101,12 @@ async def generate_markdown_text( default=os.environ.get("OPENAI_API_KEY", None), help="OpenAI API secret key.", ) +@click.option( + "-c", + "--encoding", + default="cl100k_base", + help="Encodings specify how text is converted into tokens.", +) @click.option( "-e", "--engine", @@ -142,6 +150,7 @@ async def generate_markdown_text( ) def cli( api_key: str, + encoding: Optional[str], engine: Optional[str], offline_mode: bool, output: Optional[str], @@ -159,13 +168,12 @@ def cli( logger.info("README-AI is now executing.") logger.info(f"Output file: {config.paths.readme}") logger.info(f"OpenAI Engine: {config.api.engine}") - logger.info(f"OpenAI Temperature: {config.api.temperature}") - if not api_key: - logger.error("API key not found, running in offline mode...") + if not api_key and not offline_mode: offline_mode = True + config.api.offline_mode = offline_mode - asyncio.run(main(repository, offline_mode)) + asyncio.run(main(repository)) logger.info("README-AI execution complete.") diff --git a/readmeai/model.py b/readmeai/model.py index 41dcbc8b..81ba70e9 100644 --- a/readmeai/model.py +++ b/readmeai/model.py @@ -75,7 +75,7 @@ async def code_to_text( self.logger.warning(f"Ignoring file: {path}") continue - prompt_code = prompt.format(contents) + prompt_code = prompt.format(str(path), contents) prompt_length = len(prompt_code.split()) if prompt_length > self.tokens_max: exc = f"Prompt exceeds max token limit: 
{prompt_length}." @@ -171,7 +171,7 @@ async def generate_text( "messages": [ { "role": "system", - "content": "You're a brilliant Tech Lead.", + "content": "You're a lead AI researcher and distributed systems engineer.", }, {"role": "user", "content": prompt}, ], diff --git a/readmeai/utils.py b/readmeai/utils.py index 1d578592..88fd087b 100644 --- a/readmeai/utils.py +++ b/readmeai/utils.py @@ -28,11 +28,10 @@ def clone_repo_to_temp_dir(repo_path: str) -> Path: try: git.Repo.clone_from(repo_path, temp_dir, env=env) git_dir = Path(temp_dir) / ".git" + if git_dir.exists(): shutil.rmtree(git_dir) - logger.info(f"Cloned codebase {repo_path} to {temp_dir}.") - return Path(temp_dir) except git.GitCommandError as excinfo: