diff --git a/.copier-answers.yml b/.copier-answers.yml new file mode 100644 index 0000000..85076bc --- /dev/null +++ b/.copier-answers.yml @@ -0,0 +1,4 @@ +# Changes here will be overwritten by Copier +_commit: f9bd774 +_src_path: git@github.com:la-famiglia-jst2324/parma-mining-template.git +module_name: reddit diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..1edbed7 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,3 @@ +.conda_lock_files/** linguist-generated=true +* text=auto +*.{py,yaml,yml,sh} text eol=lf diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..e8c6373 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1 @@ +* @robinholzi diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..63ee231 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,18 @@ +# Motivation + + + +# Changes + + + +# Checklist + +- [ ] added myself as assignee +- [ ] correct reviewers +- [ ] descriptive PR title using [conventional commits](https://www.conventionalcommits.org/en/v1.0.0/). +- [ ] description explains the motivation and details of the changes +- [ ] tests cover my changes +- [ ] documentation is updated +- [ ] CI is green +- [ ] breaking changes are discussed with the team and documented in the PR title `!` (e.g. 
`feat!: Update endpoint`) diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..0ce46da --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,8 @@ +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "monthly" + reviewers: + - "robinholzi" diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml new file mode 100644 index 0000000..d426e8e --- /dev/null +++ b/.github/release-drafter.yml @@ -0,0 +1,98 @@ +# PULL REQUEST LABELS +autolabeler: + # Conventional Commit Types (https://github.com/commitizen/conventional-commit-types) + - label: build + title: + - '/^build(\(.*\))?(\!)?\:/' + - label: chore + title: + - '/^chore(\(.*\))?(\!)?\:/' + - label: ci + title: + - '/^ci(\(.*\))?(\!)?\:/' + - label: documentation + title: + - '/^docs(\(.*\))?(\!)?\:/' + - label: enhancement + title: + - '/^feat(\(.*\))?(\!)?\:/' + - label: fix + title: + - '/^fix(\(.*\))?(\!)?\:/' + - label: performance + title: + - '/^perf(\(.*\))?(\!)?\:/' + - label: refactor + title: + - '/^refactor(\(.*\))?(\!)?\:/' + - label: revert + title: + - '/^revert(\(.*\))?(\!)?\:/' + - label: style + title: + - '/^style(\(.*\))?(\!)?\:/' + - label: test + title: + - '/^test(\(.*\))?(\!)?\:/' + # Custom Types + - label: breaking + title: + - '/^[a-z]+(\(.*\))?\!\:/' +# >>> AUTOMATIC VERSIONING (PRE 1.0) +version-resolver: + minor: + labels: + - breaking + default: patch +# >>> RELEASE CONFIGURATION +name-template: "v$RESOLVED_VERSION" +tag-template: "v$RESOLVED_VERSION" +category-template: "### $TITLE" +change-template: "- $TITLE by @$AUTHOR in [#$NUMBER]($URL)" +replacers: + # remove conventional commit tag & scope from change list + - search: '/- [a-z]+(\(.*\))?(\!)?\: /g' + replace: "- " +template: | + ## What's Changed + + $CHANGES + + **Full Changelog:** [`$PREVIOUS_TAG...v$RESOLVED_VERSION`](https://github.com/$OWNER/$REPOSITORY/compare/$PREVIOUS_TAG...v$RESOLVED_VERSION) +categories: + - 
title: ⚠️ Breaking Changes + labels: + - breaking + - title: ✨ New Features + labels: + - enhancement + - title: 🐞 Bug Fixes + labels: + - fix + - title: 🏎️ Performance Improvements + labels: + - performance + - title: 📚 Documentation + labels: + - documentation + - title: 🏗️ Testing + labels: + - test + - title: ⚙️ Automation + labels: + - ci + - title: 🛠 Builds + labels: + - build + - title: 💎 Code Style + labels: + - style + - title: 📦 Refactorings + labels: + - refactor + - title: ♻️ Chores + labels: + - chore + - title: 🗑 Reverts + labels: + - revert diff --git a/.github/workflows/chore.yml b/.github/workflows/chore.yml new file mode 100644 index 0000000..e89059e --- /dev/null +++ b/.github/workflows/chore.yml @@ -0,0 +1,66 @@ +name: Chore +on: + pull_request: + branches: [main] + types: [opened, reopened, edited, synchronize] + push: + branches: [main] + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + check-pr-title: + name: PR title validation + if: github.event_name == 'pull_request' + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: write + steps: + - name: validate conventional commit message + id: lint + uses: amannn/action-semantic-pull-request@v5 + with: + subjectPattern: ^[A-Z].+[^. ]$ # subject must start with uppercase letter and may not end with a dot/space + env: + GITHUB_TOKEN: ${{ github.token }} + - name: post comment about invalid PR title + if: failure() + uses: marocchino/sticky-pull-request-comment@v2 + with: + header: conventional-commit-pr-title + message: | + Thanks for contributing to ParmaAI! 👋🏼 + + Please use PR titles according to [Conventional Commits specification](https://www.conventionalcommits.org/en/v1.0.0/) and it looks like your proposed title needs to be adjusted. + +
<details><summary>Details</summary> + + ``` + ${{ steps.lint.outputs.error_message }} + ``` + + </details>
+ - name: delete comment about invalid PR title + if: success() + uses: marocchino/sticky-pull-request-comment@v2 + with: + header: conventional-commit-pr-title + delete: true + + release-drafter: + name: ${{ github.event_name == 'pull_request' && 'Assign Labels' || 'Draft Release' }} + runs-on: ubuntu-latest + permissions: + contents: write + pull-requests: write + steps: + - name: ${{ github.event_name == 'pull_request' && 'Assign labels' || 'Update release draft' }} + uses: release-drafter/release-drafter@v5 + with: + disable-releaser: ${{ github.event_name == 'pull_request' }} + disable-autolabeler: ${{ github.event_name == 'push' }} + env: + GITHUB_TOKEN: ${{ github.token }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..0541236 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,59 @@ +name: CI +permissions: write-all +on: + pull_request: + push: + branches: [main] + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +defaults: + run: + shell: bash -el {0} + +jobs: + pre-commit-checks: + name: Pre-commit Checks + runs-on: ubuntu-latest + timeout-minutes: 30 + env: + PRE_COMMIT_USE_MICROMAMBA: 1 + steps: + - uses: actions/checkout@v4 + - name: micromamba installation + uses: mamba-org/setup-micromamba@da9b4fa3cd810fa222215d6062647d3e717e7662 + - name: add micromamba to GITHUB_PATH + run: echo "${HOME}/micromamba-bin" >> "$GITHUB_PATH" + - name: Install Python 3.11 + uses: actions/setup-python@v4 + with: + python-version: "3.11" + - name: Run pre-commit checks + uses: pre-commit/action@v3.0.0 + env: + PRE_COMMIT_USE_MICROMAMBA: 1 + + ci: + name: Testing CI + runs-on: ubuntu-latest + timeout-minutes: 30 + # TODO: include services database container once needed + steps: + - uses: actions/checkout@v4 + - name: conda env setup + uses: mamba-org/setup-micromamba@da9b4fa3cd810fa222215d6062647d3e717e7662 + with: + environment-file: environment.yml + cache-environment: 
true + - name: Install package + run: python -m pip install --no-build-isolation --no-deps --disable-pip-version-check -e . + - name: Run unit tests + run: python -m pytest --cov-report=xml + - name: Get Cover + uses: orgoro/coverage@v3.1 + if: github.event_name == 'pull_request' + with: + coverageFile: ./coverage.xml + token: ${{ github.token }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..c8b3cce --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,20 @@ +name: Deploy +on: + release: + types: [published] + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +defaults: + run: + shell: bash -el {0} + +jobs: + prod-deployment: + name: Publish release + runs-on: ubuntu-latest + steps: + - name: dummy + run: echo "TODO" diff --git a/.github/workflows/tag-major.yml b/.github/workflows/tag-major.yml new file mode 100644 index 0000000..e5987cc --- /dev/null +++ b/.github/workflows/tag-major.yml @@ -0,0 +1,47 @@ +name: Major Tag +on: + release: + types: [published] + +permissions: + contents: write + +jobs: + create-major-tag: + name: major tag creation + runs-on: ubuntu-latest + steps: + - name: Get major version + id: version + run: | + MAJOR=$(echo ${{ github.ref_name }} | cut -d'.' 
-f1 | cut -c2-) + echo "major=$MAJOR" >> $GITHUB_OUTPUT + - name: Delete existing tag + continue-on-error: true + uses: actions/github-script@v6 + with: + script: | + await github.rest.git.deleteRef({ + owner: context.repo.owner, + repo: context.repo.repo, + ref: `tags/v${{ steps.version.outputs.major }}`, + }); + - name: Create tag + uses: actions/github-script@v6 + with: + script: | + const newTag = "v${{ steps.version.outputs.major }}"; + const createdTag = await github.rest.git.createTag({ + owner: context.repo.owner, + repo: context.repo.repo, + tag: newTag, + message: "Moving pointer to ${{ github.ref_name }}", + object: "${{ github.sha }}", + type: "commit", + }); + await github.rest.git.createRef({ + owner: context.repo.owner, + repo: context.repo.repo, + ref: `refs/tags/${newTag}`, + sha: createdTag.data.sha + }); diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7feb95b --- /dev/null +++ b/.gitignore @@ -0,0 +1,576 @@ +# Created by https://www.toptal.com/developers/gitignore/api/vim,node,linux,macos,windows,pycharm+all,webstorm+all,visualstudiocode,direnv,python +# Edit at https://www.toptal.com/developers/gitignore?templates=vim,node,linux,macos,windows,pycharm+all,webstorm+all,visualstudiocode,direnv,python + +### direnv ### +.direnv +.envrc + +### Linux ### +*~ + +# temporary files which can be created if a process still has a handle open of a deleted file +.fuse_hidden* + +# KDE directory preferences +.directory + +# Linux trash folder which might appear on any partition or disk +.Trash-* + +# .nfs files are created when an open file is removed but is still being accessed +.nfs* + +### macOS ### +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on 
remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +### macOS Patch ### +# iCloud generated files +*.icloud + +### Node ### +# Logs +logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* +lerna-debug.log* +.pnpm-debug.log* + +# Diagnostic reports (https://nodejs.org/api/report.html) +report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json + +# Runtime data +pids +*.pid +*.seed +*.pid.lock + +# Directory for instrumented libs generated by jscoverage/JSCover +lib-cov + +# Coverage directory used by tools like istanbul +coverage +*.lcov + +# nyc test coverage +.nyc_output + +# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) +.grunt + +# Bower dependency directory (https://bower.io/) +bower_components + +# node-waf configuration +.lock-wscript + +# Compiled binary addons (https://nodejs.org/api/addons.html) +build/Release + +# Dependency directories +node_modules/ +jspm_packages/ + +# Snowpack dependency directory (https://snowpack.dev/) +web_modules/ + +# TypeScript cache +*.tsbuildinfo + +# Optional npm cache directory +.npm + +# Optional eslint cache +.eslintcache + +# Optional stylelint cache +.stylelintcache + +# Microbundle cache +.rpt2_cache/ +.rts2_cache_cjs/ +.rts2_cache_es/ +.rts2_cache_umd/ + +# Optional REPL history +.node_repl_history + +# Output of 'npm pack' +*.tgz + +# Yarn Integrity file +.yarn-integrity + +# dotenv environment variable files +.env +.env.development.local +.env.test.local +.env.production.local +.env.local + +# parcel-bundler cache (https://parceljs.org/) +.cache +.parcel-cache + +# Next.js build output +.next +out + +# Nuxt.js build / generate output +.nuxt +dist + +# Gatsby files +.cache/ +# Comment in the public line in if your project uses Gatsby and not Next.js +# https://nextjs.org/blog/next-9-1#public-directory-support +# public + +# vuepress build output +.vuepress/dist + +# vuepress v2.x temp and cache directory +.temp + +# Docusaurus cache and generated 
files +.docusaurus + +# Serverless directories +.serverless/ + +# FuseBox cache +.fusebox/ + +# DynamoDB Local files +.dynamodb/ + +# TernJS port file +.tern-port + +# Stores VSCode versions used for testing VSCode extensions +.vscode-test + +# yarn v2 +.yarn/cache +.yarn/unplugged +.yarn/build-state.yml +.yarn/install-state.gz +.pnp.* + +### Node Patch ### +# Serverless Webpack directories +.webpack/ + +# Optional stylelint cache + +# SvelteKit build / generate output +.svelte-kit + +### PyCharm+all ### +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# AWS User-specific +.idea/**/aws.xml + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. 
+# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# SonarLint plugin +.idea/sonarlint/ + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser + +### PyCharm+all Patch ### +# Ignore everything but code style settings and run configurations +# that are supposed to be shared within teams. + +.idea/* + +!.idea/codeStyles +!.idea/runConfigurations + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +### Python Patch ### +# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration +poetry.toml + +# ruff +.ruff_cache/ + +# LSP config files +pyrightconfig.json + +### Vim ### +# Swap +[._]*.s[a-v][a-z] +!*.svg # comment out if you don't need vector files +[._]*.sw[a-p] +[._]s[a-rt-v][a-z] +[._]ss[a-gi-z] +[._]sw[a-p] + +# Session +Session.vim +Sessionx.vim + +# Temporary +.netrwhist +# Auto-generated tag files +tags +# Persistent undo +[._]*.un~ + +### VisualStudioCode ### +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json +!.vscode/*.code-snippets + +# Local History for Visual Studio Code +.history/ + +# Built Visual Studio Code Extensions +*.vsix + +### VisualStudioCode Patch ### +# Ignore all local history of files +.history +.ionide + +### WebStorm+all ### +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# 
User-specific stuff + +# AWS User-specific + +# Generated files + +# Sensitive or high-churn files + +# Gradle + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. +# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake + +# Mongo Explorer plugin + +# File-based project format + +# IntelliJ + +# mpeltonen/sbt-idea plugin + +# JIRA plugin + +# Cursive Clojure plugin + +# SonarLint plugin + +# Crashlytics plugin (for Android Studio and IntelliJ) + +# Editor-based Rest Client + +# Android studio 3.1+ serialized cache file + +### WebStorm+all Patch ### +# Ignore everything but code style settings and run configurations +# that are supposed to be shared within teams. + + + +### Windows ### +# Windows thumbnail cache files +Thumbs.db +Thumbs.db:encryptable +ehthumbs.db +ehthumbs_vista.db + +# Dump file +*.stackdump + +# Folder config file +[Dd]esktop.ini + +# Recycle Bin used on file shares +$RECYCLE.BIN/ + +# Windows Installer files +*.cab +*.msi +*.msix +*.msm +*.msp + +# Windows shortcuts +*.lnk + +# End of https://www.toptal.com/developers/gitignore/api/vim,node,linux,macos,windows,pycharm+all,webstorm+all,visualstudiocode,direnv,python + +# [CUSTOMIZATIONS] +.data diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..e171243 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,45 @@ +ci: + autoupdate_commit_msg: "chore: Update pre-commit hooks" +exclude: ^(\.conda_lock_files/) +repos: + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: "v0.1.4" + hooks: + - id: ruff-format + - repo: https://github.com/psf/black-pre-commit-mirror + rev: 23.10.1 + hooks: + - id: black + language_version: python3.11 + args: + - --safe + - --target-version=py311 + - repo: 
https://github.com/pre-commit/mirrors-mypy + rev: "v1.5.1" + hooks: + - id: mypy + additional_dependencies: + [types-setuptools, types-pyyaml, sqlalchemy-stubs] + - repo: https://github.com/pre-commit/mirrors-prettier + rev: "v3.0.3" + hooks: + - id: prettier + files: "\\.(md|json|yml|yaml)$" + - repo: https://github.com/myint/docformatter + rev: v1.7.5 + hooks: + - id: docformatter + args: [--in-place, --black] + - repo: https://github.com/sqlfluff/sqlfluff + rev: 2.3.5 + hooks: + - id: sqlfluff-fix + - repo: https://github.com/crate-ci/typos + rev: v1.16.22 + hooks: + - id: typos + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..f08e3fb --- /dev/null +++ b/Makefile @@ -0,0 +1,29 @@ +.PHONY: prerequisites install dev test purge-db purge + +# This Makefile should provide you with a simple way to get your dev +# environment up and running. It will install all the dependencies +# needed to run the project, and then run the project. + +prerequisites: + # Make sure to have micromamba installed - a fast conda/mamba implementation with very low overhead. + # This will allow you to create a new environment with all the dependencies needed for this project. + # Conda environments also contain dedicated python interpreters that won't mess up your local python installation. + +install: + pre-commit install + micromamba create -f environment.yml # Create a new environment + # execute the following two steps manually + # micromamba activate parma-mining-reddit # Activate the new environment + # pip install -e . 
# Install the project in editable mode + +dev: + uvicorn parma_mining.reddit.api:app --reload + +test: + pytest tests/ + +purge-db: + # TODO + +purge: purge-db + rm -rf .mypy_cache .pytest_cache .coverage .eggs diff --git a/README.md b/README.md new file mode 100644 index 0000000..ee1b4a9 --- /dev/null +++ b/README.md @@ -0,0 +1,109 @@ +# parma-mining-reddit + +[![Chore](https://github.com/la-famiglia-jst2324/parma-mining-reddit/actions/workflows/chore.yml/badge.svg?branch=main)](https://github.com/la-famiglia-jst2324/parma-mining-reddit/actions/workflows/chore.yml) +[![CI](https://github.com/la-famiglia-jst2324/parma-mining-reddit/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/la-famiglia-jst2324/parma-mining-reddit/actions/workflows/ci.yml) +[![Deploy](https://github.com/la-famiglia-jst2324/parma-mining-reddit/actions/workflows/release.yml/badge.svg)](https://github.com/la-famiglia-jst2324/parma-mining-reddit/actions/workflows/release.yml) +[![Major Tag](https://github.com/la-famiglia-jst2324/parma-mining-reddit/actions/workflows/tag-major.yml/badge.svg)](https://github.com/la-famiglia-jst2324/parma-mining-reddit/actions/workflows/tag-major.yml) + +ParmaAI mining module for Reddit. + +## Getting Started + +The following steps will get you started with the project. + +1. Pre-requisites: to be able to contribute to JST in this repository, make sure to comply with the following prerequisites. + + - Configure GitHub via an SSH key. Key-based authentication is highly encouraged. See [GitHub Docs](https://docs.github.com/en/github/authenticating-to-github/connecting-to-github-with-ssh) for more information. + - Please make sure to have a GPG key configured for GitHub. See [GitHub Docs](https://docs.github.com/en/authentication/managing-commit-signature-verification/adding-a-gpg-key-to-your-github-account) for more information. 
+ - Install **micromamba**, a conda environment management package manager, as described [here](https://mamba.readthedocs.io/en/latest/micromamba-installation.html). Alternatively conda or mamba installations should also work, but are highly discouraged because of their slow performance. + +2. **Clone the repository** + + ```bash + git clone git@github.com:la-famiglia-jst2324/parma-mining-reddit.git + ``` + +3. **Precommit & environment setup**: + + ```bash + make install # execute the last 2 steps manually! + ``` + +4. **Start the api server**: + + ```bash + make dev + ``` + + **Open [http://localhost:8000](http://localhost:8000) with your browser to see the result.** + + FastAPI will provide you with interactive documentation of the API. You can also use the Swagger UI at [http://localhost:8000/docs](http://localhost:8000/docs) or the ReDoc UI at [http://localhost:8000/redoc](http://localhost:8000/redoc). + +5. Optional: Running the pre-commit pipeline manually + + ```bash + pre-commit run --all-files + ``` + +6. Test your code: + + ```bash + make test + ``` + +## PR workflow + +1. **Create a new branch** + [linear.app](https://linear.app) offers a button to copy branch names from tickets. + In case there is no ticket, please feel free to use an arbitrary name or create a ticket. + GitHub CI doesn't care about the branch name, only the PR title matters. + + ```bash + # format: e.g. robinholzingr/meta-1-create-archtecture-drafts-diagrams-list-of-key-priorities + git checkout -b <branch-name> + ``` + +2. Open a PR and use a [conventional commit](https://www.conventionalcommits.org/en/v1.0.0/) PR title. + +3. Wait for the CI pipeline to pass and, if you are happy with your changes, request a review. + +4. Merge the PR (using the "Squash and merge" option) and delete the branch. + Pay attention to include co-authors if anyone else contributed to the PR. + +5. If you want to release a new version to production, create a new release on GitHub. 
+ The release version will be automatically derived from the PR titles + (while pre-1.0, breaking changes yield new minor versions and all other changes yield new patch versions). + +### Directory structure + +```bash +. +├── parma_mining.reddit: Main sourcing code +│ └── api: FastAPI REST API +├── tests: Tests for mining module +├── Makefile: Recipes for simplified setup and local development +├── README.md +├── docker-compose.yml: Docker compose file for local database +├── environment.yml: conda environment file +└── pyproject.toml: Python project configuration file +``` + +## Tech Stack + +Core libraries that this project uses: + +- [FastAPI](https://fastapi.tiangolo.com/): FastAPI is a modern, fast (high-performance), web framework for building APIs with Python 3.6+ based on standard Python type hints. +- [Pydantic](https://pydantic-docs.helpmanual.io/): Data validation and settings management using python type annotations. +- [Typer](https://typer.tiangolo.com/): Typer is a library for building CLI applications that users will love using and developers will love creating. +- [Polars](https://pola.rs): Polars is a blazingly fast data processing library written in Rust. It has a DataFrame API that is similar to Pandas and a Series API that is similar to NumPy. +- [Pytest](https://docs.pytest.org/en/6.2.x/): The pytest framework makes it easy to write small tests, yet scales to support complex functional testing for applications and libraries. + +## Deployment + +No deployment pipeline has been set up yet. + +Currently we are considering several backend frameworks like `Firebase`, `Supabase` or `AWS Amplify`. + +## Disclaimer + +In case there are any issues with the initial setup or important architectural decisions/integrations missing, please contact the meta team or @robinholzi directly. 
diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..2d22152 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,4 @@ +# version: "3.9" +# services: +# parma-mining-db: +# TODO: add a mongo.db instance if needed diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000..33707e0 --- /dev/null +++ b/environment.yml @@ -0,0 +1,24 @@ +name: parma-mining-reddit +channels: + - conda-forge + - nodefaults +dependencies: + # Git + - pre-commit + # Python + - pip + - python =3.11 + - setuptools-scm + - setuptools >=61 # pyproject.toml support + # Development + - types-pyyaml + # Testing + - pytest >=6 # --import-mode option + - pytest-cov + # Dependencies (core) + - fastapi >=0.104.0 + - polars >=0.19.0 + - pydantic >=2 + - pyyaml + - typer >=0.9.0 + - uvicorn >=0.23.2 \ No newline at end of file diff --git a/parma_mining/__init__.py b/parma_mining/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/parma_mining/reddit/__init__.py b/parma_mining/reddit/__init__.py new file mode 100644 index 0000000..2f1c26f --- /dev/null +++ b/parma_mining/reddit/__init__.py @@ -0,0 +1,8 @@ +"""reddit data sourcing package.""" + +import importlib.metadata + +try: + __version__ = importlib.metadata.version(__name__) +except Exception: + __version__ = "unknown" diff --git a/parma_mining/reddit/api/__init__.py b/parma_mining/reddit/api/__init__.py new file mode 100644 index 0000000..5f7817a --- /dev/null +++ b/parma_mining/reddit/api/__init__.py @@ -0,0 +1,3 @@ +from .main import app # noqa + +__all__ = ["app"] diff --git a/parma_mining/reddit/api/main.py b/parma_mining/reddit/api/main.py new file mode 100644 index 0000000..b013962 --- /dev/null +++ b/parma_mining/reddit/api/main.py @@ -0,0 +1,12 @@ +"""Main entrypoint for the API routes of parma-mining-reddit.""" + +from fastapi import FastAPI + +app = FastAPI() + + +# root endpoint +@app.get("/", status_code=200) +def root(): + """Return a static welcome message identifying this service.""" + return 
{"welcome": "at parma-mining-reddit"} diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..2658473 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,59 @@ +[build-system] +requires = ["setuptools", "setuptools-scm", "wheel"] + +[tool.setuptools_scm] +version_scheme = "post-release" + +[project] +name = "parma-mining-reddit" +description = "" +readme = "README.md" +dynamic = ["version"] +authors = [ + {name = "Robin Holzinger", email = "robin.holzinger@tum.de"}, +] +classifiers = [ + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.11", +] +requires-python = ">=3.11" + +[project.urls] +repository = "https://github.com/la-famiglia-jst2324/parma-mining-reddit" + +[tool.setuptools.packages.find] +include = ["parma_mining.*"] +namespaces = false + +[project.scripts] + +[tool.black] +exclude = ''' +/( + \.eggs + | \.git + | \.venv + | build + | dist +)/ +''' + +[tool.ruff] +line-length = 88 +select = ["F", "E", "W", "I", "N", "UP", "D", "PL"] +target-version = "py311" + +[tool.mypy] +python_version = '3.11' +ignore_missing_imports = true +no_implicit_optional = true +check_untyped_defs = true + +[tool.ruff.pydocstyle] +convention = "google" + +[tool.docformatter] +black = true + +[tool.pytest.ini_options] +addopts = "--import-mode=importlib --cov='parma_mining' --cov-report xml --color=yes" diff --git a/tests/test_dummy.py b/tests/test_dummy.py new file mode 100644 index 0000000..e444cb1 --- /dev/null +++ b/tests/test_dummy.py @@ -0,0 +1,9 @@ +import pytest + +from parma_mining.reddit import __version__ + + +@pytest.mark.parametrize("arg", [True, False]) +def test_dummy(arg: bool): + assert arg or not arg + assert len(__version__) > 0