-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
11 changed files
with
514 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
__pycache__ | ||
*.pyc | ||
*.pyo | ||
*.pyd | ||
.Python | ||
env/ | ||
venv/ | ||
.env | ||
*.log | ||
.git | ||
.gitignore | ||
.pytest_cache/ | ||
.coverage | ||
htmlcov/ | ||
.DS_Store | ||
chrome5/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
# Builds the Docker image and pushes it to the GitHub Container Registry
# (ghcr.io), tagged with both a ref-derived version and the workflow run id.
name: build docker & deploy

on:
  push:
    branches:
      - main
  pull_request:
  workflow_dispatch:

jobs:
  build:
    runs-on: ubuntu-latest
    permissions:
      packages: write
      contents: read
    outputs:
      IMAGE_ID: ${{ steps.prepare.outputs.IMAGE_ID }}
    steps:
      - uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Log in to registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Prepare
        id: prepare
        # Pass contexts through the environment instead of interpolating
        # them into the script text, so a ref name containing shell
        # metacharacters cannot be executed as code.
        env:
          REPOSITORY: ${{ github.repository }}
          REF_NAME: ${{ github.ref_name }}
          REF_TYPE: ${{ github.ref_type }}
        run: |
          IMAGE_ID="ghcr.io/$REPOSITORY"
          # Change all uppercase to lowercase
          IMAGE_ID=$(echo "$IMAGE_ID" | tr 'A-Z' 'a-z')
          VERSION="$REF_NAME"
          # Replace every slash in a branch name ("/" is not valid in a Docker tag)
          if [ "$REF_TYPE" = "branch" ]; then
            VERSION=$(echo "$VERSION" | sed 's#/#_#g')
          fi
          # Strip "v" prefix from tag name
          if [ "$REF_TYPE" = "tag" ]; then
            VERSION=$(echo "$VERSION" | sed -e 's/^v//')
          fi
          # Use Docker `latest` tag convention
          if [ "$VERSION" = "main" ]; then
            VERSION=latest
          fi
          echo "VERSION=$VERSION" >> "$GITHUB_OUTPUT"
          echo "IMAGE_ID=$IMAGE_ID" >> "$GITHUB_OUTPUT"

      - name: Build and push
        uses: docker/build-push-action@v6
        with:
          context: .
          push: true
          tags: |
            ${{ steps.prepare.outputs.IMAGE_ID }}:${{ steps.prepare.outputs.VERSION }}
            ${{ steps.prepare.outputs.IMAGE_ID }}:${{ github.run_id }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
# Python | ||
__pycache__/ | ||
*.py[cod] | ||
*$py.class | ||
*.so | ||
.Python | ||
build/ | ||
develop-eggs/ | ||
dist/ | ||
downloads/ | ||
eggs/ | ||
.eggs/ | ||
lib/ | ||
lib64/ | ||
parts/ | ||
sdist/ | ||
var/ | ||
wheels/ | ||
*.egg-info/ | ||
.installed.cfg | ||
*.egg | ||
|
||
# Virtual Environment | ||
venv/ | ||
env/ | ||
ENV/ | ||
|
||
# IDE | ||
.idea/ | ||
.vscode/ | ||
*.swp | ||
*.swo | ||
.DS_Store | ||
|
||
# Project specific | ||
chrome5/ | ||
*.tflite | ||
*.pb | ||
*.pb.gz | ||
*.tsv | ||
*.json | ||
|
||
# Logs | ||
*.log | ||
logs/ | ||
log/ | ||
|
||
# Test coverage | ||
.coverage | ||
htmlcov/ | ||
.pytest_cache/ | ||
.tox/ | ||
coverage.xml | ||
*.cover |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
# Stage 1: fetch the model files with curl so curl itself never reaches
# the final image.
FROM python:3.8-slim AS downloader

RUN apt-get update && apt-get install -y curl

WORKDIR /model

COPY download_model.sh .
RUN chmod +x download_model.sh && ./download_model.sh

# Stage 2: install the Python dependencies into an isolated prefix
# (/install) using the build toolchain, which is also left behind.
FROM python:3.8-slim AS builder

WORKDIR /install

RUN apt-get update && apt-get install -y \
    build-essential \
    python3-dev

COPY requirements.txt .
RUN pip3 install --prefix=/install -r requirements.txt && \
    pip3 install --prefix=/install --extra-index-url https://google-coral.github.io/py-repo/ tflite_runtime

# Final stage: runtime image containing only the installed packages,
# the downloaded model directory and the application source.
FROM python:3.8-slim

WORKDIR /app

RUN apt-get update && apt-get install -y \
    libusb-1.0-0 \
    && rm -rf /var/lib/apt/lists/*

COPY --from=downloader /model/chrome5 ./chrome5
# Overlay the packages installed under the /install prefix onto /usr/local.
COPY --from=builder /install /usr/local

COPY src/ ./src/

# Lets `src.app` be imported from the /app working directory.
ENV PYTHONPATH=/app
EXPOSE 8000

CMD ["uvicorn", "src.app:app", "--host", "0.0.0.0", "--port", "8000"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
# URL Topic Classifier API | ||
|
||
A service that classifies domains into topics using Google Chrome's Topics API model. | ||
|
||
## Overview | ||
|
||
This service provides a REST API endpoint that accepts domains and returns their classified topics using the Chrome Topics API classifier model. It uses the same model and classification logic as Chrome's Topics API. | ||
|
||
This project is based on the [Chrome Topics Classifier](https://github.com/yohhaan/topics_classifier) repository, which provides Python examples for using Chrome's Topics API model. | ||
|
||
The Topics API is part of Chrome's Privacy Sandbox initiative, designed to help serve relevant ads without third-party cookies. You can learn more about it in the [Chrome Topics API documentation](https://developer.chrome.com/docs/privacy-sandbox/topics/). | ||
|
||
## Quick Start with Docker | ||
|
||
1. Build and run with Docker: | ||
```bash | ||
docker build -t url-classifier . | ||
docker run -p 8000:8000 url-classifier | ||
``` | ||
|
||
The API will be available at `http://localhost:8000` | ||
|
||
## Manual Setup | ||
|
||
_Note: This project has only been confirmed to work with Python 3.8 (the version used in the Docker image); other Python versions may not work._
|
||
1. Create a virtual environment and activate it: | ||
```bash | ||
python -m venv venv | ||
source venv/bin/activate | ||
``` | ||
|
||
2. Install dependencies: | ||
```bash | ||
pip install -r requirements.txt | ||
pip3 install --extra-index-url https://google-coral.github.io/py-repo/ tflite_runtime | ||
``` | ||
|
||
3. Download the model files: | ||
```bash | ||
chmod +x scripts/download_model.sh | ||
./scripts/download_model.sh | ||
``` | ||
|
||
4. Run the application: | ||
```bash | ||
uvicorn src.app:app --reload | ||
``` | ||
|
||
## API Usage | ||
|
||
### Classify Domain | ||
|
||
**Endpoint:** `POST /classify` | ||
|
||
**Request:** | ||
```json | ||
{ | ||
"domain": "example.com" | ||
} | ||
``` | ||
|
||
**Response:** | ||
```json | ||
{ | ||
"domain": "example.com", | ||
"topics": [ | ||
{ | ||
"id": 123, | ||
"name": "Topic Name" | ||
} | ||
] | ||
} | ||
``` | ||
|
||
**cURL Example:** | ||
```bash | ||
curl -X POST "http://localhost:8000/classify" \ | ||
-H "Content-Type: application/json" \ | ||
-d '{"domain": "example.com"}' | ||
``` | ||
|
||
### API Documentation | ||
|
||
Once the server is running, you can access: | ||
- Swagger UI documentation: `http://localhost:8000/docs` | ||
- ReDoc documentation: `http://localhost:8000/redoc` | ||
|
||
## License | ||
|
||
MIT License | ||
|
||
## Credits | ||
|
||
This project uses the Topics API model from Google Chrome's Topics API implementation. The model and classification logic are based on the [Chrome Topics Classifier](https://github.com/yohhaan/topics_classifier) repository. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
#!/bin/sh
# Download the Chrome Topics classifier model files into ./chrome5
# (relative to the current working directory). Exits with status 1 as
# soon as any download fails or an essential file is missing.

mkdir -p chrome5

REPO_URL="https://raw.githubusercontent.com/yohhaan/topics_classifier/main"

# download_file PATH
# Fetch $REPO_URL/PATH and store it at ./PATH, creating directories as
# needed. Returns 0 on success; on failure prints curl's output and
# returns 1.
download_file() {
    echo "Downloading $1..."
    # -f makes curl exit non-zero on HTTP errors (e.g. 404). Without it
    # the server's error page is written to the output file and the
    # download is reported as successful.
    if output=$(curl -fL "$REPO_URL/$1" --create-dirs -o "$1" 2>&1); then
        echo "✓ Successfully downloaded $1"
        return 0
    fi

    echo "✗ Error downloading $1"
    echo "Curl output:"
    echo "$output"
    return 1
}

# Download every model artefact; abort on the first failure.
for file in \
    "chrome5/config.json" \
    "chrome5/model-info.pb" \
    "chrome5/model.tflite" \
    "chrome5/override_list.pb.gz" \
    "chrome5/override_list.tsv" \
    "chrome5/taxonomy.tsv" \
    "chrome5/utility_buckets.tsv"
do
    download_file "$file" || exit 1
done

# Final check that the essential files are present on disk.
for file in \
    "chrome5/config.json" \
    "chrome5/model.tflite" \
    "chrome5/taxonomy.tsv" \
    "chrome5/override_list.tsv"
do
    if [ ! -f "$file" ]; then
        echo "Error: Essential file $file is missing!"
        exit 1
    fi
done

echo "✓ All files downloaded successfully!"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
fastapi>=0.68.0 | ||
uvicorn>=0.15.0 | ||
pydantic>=1.8.0 | ||
black | ||
numpy | ||
pandas | ||
pyarrow | ||
tflite-support>=0.4.3 | ||
python-multipart>=0.0.5 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
#!/bin/sh
# Download the Chrome Topics classifier model files into ./chrome5
# (relative to the current working directory). Exits with status 1 as
# soon as any download fails or an essential file is missing.

mkdir -p chrome5

REPO_URL="https://raw.githubusercontent.com/yohhaan/topics_classifier/main"

# download_file PATH
# Fetch $REPO_URL/PATH and store it at ./PATH, creating directories as
# needed. Returns 0 on success; on failure prints curl's output and
# returns 1.
download_file() {
    echo "Downloading $1..."
    # -f makes curl exit non-zero on HTTP errors (e.g. 404). Without it
    # the server's error page is written to the output file and the
    # download is reported as successful.
    if output=$(curl -fL "$REPO_URL/$1" --create-dirs -o "$1" 2>&1); then
        echo "✓ Successfully downloaded $1"
        return 0
    fi

    echo "✗ Error downloading $1"
    echo "Curl output:"
    echo "$output"
    return 1
}

# Download every model artefact; abort on the first failure.
for file in \
    "chrome5/config.json" \
    "chrome5/model-info.pb" \
    "chrome5/model.tflite" \
    "chrome5/override_list.pb.gz" \
    "chrome5/override_list.tsv" \
    "chrome5/taxonomy.tsv" \
    "chrome5/utility_buckets.tsv"
do
    download_file "$file" || exit 1
done

# Final check that the essential files are present on disk.
for file in \
    "chrome5/config.json" \
    "chrome5/model.tflite" \
    "chrome5/taxonomy.tsv" \
    "chrome5/override_list.tsv"
do
    if [ ! -f "$file" ]; then
        echo "Error: Essential file $file is missing!"
        exit 1
    fi
done

echo "✓ All files downloaded successfully!"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# Empty file to make the directory a Python package |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
from fastapi import FastAPI, HTTPException | ||
from pydantic import BaseModel | ||
from src.classifier import TopicsClassifier | ||
|
||
app = FastAPI(title="Topics Classification API")


class DomainRequest(BaseModel):
    """Request body accepted by ``POST /classify``."""

    # Domain name to classify.
    domain: str


# Instantiated once at import time and shared by every request.
classifier = TopicsClassifier()


@app.post("/classify")
async def classify_domain(request: DomainRequest):
    """Classify the requested domain and return it with its topics.

    Args:
        request: Parsed request body carrying the domain to classify.

    Returns:
        A dict with the submitted ``domain`` and the ``topics`` value
        produced by the classifier.

    Raises:
        HTTPException: Status 500, with the underlying error message as
            detail, when the classifier raises.
    """
    try:
        topics = classifier.classify_domain(request.domain)
    except Exception as exc:
        # API boundary: report any classifier failure as a 500 and keep
        # the original exception chained for server-side tracebacks.
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    return {"domain": request.domain, "topics": topics}
Oops, something went wrong.