Skip to content

Commit

Permalink
Merge branch 'ASU_wikiArticleSelection' of https://github.com/grey-bo…
Browse files Browse the repository at this point in the history
  • Loading branch information
cdvandyk-ca committed Nov 21, 2024
2 parents 87564c0 + d3cafc0 commit 06323c2
Showing 1 changed file with 60 additions and 2 deletions.
62 changes: 60 additions & 2 deletions fastapi/app/main.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from fastapi import FastAPI, HTTPException
from fastapi import FastAPI, HTTPException, Query
import uvicorn
from uvicorn import run
from typing import Union, List
import uvicorn
from typing import Union
import requests
from pydantic import BaseModel
from bs4 import BeautifulSoup
Expand All @@ -12,8 +14,63 @@ class ArticleResponse(BaseModel):
article: str
languages: List[str]

def get_wikipedia_url(title: str) -> str:
    """Get the Wikipedia article URL for a given title using the Wikipedia API.

    Args:
        title: Article title to look up on English Wikipedia.

    Returns:
        The ``fullurl`` value of the first page in the API response.

    Raises:
        HTTPException: 404 if the page is reported missing or carries no
            ``fullurl``; 502 if the Wikipedia API request fails or its
            response cannot be decoded as JSON.
    """
    api_url = 'https://en.wikipedia.org/w/api.php'
    params = {
        'action': 'query',
        'format': 'json',
        'titles': title,
        'prop': 'info',
        'inprop': 'url',
    }
    try:
        # A timeout keeps a stalled upstream from blocking the worker forever.
        response = requests.get(api_url, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as exc:
        # Transport errors, non-2xx statuses and undecodable bodies are all
        # upstream failures; report them as a gateway error instead of
        # letting them escape as an unhandled exception.
        raise HTTPException(
            status_code=502,
            detail="Failed to query the Wikipedia API.",
        ) from exc

    # A single title was requested, so only the first page entry is relevant.
    pages = data.get('query', {}).get('pages', {})
    page = next(iter(pages.values()), None)

    if not page or 'missing' in page:
        raise HTTPException(status_code=404, detail="Wikipedia article not found.")

    fullurl = page.get('fullurl')
    if not fullurl:
        raise HTTPException(status_code=404, detail="Wikipedia article URL not found.")

    return fullurl

@app.get("/get_article", response_model=ArticleResponse)
def get_article(url: str):
def get_article(url: str = Query(None), title: str = Query(None)):
if not url and not title:
raise HTTPException(status_code=400, detail="Either 'url' or 'title' must be provided.")

if title:
# Use the Wikipedia API to get the URL from the title
url = get_wikipedia_url(title)

try:
page = requests.get(url)
page.raise_for_status() # Check if the request was successful
Expand Down Expand Up @@ -44,6 +101,7 @@ class Comparator(BaseModel):
source: str
target: str


@app.post("/api/v1/article/original/")
def get_orginal_article_by_url(url: Url):
    """Placeholder handler for the original-article endpoint.

    The ``url`` argument is accepted but not used; the handler always
    returns the fixed string ``'hello'``.
    """
    placeholder = 'hello'
    return placeholder
Expand Down

0 comments on commit 06323c2

Please sign in to comment.