-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #59 from boostcampaitech5/dev
[Release] Merge v0.1.0
- Loading branch information
Showing
93 changed files
with
26,184 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
if __name__ == "__main__":
    # uvicorn is imported inside the guard, so it is only needed when this
    # file is executed as a script.
    import uvicorn

    # Serve the keyword-extraction app on every interface, port 30008,
    # with auto-reload enabled.
    uvicorn.run(
        "Keyword Extraction.main:app",
        host="0.0.0.0",
        port=30008,
        reload=True,
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
import sys | ||
|
||
import pendulum | ||
|
||
from datetime import datetime, timedelta | ||
from pathlib import Path | ||
|
||
from fastapi import FastAPI, UploadFile, File, HTTPException, Depends | ||
from fastapi.param_functions import Depends | ||
from pydantic import BaseModel, Field | ||
from uuid import UUID, uuid4 | ||
from typing import List, Union, Optional, Dict, Any, Tuple | ||
|
||
|
||
from datetime import datetime | ||
import pandas as pd | ||
|
||
|
||
sys.path.append(str(Path.home().joinpath("level3_nlp_finalproject-nlp-04"))) | ||
from keyword_extractor.model import KeyBert | ||
|
||
app = FastAPI() | ||
tz = pendulum.timezone("Asia/Seoul") | ||
|
||
@app.get("/")
def hello_word():
    # Root endpoint: always answers with the same fixed greeting payload.
    greeting = {"hello": "world"}
    return greeting
|
||
|
||
class Item(BaseModel):
    # Request body for the keyword-extraction endpoint.
    # Both fields accept either one string or a list of strings.

    # Forwarded to the extractor as ``titles``.
    titles: Union[str, List[str]]
    # Forwarded to the extractor as ``docs``.
    contents: Union[str, List[str]]
|
||
class Parameter(BaseModel):
    # Tuning options for keyword extraction. Every field is optional and
    # falls back to the default shown here.

    # Words to exclude; the endpoint substitutes [] when this is empty/None.
    stop_words: Optional[Union[str, List[str]]] = None
    # Forwarded to the extractor as ``top_k``.
    top_k: Optional[int] = 5
    # Forwarded to the extractor as ``diversity``.
    diversity: Optional[float] = 0.7
    # NOTE(review): declared here but not forwarded by the endpoint in this
    # file - confirm whether the extractor is meant to receive it.
    min_df: Optional[int] = 1
    # Forwarded to the extractor as ``candidate_frac``.
    candidate_frac: Optional[float] = 0.3
    # Forwarded to the extractor as ``vectorizer_type``.
    vectorizer_type: Optional[str] = "tfidf"
    # Forwarded to the extractor as ``tag_type``.
    tag_type: Optional[str] = "okt"
|
||
|
||
|
||
def get_model(model_number):
    """Return the KeyBert extractor selected by ``model_number``.

    Args:
        model_number: One of the strings ``"1"``, ``"2"`` or ``"3"``.

    Returns:
        A new ``KeyBert`` instance built from the matching checkpoint name.

    Raises:
        ValueError: If ``model_number`` is not a known key. Previously an
            unknown value fell through every branch and failed with
            ``UnboundLocalError`` at the ``return`` statement.
    """
    checkpoints = {
        "1": "jhgan/ko-sroberta-multitask",
        "2": "snunlp/KR-SBERT-V40K-klueNLI-augSTS",
        "3": "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
    }
    try:
        checkpoint = checkpoints[model_number]
    except KeyError:
        raise ValueError(
            f"unknown model_number {model_number!r}; "
            f"expected one of {sorted(checkpoints)}"
        ) from None

    # NOTE(review): a fresh KeyBert is constructed on every call; consider
    # caching if construction turns out to be expensive.
    return KeyBert(checkpoint)
|
||
|
||
@app.post(
    "/keywordExtraction/{model_number}",
    name="keyword extraction",
    description="뉴스 기사에서 키워드를 추출하는 요약하는 API 입니다. `model_number`에는 `1`, `2`,`3` 중 하나를 선택하시면 됩니다.\
기본 모델 목록 1: jhgan/ko-sroberta-multitask, 2:snunlp/KR-SBERT-V40K-klueNLI-augSTS, 3:sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
)
async def keywordExtraction(model_number: str, data_input: Item, parameter: Parameter = Depends()):
    """Extract keywords from the submitted titles and contents.

    Args:
        model_number: Path parameter; must be "1", "2" or "3".
        data_input: Request body carrying ``titles`` and ``contents``.
        parameter: Extraction options resolved through ``Depends()``.

    Returns:
        A pair of (extractor result, the ``parameter`` object that was used).

    Raises:
        HTTPException: 404 when ``model_number`` is not accepted or when
            ``data_input`` is falsy.
    """
    # Guard clauses: reject bad input before any model is built.
    if model_number not in ("1", "2", "3"):
        raise HTTPException(status_code=404, detail="'model_number' argument invalid! Type model name. \n Model: 1, 2, 3")
    if not data_input:
        raise HTTPException(status_code=404, detail="'data_input' argument invalid!")

    extractor = get_model(model_number)

    # A missing or empty stop-word setting is passed on as an empty list.
    excluded_words = parameter.stop_words if parameter.stop_words else []

    # NOTE(review): ``parameter.min_df`` is not forwarded here - confirm
    # whether that is intentional.
    keywords = extractor.extract_keywords(
        docs=data_input.contents,
        titles=data_input.titles,
        stop_words=excluded_words,
        top_k=parameter.top_k,
        diversity=parameter.diversity,
        candidate_frac=parameter.candidate_frac,
        vectorizer_type=parameter.vectorizer_type,
        tag_type=parameter.tag_type,
    )
    return keywords, parameter
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
if __name__ == "__main__":
    # uvicorn is imported inside the guard, so it is only needed when this
    # file is executed as a script.
    import uvicorn

    # Serve the sentiment app on every interface, port 30007, with
    # auto-reload enabled.
    uvicorn.run(
        "app.main:app",
        host="0.0.0.0",
        port=30007,
        reload=True,
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
from fastapi import FastAPI, UploadFile, File | ||
from fastapi.param_functions import Depends | ||
from pydantic import BaseModel, Field | ||
from uuid import UUID, uuid4 | ||
from typing import List, Union, Optional, Dict, Any | ||
|
||
from datetime import datetime | ||
|
||
import torch | ||
from transformers import AutoTokenizer, AutoModelForSequenceClassification | ||
|
||
app = FastAPI() | ||
|
||
MODEL_PATH = "/opt/ml/input/model-roberta_large-sota_trainer" | ||
tokenizer = AutoTokenizer.from_pretrained("klue/roberta-large") | ||
model = AutoModelForSequenceClassification.from_pretrained(MODEL_PATH) | ||
|
||
@app.get("/")
def hello_world():
    # Root endpoint: always answers with the same fixed greeting payload.
    greeting = {"hello": "world"}
    return greeting
|
||
|
||
def predict_sentiment(text):
    """Label each input text as negative ("부정") or positive ("긍정").

    Args:
        text: A single string or a list of strings to classify.

    Returns:
        A list with one label per row of model output: "부정" when the
        probability at index 0 is greater than or equal to the one at
        index 1, otherwise "긍정". An empty list/tuple yields ``[]``.
    """
    # The request model defaults ``corpus_list`` to [], so an empty batch is
    # a normal input: answer it without touching the tokenizer or the model.
    if isinstance(text, (list, tuple)) and len(text) == 0:
        return []

    model.eval()
    with torch.no_grad():
        encoded = tokenizer(
            text,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=100,
            return_offsets_mapping=False,
        )

        # Because the batch is padded, the attention mask must accompany the
        # ids; without it the model would attend to padding tokens and the
        # prediction for a text could depend on what else is in the batch.
        outputs = model(
            input_ids=encoded["input_ids"],
            attention_mask=encoded.get("attention_mask"),
            token_type_ids=encoded["token_type_ids"],
        )
        probabilities = torch.softmax(outputs.logits, dim=-1)

    # Ties go to the negative label, matching the ``>=`` comparison.
    return ["부정" if row[0] >= row[1] else "긍정" for row in probabilities]
|
||
class FinanaceSentiment(BaseModel):
    # Request body for the /classify_sentiment endpoint.

    # Texts to classify; this is the only field passed to the classifier.
    corpus_list: list = []
    title: str = "title"
    company: str = "삼성전자"
    # Explicit ``None`` default so the field stays optional: under
    # pydantic v2 an ``Optional[...]`` annotation without a default is a
    # *required* field, which would reject requests that omit ``result``.
    result: Optional[List] = None
|
||
@app.post("/classify_sentiment", description="문장의 감정을 분류합니다.")
async def classify_sentiment(finance: FinanaceSentiment):
    """Classify every text in ``finance.corpus_list``.

    Args:
        finance: Request body; only ``corpus_list`` is read here.

    Returns:
        The list of labels produced by ``predict_sentiment``, one per text.
    """
    # The response is the bare label list. A dict bundling the title with
    # the labels used to be built here but was never returned, so that dead
    # local has been removed; the response shape is unchanged.
    # NOTE(review): predict_sentiment is a plain synchronous call, so this
    # coroutine does not yield while inference runs - confirm acceptable.
    return predict_sentiment(finance.corpus_list)
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
SECRET_KEY=secret | ||
DEBUG=True | ||
MODEL_PATH=./ml/model/ | ||
MODEL_NAME=model.pkl |
Oops, something went wrong.