Skip to content

Commit

Permalink
Merge pull request #59 from boostcampaitech5/dev
Browse files Browse the repository at this point in the history
[Release] Merge v0.1.0
  • Loading branch information
dbsrlskfdk authored Jul 26, 2023
2 parents a265b2f + eb32760 commit 1c6f203
Show file tree
Hide file tree
Showing 93 changed files with 26,184 additions and 1 deletion.
159 changes: 159 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -365,3 +365,162 @@ $RECYCLE.BIN/
*.lnk

# End of https://www.toptal.com/developers/gitignore/api/python,visualstudiocode,pycharm,windows,macos

# Ignore names for front-end.
# Created by https://www.toptal.com/developers/gitignore/api/react,node
# Edit at https://www.toptal.com/developers/gitignore?templates=react,node

### Node ###
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
lerna-debug.log*
.pnpm-debug.log*

# Diagnostic reports (https://nodejs.org/api/report.html)
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json

# Runtime data
pids
*.pid
*.seed
*.pid.lock

# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov

# Coverage directory used by tools like istanbul
coverage
*.lcov

# nyc test coverage
.nyc_output

# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
.grunt

# Bower dependency directory (https://bower.io/)
bower_components

# node-waf configuration
.lock-wscript

# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release

# Dependency directories
node_modules/
jspm_packages/

# Snowpack dependency directory (https://snowpack.dev/)
web_modules/

# TypeScript cache
*.tsbuildinfo

# Optional npm cache directory
.npm

# Optional eslint cache
.eslintcache

# Optional stylelint cache
.stylelintcache

# Microbundle cache
.rpt2_cache/
.rts2_cache_cjs/
.rts2_cache_es/
.rts2_cache_umd/

# Optional REPL history
.node_repl_history

# Output of 'npm pack'
*.tgz

# Yarn Integrity file
.yarn-integrity

# dotenv environment variable files
.env
.env.development.local
.env.test.local
.env.production.local
.env.local

# parcel-bundler cache (https://parceljs.org/)
.cache
.parcel-cache

# Next.js build output
.next
out

# Nuxt.js build / generate output
.nuxt
dist

# Gatsby files
.cache/
# Uncomment the public line below if your project uses Gatsby and not Next.js
# https://nextjs.org/blog/next-9-1#public-directory-support
# public

# vuepress build output
.vuepress/dist

# vuepress v2.x temp and cache directory
.temp

# Docusaurus cache and generated files
.docusaurus

# Serverless directories
.serverless/

# FuseBox cache
.fusebox/

# DynamoDB Local files
.dynamodb/

# TernJS port file
.tern-port

# Stores VSCode versions used for testing VSCode extensions
.vscode-test

# yarn v2
.yarn/cache
.yarn/unplugged
.yarn/build-state.yml
.yarn/install-state.gz
.pnp.*

### Node Patch ###
# Serverless Webpack directories
.webpack/

# Optional stylelint cache

# SvelteKit build / generate output
.svelte-kit

### react ###
.DS_*
**/*.backup.*
**/*.back.*

node_modules

*.sublime*

psd
thumb
sketch

# End of https://www.toptal.com/developers/gitignore/api/react,node
4 changes: 4 additions & 0 deletions Backend/Keyword Extraction/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Script entry point: launch the Keyword Extraction FastAPI service with uvicorn.
if __name__ == "__main__":
    import uvicorn

    # NOTE(review): "Keyword Extraction" contains a space, which is not a valid
    # Python module name — confirm uvicorn can actually import the path
    # "Keyword Extraction.main:app" from the intended working directory.
    # Binds to all interfaces; reload=True is a development-only setting.
    uvicorn.run("Keyword Extraction.main:app", host="0.0.0.0", port=30008, reload=True)
81 changes: 81 additions & 0 deletions Backend/Keyword Extraction/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import sys

import pendulum

from datetime import datetime, timedelta
from pathlib import Path

from fastapi import FastAPI, UploadFile, File, HTTPException, Depends
from fastapi.param_functions import Depends
from pydantic import BaseModel, Field
from uuid import UUID, uuid4
from typing import List, Union, Optional, Dict, Any, Tuple


from datetime import datetime
import pandas as pd


# Make the repository checkout importable so the local keyword_extractor
# package resolves (assumes the repo lives directly under $HOME — TODO confirm).
sys.path.append(str(Path.home().joinpath("level3_nlp_finalproject-nlp-04")))
from keyword_extractor.model import KeyBert

app = FastAPI()
# Korea Standard Time zone object; appears unused in this module — presumably
# intended for timestamping elsewhere.
tz = pendulum.timezone("Asia/Seoul")

@app.get("/")
def hello_word():
    """Health-check endpoint: respond with a fixed greeting payload."""
    payload = {"hello": "world"}
    return payload


class Item(BaseModel):
    """Request body for keyword extraction: one article or a batch of articles."""

    # Article title(s); a single string or a list parallel to `contents`.
    titles: Union[str, List[str]]
    # Article body text(s) from which keywords are extracted.
    contents: Union[str, List[str]]

class Parameter(BaseModel):
    """Tunable keyword-extraction options (bound via Depends as query params)."""

    # Words to exclude from candidate keywords; None means no stop words.
    stop_words: Optional[Union[str, List[str]]] = None
    # Number of keywords to return per document.
    top_k: Optional[int] = 5
    # Diversity factor passed to the extractor; higher presumably yields more
    # varied keywords — confirm semantics in keyword_extractor.model.
    diversity: Optional[float] = 0.7
    # NOTE(review): min_df is accepted here but never forwarded to
    # extract_keywords by the endpoint below — confirm whether that is intended.
    min_df: Optional[int] = 1
    # Fraction of candidate terms kept for comparison.
    candidate_frac: Optional[float] = 0.3
    # Candidate vectorizer backend name (default "tfidf").
    vectorizer_type: Optional[str] = "tfidf"
    # POS tagger backend name (default "okt").
    tag_type: Optional[str] = "okt"



# Mapping from the API's model number to the sentence-embedding checkpoint
# used to back the KeyBert extractor.
_MODEL_CHECKPOINTS = {
    "1": "jhgan/ko-sroberta-multitask",
    "2": "snunlp/KR-SBERT-V40K-klueNLI-augSTS",
    "3": "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
}


def get_model(model_number):
    """Return a KeyBert instance for the given model number.

    Args:
        model_number: One of "1", "2", "3" (see _MODEL_CHECKPOINTS).

    Returns:
        A KeyBert model wrapping the selected checkpoint.

    Raises:
        ValueError: If model_number is not a known id.  (The original
            if/elif chain left `model` unbound for unknown ids and crashed
            with UnboundLocalError at the return statement.)
    """
    try:
        checkpoint = _MODEL_CHECKPOINTS[model_number]
    except KeyError:
        raise ValueError(f"Unknown model_number: {model_number!r}; expected one of 1, 2, 3") from None
    return KeyBert(checkpoint)


@app.post(
    "/keywordExtraction/{model_number}",
    name="keyword extraction",
    description="뉴스 기사에서 키워드를 추출하는 요약하는 API 입니다. `model_number`에는 `1`, `2`,`3` 중 하나를 선택하시면 됩니다.\
    기본 모델 목록 1: jhgan/ko-sroberta-multitask, 2:snunlp/KR-SBERT-V40K-klueNLI-augSTS, 3:sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
)
async def keywordExtraction(model_number: str, data_input: Item, parameter: Parameter = Depends()):
    """Extract keywords from one or more news articles.

    Args:
        model_number: "1", "2", or "3" — selects the embedding checkpoint
            (see get_model).
        data_input: Titles and contents of the article(s).
        parameter: Extraction options bound from query parameters.

    Returns:
        A tuple of (extraction result from KeyBert, the parameters used).

    Raises:
        HTTPException: 404 when model_number is not one of "1", "2", "3".
    """
    if not model_number in ["1", "2","3"]:
        raise HTTPException(status_code=404, detail="'model_number' argument invalid! Type model name. \n Model: 1, 2, 3")

    # NOTE(review): a parsed pydantic model instance is presumably always
    # truthy, so this guard likely never fires — confirm and remove if so.
    if not data_input:
        raise HTTPException(status_code=404, detail="'data_input' argument invalid!")

    # A fresh model is constructed per request — potentially expensive;
    # consider caching (left unchanged here).
    model = get_model(model_number)

    # NOTE(review): parameter.min_df is not forwarded — confirm intent.
    result = model.extract_keywords(docs=data_input.contents,
                                    titles=data_input.titles,
                                    stop_words = parameter.stop_words if parameter.stop_words else [],
                                    top_k = parameter.top_k,
                                    diversity = parameter.diversity,
                                    candidate_frac = parameter.candidate_frac,
                                    vectorizer_type = parameter.vectorizer_type,
                                    tag_type = parameter.tag_type,
                                    )
    return result, parameter
Empty file added Backend/Sentimental/.gitkeep
Empty file.
4 changes: 4 additions & 0 deletions Backend/Sentimental/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Script entry point: launch the sentiment-classification FastAPI service.
if __name__ == "__main__":
    import uvicorn

    # NOTE(review): the import path "app.main:app" does not obviously match
    # this file's location (Backend/Sentimental/main.py) — verify it resolves
    # from the intended working directory.
    uvicorn.run("app.main:app", host="0.0.0.0", port=30007, reload=True)
77 changes: 77 additions & 0 deletions Backend/Sentimental/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
from fastapi import FastAPI, UploadFile, File
from fastapi.param_functions import Depends
from pydantic import BaseModel, Field
from uuid import UUID, uuid4
from typing import List, Union, Optional, Dict, Any

from datetime import datetime

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

app = FastAPI()

# Fine-tuned sequence-classification model loaded once at import time so every
# request reuses it. The tokenizer comes from the public "klue/roberta-large"
# checkpoint while the weights are read from a hard-coded local path —
# presumably a training-server mount; the service will fail to start if the
# path is absent.
MODEL_PATH = "/opt/ml/input/model-roberta_large-sota_trainer"
tokenizer = AutoTokenizer.from_pretrained("klue/roberta-large")
model = AutoModelForSequenceClassification.from_pretrained(MODEL_PATH)

@app.get("/")
def hello_world():
    """Health-check endpoint: respond with a fixed greeting payload."""
    greeting = {"hello": "world"}
    return greeting


def predict_sentiment(text):
    """Classify the sentiment of the given text(s).

    Args:
        text: A string or list of strings to classify.

    Returns:
        A list with one label per input sequence: "부정" (negative) when the
        class-0 probability is >= the class-1 probability, else "긍정"
        (positive).  Index 0 is assumed to be the negative class — TODO
        confirm against the fine-tuned model's label mapping.

    Notes on changes: removed a dead `results = []` assignment that was
    immediately overwritten, leftover debug print() calls, and
    commented-out tokenizer arguments; the classification logic is
    unchanged.
    """
    model.eval()
    with torch.no_grad():
        # Batch-tokenize; long inputs are truncated to 100 tokens.
        encoded = tokenizer(
            text,
            return_tensors='pt',
            padding=True,
            truncation=True,
            max_length=100,
            return_offsets_mapping=False,
        )

        output = model(
            input_ids=encoded['input_ids'],
            token_type_ids=encoded['token_type_ids'],
        )

        # Softmax over the two logits -> per-class probabilities.
        probs = torch.nn.Softmax(dim=-1)(output.logits)

    return ["부정" if p[0] >= p[1] else "긍정" for p in probs]

class FinanaceSentiment(BaseModel):
    """Request body for /classify_sentiment.

    NOTE(review): the class name misspells "Finance"; renaming would break
    the endpoint annotation referencing it, so it is left as-is.
    """

    # Sentences to classify.
    corpus_list: list = []
    # Article title; only used in the (currently unreturned) result summary.
    title: str = "title"
    # Company name; defaults to "삼성전자" (Samsung Electronics).
    company: str = "삼성전자"
    # Optional slot for results; never populated by the visible code.
    result: Optional[List]

@app.post("/classify_sentiment", description="문장의 감정을 분류합니다.")
async def classify_sentiment(finance: FinanaceSentiment):
    """Classify the sentiment of each sentence in the request's corpus_list.

    Returns:
        The raw list of per-sentence labels from predict_sentiment.
    """
    # Run the model over the texts received in the request.
    predictions = predict_sentiment(finance.corpus_list)

    # NOTE(review): this summary dict is built but never returned — the
    # endpoint returns only `predictions`. Confirm whether `result` was
    # meant to be the response body before cleaning this up.
    result = {
        "title": finance.title,
        # "input_text": finance.corpus,
        "sentiment": predictions
    }

    return predictions
Empty file added Backend/Summarization/.gitkeep
Empty file.
4 changes: 4 additions & 0 deletions Backend/Summarization/summe/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
SECRET_KEY=secret
DEBUG=True
MODEL_PATH=./ml/model/
MODEL_NAME=model.pkl
Loading

0 comments on commit 1c6f203

Please sign in to comment.