Merge pull request #59 from boostcampaitech5/dev

[Release] Merge v0.1.0
boostcampaitech5 · Jul 26, 2023 · 1c6f203 · 1c6f203
2 parents a265b2f + eb32760
commit 1c6f203
Show file tree

Hide file tree

Showing 93 changed files with 26,184 additions and 1 deletion.
diff --git a/.gitignore b/.gitignore
@@ -365,3 +365,162 @@ $RECYCLE.BIN/
 *.lnk
 
 # End of https://www.toptal.com/developers/gitignore/api/python,visualstudiocode,pycharm,windows,macos
+
+# Ignore patterns for the front-end.
+# Created by https://www.toptal.com/developers/gitignore/api/react,node
+# Edit at https://www.toptal.com/developers/gitignore?templates=react,node
+
+### Node ###
+# Logs
+logs
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+lerna-debug.log*
+.pnpm-debug.log*
+
+# Diagnostic reports (https://nodejs.org/api/report.html)
+report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
+
+# Runtime data
+pids
+*.pid
+*.seed
+*.pid.lock
+
+# Directory for instrumented libs generated by jscoverage/JSCover
+lib-cov
+
+# Coverage directory used by tools like istanbul
+coverage
+*.lcov
+
+# nyc test coverage
+.nyc_output
+
+# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
+.grunt
+
+# Bower dependency directory (https://bower.io/)
+bower_components
+
+# node-waf configuration
+.lock-wscript
+
+# Compiled binary addons (https://nodejs.org/api/addons.html)
+build/Release
+
+# Dependency directories
+node_modules/
+jspm_packages/
+
+# Snowpack dependency directory (https://snowpack.dev/)
+web_modules/
+
+# TypeScript cache
+*.tsbuildinfo
+
+# Optional npm cache directory
+.npm
+
+# Optional eslint cache
+.eslintcache
+
+# Optional stylelint cache
+.stylelintcache
+
+# Microbundle cache
+.rpt2_cache/
+.rts2_cache_cjs/
+.rts2_cache_es/
+.rts2_cache_umd/
+
+# Optional REPL history
+.node_repl_history
+
+# Output of 'npm pack'
+*.tgz
+
+# Yarn Integrity file
+.yarn-integrity
+
+# dotenv environment variable files
+.env
+.env.development.local
+.env.test.local
+.env.production.local
+.env.local
+
+# parcel-bundler cache (https://parceljs.org/)
+.cache
+.parcel-cache
+
+# Next.js build output
+.next
+out
+
+# Nuxt.js build / generate output
+.nuxt
+dist
+
+# Gatsby files
+.cache/
+# Uncomment the `public` line below if your project uses Gatsby and not Next.js
+# https://nextjs.org/blog/next-9-1#public-directory-support
+# public
+
+# vuepress build output
+.vuepress/dist
+
+# vuepress v2.x temp and cache directory
+.temp
+
+# Docusaurus cache and generated files
+.docusaurus
+
+# Serverless directories
+.serverless/
+
+# FuseBox cache
+.fusebox/
+
+# DynamoDB Local files
+.dynamodb/
+
+# TernJS port file
+.tern-port
+
+# Stores VSCode versions used for testing VSCode extensions
+.vscode-test
+
+# yarn v2
+.yarn/cache
+.yarn/unplugged
+.yarn/build-state.yml
+.yarn/install-state.gz
+.pnp.*
+
+### Node Patch ###
+# Serverless Webpack directories
+.webpack/
+
+# Optional stylelint cache
+
+# SvelteKit build / generate output
+.svelte-kit
+
+### react ###
+.DS_*
+**/*.backup.*
+**/*.back.*
+
+node_modules
+
+*.sublime*
+
+psd
+thumb
+sketch
+
+# End of https://www.toptal.com/developers/gitignore/api/react,node
diff --git a/Backend/Keyword Extraction/__main__.py b/Backend/Keyword Extraction/__main__.py
@@ -0,0 +1,4 @@
+if __name__ == "__main__":
+    import uvicorn
+
+    uvicorn.run("Keyword Extraction.main:app", host="0.0.0.0", port=30008, reload=True)
diff --git a/Backend/Keyword Extraction/main.py b/Backend/Keyword Extraction/main.py
@@ -0,0 +1,81 @@
+import sys
+
+import pendulum
+
+from datetime import datetime, timedelta
+from pathlib import Path
+
+from fastapi import FastAPI, UploadFile, File, HTTPException, Depends
+from fastapi.param_functions import Depends
+from pydantic import BaseModel, Field
+from uuid import UUID, uuid4
+from typing import List, Union, Optional, Dict, Any, Tuple
+
+
+from datetime import datetime
+import pandas as pd
+
+
+# Append the project checkout under the user's home directory to sys.path
+# before importing; presumably this is what makes `keyword_extractor`
+# importable — verify against the deployment layout.
+sys.path.append(str(Path.home().joinpath("level3_nlp_finalproject-nlp-04")))
+from keyword_extractor.model import KeyBert
+
+# FastAPI application object that the route decorators below register on.
+app = FastAPI()
+# Seoul timezone handle.
+# NOTE(review): not referenced anywhere in the visible part of this file.
+tz = pendulum.timezone("Asia/Seoul")
+
+@app.get("/")
+def hello_word():
+	return {"hello": "world"}
+
+
+# Request body for keyword extraction. Each field accepts either a single
+# string or a list of strings; the handler passes `contents` as `docs` and
+# `titles` as `titles` to the extractor.
+class Item(BaseModel):
+    # Article title(s).
+    titles: Union[str, List[str]]
+    # Article body text(s).
+    contents: Union[str, List[str]]
+
+# Optional tuning knobs for keyword extraction. The handler injects this
+# model with `Depends()` and forwards most fields to `extract_keywords`.
+class Parameter(BaseModel):
+    # Words to exclude; the handler substitutes an empty list when unset.
+    stop_words: Optional[Union[str, List[str]]] = None
+    # Forwarded as `top_k`.
+    top_k: Optional[int] = 5
+    # Forwarded as `diversity`.
+    diversity: Optional[float] = 0.7
+    # NOTE(review): declared here but not forwarded by the visible handler.
+    min_df: Optional[int] = 1
+    # Forwarded as `candidate_frac`.
+    candidate_frac: Optional[float] = 0.3
+    # Forwarded as `vectorizer_type`.
+    vectorizer_type: Optional[str] = "tfidf"
+    # Forwarded as `tag_type`.
+    tag_type: Optional[str] = "okt"
+
+
+
+def get_model(model_number):
+    # model 반환 함수
+    if model_number == "1":
+        model = KeyBert("jhgan/ko-sroberta-multitask")
+    elif model_number == "2":
+        model = KeyBert("snunlp/KR-SBERT-V40K-klueNLI-augSTS")
+    elif model_number == "3":
+        model = KeyBert("sentence-transformers/paraphrase-multilingual-mpnet-base-v2")
+    return model
+
+
+@app.post(
+    "/keywordExtraction/{model_number}",
+    name="keyword extraction",
+    description="뉴스 기사에서 키워드를 추출하는 요약하는 API 입니다. `model_number`에는 `1`, `2`,`3` 중 하나를 선택하시면 됩니다.\
+        기본 모델 목록 1: jhgan/ko-sroberta-multitask, 2:snunlp/KR-SBERT-V40K-klueNLI-augSTS, 3:sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
+)
+async def keywordExtraction(model_number: str, data_input: Item, parameter: Parameter = Depends()):
+    if not model_number in ["1", "2","3"]:
+        raise HTTPException(status_code=404, detail="'model_number' argument invalid! Type model name. \n Model: 1, 2, 3")
+
+    if not data_input:
+        raise HTTPException(status_code=404, detail="'data_input' argument invalid!")
+
+
+    model = get_model(model_number)
+
+    result = model.extract_keywords(docs=data_input.contents,
+                                    titles=data_input.titles,
+                                    stop_words = parameter.stop_words if parameter.stop_words else [],
+                                    top_k = parameter.top_k,
+                                    diversity = parameter.diversity,
+                                    candidate_frac = parameter.candidate_frac,
+                                    vectorizer_type = parameter.vectorizer_type,
+                                    tag_type = parameter.tag_type,
+                                    )
+    return result, parameter
diff --git a/Backend/Sentimental/.gitkeep b/Backend/Sentimental/.gitkeep
diff --git a/Backend/Sentimental/__main__.py b/Backend/Sentimental/__main__.py
@@ -0,0 +1,4 @@
+# Script entry point: start uvicorn for the sentiment service on port 30007.
+if __name__ == "__main__":
+    import uvicorn
+
+    # NOTE(review): the import string is "app.main:app", but this change adds
+    # the application as Backend/Sentimental/main.py and shows no `app`
+    # package next to it — confirm this target resolves where it is launched.
+    uvicorn.run("app.main:app", host="0.0.0.0", port=30007, reload=True)
diff --git a/Backend/Sentimental/main.py b/Backend/Sentimental/main.py
@@ -0,0 +1,77 @@
+from fastapi import FastAPI, UploadFile, File
+from fastapi.param_functions import Depends
+from pydantic import BaseModel, Field
+from uuid import UUID, uuid4
+from typing import List, Union, Optional, Dict, Any
+
+from datetime import datetime
+
+import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+
+# FastAPI application object that the route decorators below register on.
+app = FastAPI()
+
+# Filesystem location the classification model is loaded from.
+MODEL_PATH = "/opt/ml/input/model-roberta_large-sota_trainer"
+# The tokenizer is loaded by the "klue/roberta-large" identifier while the
+# model comes from MODEL_PATH; both are created once, at import time.
+tokenizer = AutoTokenizer.from_pretrained("klue/roberta-large")
+model = AutoModelForSequenceClassification.from_pretrained(MODEL_PATH)
+
+@app.get("/")
+def hello_world():
+    return {"hello": "world"}
+
+
+def predict_sentiment(text):
+    model.eval()
+    with torch.no_grad() :
+        temp = tokenizer(
+            text,
+            return_tensors='pt',
+            padding=True,
+            truncation=True,
+            ##
+            max_length=100,
+            # stride=stride,
+            # return_overflowing_tokens=True,
+            return_offsets_mapping=False
+            )
+
+
+        predicted_label = model(input_ids=temp['input_ids'],
+                                token_type_ids=temp['token_type_ids'])
+
+    print(predicted_label)
+
+    results = []
+    results = torch.nn.Softmax(dim=-1)(predicted_label.logits)
+
+
+    answer = []
+    print(results)
+    for result in results :
+        if result[0]>=result[1] :
+            answer.append("부정")
+
+        else :
+            answer.append("긍정")
+
+    return answer
+
+class FinanaceSentiment(BaseModel):
+    corpus_list: list = []
+    title: str = "title"
+    company: str = "삼성전자"
+    result: Optional[List]
+
+@app.post("/classify_sentiment", description="문장의 감정을 분류합니다.")
+async def classify_sentiment(finance: FinanaceSentiment):
+    # 입력으로 받은 텍스트를 모델로 예측합니다.
+    predictions = predict_sentiment(finance.corpus_list)
+
+    # 결과를 반환합니다.
+    result = {
+        "title": finance.title,
+        # "input_text": finance.corpus,
+        "sentiment": predictions
+    }
+
+    return predictions
diff --git a/Backend/Summarization/.gitkeep b/Backend/Summarization/.gitkeep
diff --git a/Backend/Summarization/summe/.env.example b/Backend/Summarization/summe/.env.example
@@ -0,0 +1,4 @@
+SECRET_KEY=secret
+DEBUG=True
+MODEL_PATH=./ml/model/
+MODEL_NAME=model.pkl