Skip to content

Commit

Permalink
Merge pull request #78 from gomate-community/pipeline
Browse files Browse the repository at this point in the history
支持es、mongo以及minio
  • Loading branch information
yanqiangmiffy authored Nov 21, 2024
2 parents 4917ca2 + 40a9b23 commit 252e7fd
Show file tree
Hide file tree
Showing 22 changed files with 1,849 additions and 28 deletions.
1 change: 1 addition & 0 deletions api/apps/core/judge/bodys.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,4 @@ class JudgeBody(BaseModel):
"此外,成员国还在知识产权、电子商务、竞争、政府采购和中小企业等领域制订了高标准的自由贸易规则。   根据RCEP规定,协定签署后,RCEP各成员国将各自履行国内法律审批程序。协定生效需15个成员中至少9个成员批准,其中至少包括6个东盟成员国和中国、日本、韩国、澳大利亚和新西兰中至少3个国家。   新签署的协定吸引了全世界的注意力,具有以下重要意义:首先,RCEP是当今世界上最大的自由贸易协定。该协定覆盖22亿人口,约占世界人口的30%,国内生产总值(GDP)达26.",
"Nov 15, 2020 ... RCEP成员国包括东盟10国与中国、日本、韩国、澳大利亚、新西兰。RCEP是全球最大的自贸协定,15个成员国总人口、经济体量、贸易总额均占全球总量约30% ..."
], description="重排序检索文档")
method: str = Field("llm", description="使用方法llm或者bge")
31 changes: 23 additions & 8 deletions api/apps/core/judge/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,25 +15,40 @@
from api.apps.core.judge.bodys import JudgeBody
from api.apps.handle.response.json_response import ApiResponse
from gomate.modules.judger.bge_judger import BgeJudger, BgeJudgerConfig
from gomate.modules.judger.chatgpt_judger import OpenaiJudger, OpenaiJudgerConfig

# Router on which this module registers its relevance-judging endpoint(s).
judge_router = APIRouter()

# BGE-based judger, built once at import time from a local checkpoint.
# NOTE(review): the checkpoint path is hard-coded to one machine's filesystem;
# consider moving it to configuration.
judge_config = BgeJudgerConfig(
model_name_or_path="/data/users/searchgpt/pretrained_models/bge-reranker-large"
)
bge_judger = BgeJudger(judge_config)

# LLM-backed judger, also built once at import time, pointed at an HTTPS endpoint.
# Mind the near-identical names: `judge_config` (BGE) vs. `judger_config` (LLM).
judger_config = OpenaiJudgerConfig(
api_url="https://aicloud.oneainexus.cn:30013/inference/aicloud-yanqiang/gomatellm/"
)
openai_judger = OpenaiJudger(judger_config)


# Create
@judge_router.post("/judge/", response_model=None, summary="判断文档相关性")
async def judge(judge_body: JudgeBody):
    """Judge whether each candidate document is relevant to the query.

    Args:
        judge_body: Request payload providing ``query``, ``contexts`` (the
            candidate documents) and ``method`` (which judger to use).

    Returns:
        An ``ApiResponse`` wrapping the value returned by the selected judger.
    """
    query = judge_body.query
    contexts = judge_body.contexts
    method = judge_body.method
    loguru.logger.info(query)
    loguru.logger.info(contexts)

    # Pick exactly one judger per request. The BGE judger used to run
    # unconditionally before this branch and its result was overwritten,
    # costing an extra model inference on every call.
    if method == 'llm':
        loguru.logger.info("llm judge ...")
        judger = openai_judger
    else:
        # Any value other than 'llm' falls back to the BGE judger.
        loguru.logger.info("bge judge...")
        judger = bge_judger

    judge_docs = judger.judge(
        query=query,
        documents=contexts,
        is_sorted=False
    )
    # Log the result that is actually returned to the caller.
    loguru.logger.info(judge_docs)
    return ApiResponse(judge_docs, message="判断文档是否相关成功")
4 changes: 2 additions & 2 deletions api/apps/core/parser/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ async def parser(file: UploadFile = File(...), chunk_size: int = 512):
chunks = tc.chunk_sentences(contents, chunk_size=chunk_size)
results.append(
{
'source': '来源',
'title': '标题',
'source': filename,
'title': filename,
'date': '20241008',
'sec_num': 0,
'content': ''.join(chunks),
Expand Down
3 changes: 0 additions & 3 deletions api/apps/core/rerank/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,9 @@
async def rerank(rerank_body: RerankBody):
contexts = rerank_body.contexts
query=rerank_body.query
loguru.logger.info(query)
loguru.logger.info(contexts)
top_docs = bge_reranker.rerank(
query=query,
documents=contexts,
is_sorted=False
)
loguru.logger.info(top_docs)
return ApiResponse(top_docs, message="重排序检索文档成功")
21 changes: 21 additions & 0 deletions api/apps/core/rewrite/bodys.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/usr/bin/env python
# -*- coding:utf-8 _*-
"""
@author:quincy qiang
@license: Apache Licence
@file: bodys.py
@time: 2024/06/13
@contact: yanqiangmiffy@gamil.com
@software: PyCharm
@description: Request body schema for the query rewrite endpoint.
"""
from typing import List

from pydantic import BaseModel, Field


class RewriteBody(BaseModel):
    """Request body model for the query-rewrite endpoint."""

    # Query text to be rewritten; the default value is a sample question.
    query: str = Field(
        default="RCEP具体包括哪些国家",
        description="查询query",
    )
38 changes: 38 additions & 0 deletions api/apps/core/rewrite/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/usr/bin/env python
# -*- coding:utf-8 _*-
"""
@author:quincy qiang
@license: Apache Licence
@file: models.py
@time: 2024/06/13
@contact: yanqiangmiffy@gamil.com
@software: PyCharm
@description: Tortoise ORM model stored in the `rag_application` table.
"""
from pydantic import BaseModel
from pydantic import Field
from enum import IntEnum
from tortoise import fields
from apps.core.base import AbstractModel

# class ApplicationModel(BaseModel):
# application_name: str = Field("应用名称", description="应用名称")
# knowledge_id: str = Field("应用名称", description="知识库结合id")
# service_name: str = Field("官方体验服务", description="模型服务名称")
# model_name: str = Field("GoGPT2.0", description="大模型名称")
# temperature: float = Field(0.5, description="多样性大小")


class Application(AbstractModel):
    """ORM model persisted in the ``rag_application`` table.

    Holds an application's name, the ids of the knowledge base, model service
    and model it refers to, and a temperature value.
    """

    application_name = fields.CharField(max_length=150, description="应用名称")
    knowledge_id = fields.IntField(default=1, description="知识库id")
    # NOTE(review): name-based columns (knowledge_name, service_name,
    # model_name) were previously sketched here as commented-out code;
    # presumably the id columns supersede them - confirm before reviving.
    service_id = fields.IntField(default=1, description="模型服务id")
    model_id = fields.IntField(default=1, description="模型id")
    temperature = fields.FloatField(default=0.5, description="多样性大小")

    class Meta:
        table = "rag_application"

32 changes: 32 additions & 0 deletions api/apps/core/rewrite/views.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/usr/bin/env python
# -*- coding:utf-8 _*-
"""
@author:quincy qiang
@license: Apache Licence
@file: views.py
@time: 2024/06/13
@contact: yanqiangmiffy@gamil.com
@software: PyCharm
@description: FastAPI route that rewrites a user query via OpenaiRewriter.
"""
import loguru
from fastapi import APIRouter

from api.apps.core.rewrite.bodys import RewriteBody
from api.apps.handle.response.json_response import ApiResponse
from gomate.modules.rewriter.openai_rewrite import OpenaiRewriter,OpenaiRewriterConfig


# Router on which this module registers the query-rewrite endpoint.
rewriter_router = APIRouter()

# Rewriter built once at import time.
# NOTE(review): api_url is a hard-coded private-network (10.x.x.x) address over
# plain HTTP; consider moving it to configuration.
rewriter_config = OpenaiRewriterConfig(
api_url="http://10.208.63.29:8888"
)
openai_rewriter = OpenaiRewriter(rewriter_config)

# Create
@rewriter_router.post("/rewrite/", response_model=None, summary="改写查询")
async def rewrite(rewrite_body: RewriteBody):
    """Rewrite the query carried by the request body.

    Args:
        rewrite_body: Request payload whose ``query`` field is passed to the
            module-level ``openai_rewriter``.

    Returns:
        An ``ApiResponse`` wrapping whatever ``openai_rewriter.rewrite`` returned.
    """
    # NOTE(review): if OpenaiRewriter.rewrite performs blocking I/O it will
    # stall the event loop inside this coroutine - confirm.
    rewritten = openai_rewriter.rewrite(rewrite_body.query)
    return ApiResponse(rewritten, message="改写查询")
2 changes: 2 additions & 0 deletions api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from apps.core.parser.views import parse_router
from apps.core.citation.views import citation_router
from apps.core.refiner.views import refiner_router
from apps.core.rewrite.views import rewriter_router
import uvicorn

app_config = AppConfig()
Expand All @@ -34,6 +35,7 @@
app.include_router(parse_router, prefix="/gomate_tool", tags=["parser"])
app.include_router(citation_router, prefix="/gomate_tool", tags=["citation"])
app.include_router(refiner_router, prefix="/gomate_tool", tags=["refiner"])
app.include_router(rewriter_router, prefix="/gomate_tool", tags=["rewrite"])

if __name__ == '__main__':
    # Development entry point with auto-reload.
    # `workers=8` used to be passed here as well, but uvicorn ignores
    # `workers` whenever `reload=True`, so it never had any effect.
    # For a multi-worker deployment, run without reload and pass
    # `workers=N` instead.
    uvicorn.run('main:app', host=app_config.API_HOST, port=app_config.API_PORT, reload=True)
6 changes: 3 additions & 3 deletions examples/judger/bgejudger_exmpale.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
@software: PyCharm
@description: coding..
"""

import pandas as pd
from gomate.modules.judger.bge_judger import BgeJudger, BgeJudgerConfig

if __name__ == '__main__':
Expand All @@ -22,8 +22,8 @@
'习近平主席着眼全人类福祉,积极促进世界和平,主张对话而不对抗,提出的共建“一带一路”和全球发展倡议、全球安全倡议、全球文明倡议,为解决当今世界面临的问题,推动建设更加和平、美好的世界提供了战略引领,巴方高度赞赏并全力支持中巴经济走廊有力促进了巴基斯坦国家发展,实实在在地造福了巴基斯坦人民巴方将学习借鉴中国的治国理政经验,同中方持续高质量共建“一带一路”,深化各领域务实合作巴政府再次对今年3月达苏恐袭事件造成中方人员遇难表示深切哀悼,将坚决打击并严惩涉案恐怖分子,采取切实有效措施,确保在巴中方人员、机构的安全我愿重申,没有任何势力能够阻挡中国发展壮大,没有任何势力能够动摇巴中铁杆友谊巴方将坚定不移做中国最可信赖的朋友和伙伴一个中国原则是巴政府坚定不移的承诺,台湾是中国领土不可分割的一部分巴方将继续毫不迟疑地坚定支持中国在涉台、涉藏、涉疆、南海等所有核心利益问题上的立场王毅、张又侠参加会见。',
'新华社北京6月7日电 (记者孙奕)6月7日下午,国家主席习近平在北京人民大会堂会见来华正式访问的巴基斯坦总理夏巴兹。习近平指出,中国和巴基斯坦是山水相连的好邻居、信义相交的好朋友,更是守望相助的好伙伴、患难与共的好兄弟。中巴全天候战略合作伙伴关系不断深化,拥有坚实的民意基础、强大的内生动力、广阔的发展前景。中方愿同巴方坚定相互支持,拉紧合作纽带,深化战略协调,加快构建新时代更加紧密的中巴命运共同体,为地区和平、稳定、发展、繁荣作出更大贡献。习近平强调,中巴铁杆友谊历久弥坚,关键在于双方始终相互理解、高度信任、坚定支持。感谢巴方在涉及中国核心利益和重大关切问题上长期坚定支持中国。中方也将一如既往坚定支持巴方捍卫国家主权和领土完整,支持巴方走符合本国国情的发展道路,支持巴方坚定打击恐怖主义。中方愿推动高质量共建“一带一路”同巴方发展规划对接,因地制宜开展农业、矿业、社会民生等领域合作,以打造中巴经济走廊“升级版”为中心,共建增长、民生、创新、绿色、开放“五大走廊”,推动高质量共建中巴经济走廊走深走实,助力巴基斯坦经济社会发展。希望巴方持续创造安全、稳定、可预期的营商环境,切实保障在巴中方人员、项目、机构安全。中方愿同巴方加强在联合国、上海合作组织等多边机制内的协调配合,共同推动平等有序的世界多极化和普惠包容的经济全球化,聚焦发展议程,应对安全挑战,完善全球治理,维护发展中国家共同利益和国际公平正义。夏巴兹表示,在习近平主席英明领导下,中国在脱贫、反腐和发展方面取得世界瞩目的伟大成就。习近平主席的远见卓识和杰出领导力,得到巴基斯坦人民和世界各国人民的由衷敬佩和高度评价。习近平主席着眼全人类福祉,积极促进世界和平,主张对话而不对抗,提出的共建“一带一路”和全球发展倡议、全球安全倡议、全球文明倡议,为解决当今世界面临的问题,推动建设更加和平、美好的世界提供了战略引领,巴方高度赞赏并全力支持。中巴经济走廊有力促进了巴基斯坦国家发展,实实在在地造福了巴基斯坦人民。巴方将学习借鉴中国的治国理政经验,同中方持续高质量共建“一带一路”,深化各领域务实合作。'
]
top_docs = bge_judger.judge(
judge_docs = bge_judger.judge(
query="发展新质生产力",
documents=contexts,
)
print(top_docs)
print(judge_docs)
26 changes: 26 additions & 0 deletions examples/judger/chatgpt_judger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import json

import pandas as pd

from gomate.modules.judger.chatgpt_judger import OpenaiJudger, OpenaiJudgerConfig

if __name__ == '__main__':
    # Load the sample data; it must contain a 'selected_docs' list whose items
    # carry a 'newsinfo' mapping with 'title', 'date' and 'content'.
    with open('citation.json', 'r', encoding="utf-8") as f:
        data = json.load(f)

    # Build the LLM-backed judger against the inference endpoint.
    judger_config = OpenaiJudgerConfig(
        api_url="https://aicloud.oneainexus.cn:30013/inference/aicloud-yanqiang/gomatellm/"
    )
    openai_judger = OpenaiJudger(judger_config)

    # Flatten every selected document into one "title / date / content" string.
    documents = []
    for doc in data['selected_docs']:
        news = doc['newsinfo']
        documents.append(
            f"标题:{news['title']}\n日期:{news['date']}\n内容:{news['content']}\n"
        )

    judge_docs = openai_judger.judge(
        query="在“一带一路”国际合作高峰论坛上,习近平讲了什么?",
        documents=documents,
    )
    # Show the judgements as a table for easier reading.
    print(pd.DataFrame(judge_docs))
Loading

0 comments on commit 252e7fd

Please sign in to comment.