Skip to content

Commit

Permalink
Merge pull request #80 from gomate-community/pipeline
Browse files Browse the repository at this point in the history
Rename GoMate to TrustRAG
  • Loading branch information
yanqiangmiffy authored Nov 28, 2024
2 parents d969659 + b4bdd5a commit 1334c44
Show file tree
Hide file tree
Showing 158 changed files with 213 additions and 209 deletions.
9 changes: 6 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -284,11 +284,10 @@ python app.py
```

浏览器访问:[127.0.0.1:7860](http://127.0.0.1:7860)
![demo.png](resources%2Fdemo.png)
![trustrag_demo.png](resources%2Ftrustrag_demo.png)

app后台日志:

![app_logging.png](resources%2Fapp_logging.png)
![app_logging3.png](resources%2Fapp_logging3.png)

## ⭐️ Star History

Expand All @@ -308,3 +307,7 @@ app后台日志:
群满或者合作交流可以联系:

<img src="https://raw.githubusercontent.com/yanqiangmiffy/Chinese-LangChain/master/images/personal.jpg" width="180px">

## 致谢
- 文档解析:[infiniflow/ragflow](https://github.com/infiniflow/ragflow/blob/main/deepdoc/README.md)
- PDF文件解析:[opendatalab/MinerU](https://github.com/opendatalab/MinerU)
4 changes: 2 additions & 2 deletions api/apps/core/citation/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@

from api.apps.core.citation.bodys import CitationBody
from api.apps.handle.response.json_response import ApiResponse
from gomate.modules.citation.match_citation import MatchCitation
from gomate.modules.citation.source_citation import SourceCitation
from trustrag.modules.citation.match_citation import MatchCitation
from trustrag.modules.citation.source_citation import SourceCitation
mc = MatchCitation()
sc = SourceCitation()
citation_router = APIRouter()
Expand Down
4 changes: 2 additions & 2 deletions api/apps/core/judge/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@

from api.apps.core.judge.bodys import JudgeBody
from api.apps.handle.response.json_response import ApiResponse
from gomate.modules.judger.bge_judger import BgeJudger, BgeJudgerConfig
from gomate.modules.judger.chatgpt_judger import OpenaiJudger, OpenaiJudgerConfig
from trustrag.modules.judger.bge_judger import BgeJudger, BgeJudgerConfig
from trustrag.modules.judger.chatgpt_judger import OpenaiJudger, OpenaiJudgerConfig

judge_router = APIRouter()

Expand Down
16 changes: 8 additions & 8 deletions api/apps/core/parser/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@
from fastapi import File, UploadFile, HTTPException
from fastapi.responses import JSONResponse

from gomate.modules.document.chunk import TextChunker
from gomate.modules.document.docx_parser import DocxParser
from gomate.modules.document.excel_parser import ExcelParser
from gomate.modules.document.html_parser import HtmlParser
from gomate.modules.document.json_parser import JsonParser
from gomate.modules.document.pdf_parser_fast import PdfSimParser
from gomate.modules.document.ppt_parser import PptParser
from gomate.modules.document.txt_parser import TextParser
from trustrag.modules.document.chunk import TextChunker
from trustrag.modules.document.docx_parser import DocxParser
from trustrag.modules.document.excel_parser import ExcelParser
from trustrag.modules.document.html_parser import HtmlParser
from trustrag.modules.document.json_parser import JsonParser
from trustrag.modules.document.pdf_parser_fast import PdfSimParser
from trustrag.modules.document.ppt_parser import PptParser
from trustrag.modules.document.txt_parser import TextParser

tc = TextChunker()
parse_router = APIRouter()
Expand Down
2 changes: 1 addition & 1 deletion api/apps/core/refiner/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

from api.apps.core.refiner.bodys import RefinerBody
from api.apps.handle.response.json_response import ApiResponse
from gomate.modules.refiner.compressor import LLMCompressApi
from trustrag.modules.refiner.compressor import LLMCompressApi

refiner_router = APIRouter()
compressor = LLMCompressApi()
Expand Down
2 changes: 1 addition & 1 deletion api/apps/core/rerank/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from api.apps.core.rerank.bodys import RerankBody
from api.apps.core.rerank.models import Application
from api.apps.handle.response.json_response import UserNotFoundResponse, ApiResponse
from gomate.modules.reranker.bge_reranker import BgeReranker, BgeRerankerConfig
from trustrag.modules.reranker.bge_reranker import BgeReranker, BgeRerankerConfig

# from apps.handle.exception.exception import MallException
# from apps.core.config.models import LLMModel
Expand Down
2 changes: 1 addition & 1 deletion api/apps/core/rewrite/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

from api.apps.core.rewrite.bodys import RewriteBody
from api.apps.handle.response.json_response import ApiResponse
from gomate.modules.rewriter.openai_rewrite import OpenaiRewriter,OpenaiRewriterConfig
from trustrag.modules.rewriter.openai_rewrite import OpenaiRewriter,OpenaiRewriterConfig


rewriter_router = APIRouter()
Expand Down
18 changes: 9 additions & 9 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,19 @@
import gradio as gr
import loguru

from gomate.applications.rag import RagApplication, ApplicationConfig
from gomate.modules.reranker.bge_reranker import BgeRerankerConfig
from gomate.modules.retrieval.dense_retriever import DenseRetrieverConfig
from trustrag.applications.rag import RagApplication, ApplicationConfig
from trustrag.modules.reranker.bge_reranker import BgeRerankerConfig
from trustrag.modules.retrieval.dense_retriever import DenseRetrieverConfig

# 修改成自己的配置!!!
app_config = ApplicationConfig()
app_config.docs_path = "/data/users/searchgpt/yq/GoMate_dev/data/docs/"
app_config.docs_path = "/data/users/searchgpt/yq/TrustRAG/data/docs/"
app_config.llm_model_path = "/data/users/searchgpt/pretrained_models/glm-4-9b-chat"

retriever_config = DenseRetrieverConfig(
model_name_or_path="/data/users/searchgpt/pretrained_models/bge-large-zh-v1.5",
dim=1024,
index_dir='/data/users/searchgpt/yq/GoMate/examples/retrievers/dense_cache'
index_path='/data/users/searchgpt/yq/TrustRAG/examples/retrievers/dense_cache'
)
rerank_config = BgeRerankerConfig(
model_name_or_path="/data/users/searchgpt/pretrained_models/bge-reranker-large"
Expand Down Expand Up @@ -115,7 +115,7 @@ def predict(input,


with gr.Blocks(theme="soft") as demo:
gr.Markdown("""<h1><center>Gomate Application</center></h1>
gr.Markdown("""<h1><center>TrustRAG Application</center></h1>
<center><font size=3>
</center></font>
""")
Expand Down Expand Up @@ -177,16 +177,16 @@ def predict(input,
# )
with gr.Column(scale=4):
with gr.Row():
chatbot = gr.Chatbot(label='Gomate Application').style(height=650)
chatbot = gr.Chatbot(label='TrustRAG Application').style(height=650)
with gr.Row():
message = gr.Textbox(label='请输入问题')
with gr.Row():
clear_history = gr.Button("🧹 清除历史对话")
send = gr.Button("🚀 发送")
with gr.Row():
gr.Markdown("""提醒:<br>
[Gomate Application](https://github.com/gomate-community/GoMate) <br>
有任何使用问题[Github Issue区](https://github.com/gomate-community/GoMate)进行反馈.
[TrustRAG Application](https://github.com/TrustRAG-community/TrustRAG) <br>
有任何使用问题[Github Issue区](https://github.com/TrustRAG-community/TrustRAG)进行反馈.
<br>
""")
with gr.Column(scale=2):
Expand Down
16 changes: 8 additions & 8 deletions docs/quickstart.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,14 +57,14 @@ import pickle
import pandas as pd
from tqdm import tqdm

from gomate.modules.document.chunk import TextChunker
from gomate.modules.document.txt_parser import TextParser
from gomate.modules.document.utils import PROJECT_BASE
from gomate.modules.generator.llm import GLM4Chat
from gomate.modules.reranker.bge_reranker import BgeRerankerConfig, BgeReranker
from gomate.modules.retrieval.bm25s_retriever import BM25RetrieverConfig
from gomate.modules.retrieval.dense_retriever import DenseRetrieverConfig
from gomate.modules.retrieval.hybrid_retriever import HybridRetriever, HybridRetrieverConfig
from trustrag.modules.document.chunk import TextChunker
from trustrag.modules.document.txt_parser import TextParser
from trustrag.modules.document.utils import PROJECT_BASE
from trustrag.modules.generator.llm import GLM4Chat
from trustrag.modules.reranker.bge_reranker import BgeRerankerConfig, BgeReranker
from trustrag.modules.retrieval.bm25s_retriever import BM25RetrieverConfig
from trustrag.modules.retrieval.dense_retriever import DenseRetrieverConfig
from trustrag.modules.retrieval.hybrid_retriever import HybridRetriever, HybridRetrieverConfig
```


Expand Down
16 changes: 8 additions & 8 deletions docs/retrieval.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@
- `index_path`:向量维度

```python
from gomate.modules.document.common_parser import CommonParser
from gomate.modules.document.utils import PROJECT_BASE
from gomate.modules.retrieval.bm25s_retriever import BM25RetrieverConfig, BM25Retriever
from trustrag.modules.document.common_parser import CommonParser
from trustrag.modules.document.utils import PROJECT_BASE
from trustrag.modules.retrieval.bm25s_retriever import BM25RetrieverConfig, BM25Retriever

if __name__ == '__main__':

Expand Down Expand Up @@ -55,7 +55,7 @@ if __name__ == '__main__':
import pandas as pd
from tqdm import tqdm

from gomate.modules.retrieval.dense_retriever import DenseRetriever, DenseRetrieverConfig
from trustrag.modules.retrieval.dense_retriever import DenseRetriever, DenseRetrieverConfig

if __name__ == '__main__':
retriever_config = DenseRetrieverConfig(
Expand Down Expand Up @@ -91,10 +91,10 @@ if __name__ == '__main__':
> 混合检索,将Bm25检索以及Dense检索的结果进行合并
```python
from gomate.modules.document.common_parser import CommonParser
from gomate.modules.retrieval.bm25s_retriever import BM25RetrieverConfig
from gomate.modules.retrieval.dense_retriever import DenseRetrieverConfig
from gomate.modules.retrieval.hybrid_retriever import HybridRetriever, HybridRetrieverConfig
from trustrag.modules.document.common_parser import CommonParser
from trustrag.modules.retrieval.bm25s_retriever import BM25RetrieverConfig
from trustrag.modules.retrieval.dense_retriever import DenseRetrieverConfig
from trustrag.modules.retrieval.hybrid_retriever import HybridRetriever, HybridRetrieverConfig

if __name__ == '__main__':
# BM25 and Dense Retriever configurations
Expand Down
11 changes: 6 additions & 5 deletions docs/rewriter.md
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
## HyDE

```python
import os

import pandas as pd
from tqdm import tqdm

from gomate.modules.document.common_parser import CommonParser
from gomate.modules.generator.llm import GLMChat
from gomate.modules.retrieval.dense_retriever import DenseRetriever, DenseRetrieverConfig
from gomate.modules.rewriter.hyde_rewriter import HydeRewriter
from gomate.modules.rewriter.promptor import Promptor
from trustrag.modules.document.common_parser import CommonParser
from trustrag.modules.generator.llm import GLMChat
from trustrag.modules.retrieval.dense_retriever import DenseRetriever, DenseRetrieverConfig
from trustrag.modules.rewriter.hyde_rewriter import HydeRewriter
from trustrag.modules.rewriter.promptor import Promptor

if __name__ == '__main__':
promptor = Promptor(task="WEB_SEARCH", language="zh")
Expand Down
2 changes: 1 addition & 1 deletion examples/citations/match_example.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from gomate.modules.citation.match_citation import MatchCitation
from trustrag.modules.citation.match_citation import MatchCitation
import json

mc = MatchCitation()
Expand Down
2 changes: 1 addition & 1 deletion examples/docs/convert2json_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import os
import re

from gomate.modules.document.utils import PROJECT_BASE
from trustrag.modules.document.utils import PROJECT_BASE


def process_markdown(markdown_content):
Expand Down
2 changes: 1 addition & 1 deletion examples/judger/bgejudger_exmpale.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
@description: coding..
"""
import pandas as pd
from gomate.modules.judger.bge_judger import BgeJudger, BgeJudgerConfig
from trustrag.modules.judger.bge_judger import BgeJudger, BgeJudgerConfig

if __name__ == '__main__':
judger_config = BgeJudgerConfig(
Expand Down
2 changes: 1 addition & 1 deletion examples/judger/chatgpt_judger.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import pandas as pd

from gomate.modules.judger.chatgpt_judger import OpenaiJudger, OpenaiJudgerConfig
from trustrag.modules.judger.chatgpt_judger import OpenaiJudger, OpenaiJudgerConfig

if __name__ == '__main__':

Expand Down
2 changes: 1 addition & 1 deletion examples/parsers/common_parser.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from gomate.modules.document.common_parser import CommonParser
from trustrag.modules.document.common_parser import CommonParser


# cp=CommonParser()
Expand Down
2 changes: 1 addition & 1 deletion examples/parsers/markdown_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
@software: PyCharm
@description: coding..
"""
from gomate.modules.document.markdown_parser import MarkdownParser
from trustrag.modules.document.markdown_parser import MarkdownParser


if __name__ == '__main__':
Expand Down
2 changes: 1 addition & 1 deletion examples/parsers/parser_examples.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from gomate.modules.document.common_parser import CommonParser
from trustrag.modules.document.common_parser import CommonParser

if __name__ == '__main__':
parser = CommonParser()
Expand Down
2 changes: 1 addition & 1 deletion examples/parsers/pdfparser_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
@software: PyCharm
@description: coding..
"""
from gomate.modules.document.pdf_parser_fast import PdfParserUsingPyMuPDF
from trustrag.modules.document.pdf_parser_fast import PdfParserUsingPyMuPDF


if __name__ == '__main__':
Expand Down
4 changes: 2 additions & 2 deletions examples/parsers/pdfparser_mineru.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import os
from gomate.modules.document.pdf_mineru_parser import PdfParserWithMinerU
from gomate.modules.document.utils import PROJECT_BASE
from trustrag.modules.document.pdf_mineru_parser import PdfParserWithMinerU
from trustrag.modules.document.utils import PROJECT_BASE
from tqdm import tqdm
if __name__ == '__main__':
pdf_parser=PdfParserWithMinerU(url='http://localhost:8888/pdf_parse')
Expand Down
2 changes: 1 addition & 1 deletion examples/parsers/textparser_exmaple.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
@software: PyCharm
@description: coding..
"""
from gomate.modules.document.text_parser import TextParser
from trustrag.modules.document.text_parser import TextParser



Expand Down
6 changes: 3 additions & 3 deletions examples/pipelines/insurance_qa_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
import json
import pandas as pd
from transformers import AutoTokenizer
from gomate.modules.retrieval.embedding import SBertEmbeddingModel
from gomate.modules.retrieval.faiss_retriever import FaissRetriever
from gomate.modules.retrieval.faiss_retriever import FaissRetrieverConfig
from trustrag.modules.retrieval.embedding import SBertEmbeddingModel
from trustrag.modules.retrieval.faiss_retriever import FaissRetriever
from trustrag.modules.retrieval.faiss_retriever import FaissRetrieverConfig

## step1 build faiss index
embedding_model_path = "/home/test/pretrained_models/bge-large-zh-v1.5"
Expand Down
8 changes: 4 additions & 4 deletions examples/rag/app_weibo.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@

import gradio as gr

from gomate.applications.rag_weibo import WeiboRagApplication, ApplicationConfig
from gomate.modules.document.utils import PROJECT_BASE
from gomate.modules.reranker.bge_reranker import BgeRerankerConfig
from gomate.modules.retrieval.dense_retriever import DenseRetrieverConfig
from trustrag.applications.rag_weibo import WeiboRagApplication, ApplicationConfig
from trustrag.modules.document.utils import PROJECT_BASE
from trustrag.modules.reranker.bge_reranker import BgeRerankerConfig
from trustrag.modules.retrieval.dense_retriever import DenseRetrieverConfig

# 修改成自己的配置!!!
app_config = ApplicationConfig()
Expand Down
16 changes: 8 additions & 8 deletions examples/rag/xunfei_rag.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
import pandas as pd
from tqdm import tqdm

from gomate.modules.document.chunk import TextChunker
from gomate.modules.document.txt_parser import TextParser
from gomate.modules.document.utils import PROJECT_BASE
from gomate.modules.generator.llm import GLM4Chat
from gomate.modules.reranker.bge_reranker import BgeRerankerConfig, BgeReranker
from gomate.modules.retrieval.bm25s_retriever import BM25RetrieverConfig
from gomate.modules.retrieval.dense_retriever import DenseRetrieverConfig
from gomate.modules.retrieval.hybrid_retriever import HybridRetriever, HybridRetrieverConfig
from trustrag.modules.document.chunk import TextChunker
from trustrag.modules.document.txt_parser import TextParser
from trustrag.modules.document.utils import PROJECT_BASE
from trustrag.modules.generator.llm import GLM4Chat
from trustrag.modules.reranker.bge_reranker import BgeRerankerConfig, BgeReranker
from trustrag.modules.retrieval.bm25s_retriever import BM25RetrieverConfig
from trustrag.modules.retrieval.dense_retriever import DenseRetrieverConfig
from trustrag.modules.retrieval.hybrid_retriever import HybridRetriever, HybridRetrieverConfig


def generate_chunks():
Expand Down
12 changes: 6 additions & 6 deletions examples/rag/xunfei_rag_bm25.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
import pandas as pd
from tqdm import tqdm

from gomate.modules.document.chunk import TextChunker
from gomate.modules.document.txt_parser import TextParser
from gomate.modules.document.utils import PROJECT_BASE
from gomate.modules.generator.llm import GLM4Chat
from gomate.modules.reranker.bge_reranker import BgeRerankerConfig, BgeReranker
from gomate.modules.retrieval.bm25s_retriever import BM25RetrieverConfig, BM25Retriever
from trustrag.modules.document.chunk import TextChunker
from trustrag.modules.document.txt_parser import TextParser
from trustrag.modules.document.utils import PROJECT_BASE
from trustrag.modules.generator.llm import GLM4Chat
from trustrag.modules.reranker.bge_reranker import BgeRerankerConfig, BgeReranker
from trustrag.modules.retrieval.bm25s_retriever import BM25RetrieverConfig, BM25Retriever


def generate_chunks():
Expand Down
10 changes: 5 additions & 5 deletions examples/rag/xunfei_rag_dense.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
import pandas as pd
from tqdm import tqdm

from gomate.modules.document.chunk import TextChunker
from gomate.modules.document.txt_parser import TextParser
from gomate.modules.document.utils import PROJECT_BASE
from gomate.modules.generator.llm import QwenChat
from gomate.modules.retrieval.dense_retriever import DenseRetrieverConfig,DenseRetriever
from trustrag.modules.document.chunk import TextChunker
from trustrag.modules.document.txt_parser import TextParser
from trustrag.modules.document.utils import PROJECT_BASE
from trustrag.modules.generator.llm import QwenChat
from trustrag.modules.retrieval.dense_retriever import DenseRetrieverConfig,DenseRetriever


def generate_chunks():
Expand Down
Loading

0 comments on commit 1334c44

Please sign in to comment.