-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #249 from assafelovic/feature/context_compressor
Feature/context compressor
- Loading branch information
Showing
12 changed files
with
116 additions
and
20 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
from .compression import ContextCompressor | ||
from .retriever import SearchAPIRetriever | ||
|
||
__all__ = ['ContextCompressor', 'SearchAPIRetriever'] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
from .retriever import SearchAPIRetriever | ||
from langchain.retrievers import ( | ||
ContextualCompressionRetriever, | ||
) | ||
from langchain.retrievers.document_compressors import ( | ||
DocumentCompressorPipeline, | ||
EmbeddingsFilter, | ||
) | ||
from langchain.text_splitter import RecursiveCharacterTextSplitter | ||
|
||
|
||
class ContextCompressor:
    """Reduce a set of pages to the passages most relevant to a query.

    Pages are served through ``SearchAPIRetriever``, split into chunks,
    filtered by embedding similarity, and rendered as a plain-text context
    string.
    """

    # Settings for the text splitter and the embeddings relevance filter.
    CHUNK_SIZE = 1000
    CHUNK_OVERLAP = 100
    SIMILARITY_THRESHOLD = 0.78

    def __init__(self, documents, embeddings, max_results=5, **kwargs):
        """Store the inputs used to build the compression pipeline.

        Args:
            documents: Pages passed to ``SearchAPIRetriever`` as ``pages``.
            embeddings: Embeddings object passed to ``EmbeddingsFilter``.
            max_results: Default number of documents rendered by
                ``get_context`` when the caller gives no explicit limit.
            **kwargs: Kept on ``self.kwargs``; not otherwise used by this class.
        """
        self.max_results = max_results
        self.documents = documents
        self.kwargs = kwargs
        self.embeddings = embeddings

    def _get_contextual_retriever(self):
        """Build a retriever that splits the pages and filters the chunks."""
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=self.CHUNK_SIZE, chunk_overlap=self.CHUNK_OVERLAP
        )
        relevance_filter = EmbeddingsFilter(
            embeddings=self.embeddings,
            similarity_threshold=self.SIMILARITY_THRESHOLD,
        )
        # Order matters: split first so the filter scores individual chunks.
        pipeline_compressor = DocumentCompressorPipeline(
            transformers=[splitter, relevance_filter]
        )
        base_retriever = SearchAPIRetriever(pages=self.documents)
        return ContextualCompressionRetriever(
            base_compressor=pipeline_compressor, base_retriever=base_retriever
        )

    def _pretty_print_docs(self, docs, top_n):
        """Render the first ``top_n`` documents as source/title/content text.

        Returns an empty string when ``docs`` is empty or ``top_n`` is not
        positive.
        """
        return "\n".join(
            f"Source: {d.metadata.get('source')}\n"
            f"Title: {d.metadata.get('title')}\n"
            f"Content: {d.page_content}\n"
            for i, d in enumerate(docs)
            if i < top_n
        )

    def get_context(self, query, max_results=None):
        """Return the formatted context for ``query``.

        Args:
            query: Text the relevant documents are retrieved for.
            max_results: Maximum number of documents to render. When omitted,
                the ``max_results`` given to the constructor is used.

        Returns:
            The rendered documents joined into a single string.
        """
        if max_results is None:
            # Honor the instance-level limit instead of a separate hard-coded one.
            max_results = self.max_results
        retriever = self._get_contextual_retriever()
        relevant_docs = retriever.get_relevant_documents(query)
        return self._pretty_print_docs(relevant_docs, max_results)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
import os | ||
from enum import Enum | ||
from typing import Any, Dict, List, Optional | ||
|
||
from langchain.callbacks.manager import CallbackManagerForRetrieverRun | ||
from langchain.schema import Document | ||
from langchain.schema.retriever import BaseRetriever | ||
|
||
|
||
class SearchAPIRetriever(BaseRetriever):
    """Retriever that exposes already-fetched pages as documents."""

    # Page dicts to serve; each is read for "raw_content", "title" and "url".
    pages: List[Dict] = []

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Convert every stored page into a ``Document``.

        Neither ``query`` nor ``run_manager`` is consulted here: all pages
        are returned, in order. Missing keys fall back to empty strings.
        """
        documents = []
        for entry in self.pages:
            content = entry.get("raw_content", "")
            metadata = {
                "title": entry.get("title", ""),
                "source": entry.get("url", ""),
            }
            documents.append(Document(page_content=content, metadata=metadata))
        return documents
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from .embeddings import Memory |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
from langchain.vectorstores import FAISS | ||
from langchain.embeddings import OpenAIEmbeddings | ||
|
||
|
||
class Memory:
    """Holder for the embeddings object used by the rest of the package."""

    def __init__(self, **kwargs):
        """Create the OpenAI embeddings object.

        Keyword arguments are accepted but not used.
        """
        embeddings = OpenAIEmbeddings()
        self._embeddings = embeddings

    def get_embeddings(self):
        """Return the embeddings object created at construction time."""
        return self._embeddings
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters