# This text splitter is used to create the child documents
child_splitter = RecursiveCharacterTextSplitter(chunk_size=400)
# The vectorstore to use to index the child chunks
vectorstore = Chroma(
    collection_name="full_documents", embedding_function=OpenAIEmbeddings()
)
# The storage layer for the parent documents
store = InMemoryStore()
retriever = ParentDocumentRetriever(
    vectorstore=vectorstore,
    docstore=store,
    child_splitter=child_splitter,
)
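Once the retriever is wired up, documents are added through it: the child splitter breaks them into 400-character chunks that get embedded into the vectorstore, while the full documents land in the docstore. Below is a minimal usage sketch; `docs` (a list of previously loaded `Document` objects) and the query string are assumptions, not values from the original text.

```python
# Index documents: child chunks go into the vectorstore,
# full documents go into the docstore under generated ids.
retriever.add_documents(docs, ids=None)

# Searching the vectorstore directly returns the small child chunks...
sub_docs = vectorstore.similarity_search("justice breyer")  # arbitrary example query
print(sub_docs[0].page_content)

# ...while the retriever maps matching chunks back to their full parent documents.
retrieved_docs = retriever.get_relevant_documents("justice breyer")
print(retrieved_docs[0].page_content)
```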
# This text splitter is used to create the parent documents
parent_splitter = RecursiveCharacterTextSplitter(chunk_size=2000)
# This text splitter is used to create the child documents
# It should create documents smaller than the parent
child_splitter = RecursiveCharacterTextSplitter(chunk_size=400)
# The vectorstore to use to index the child chunks
vectorstore = Chroma(
    collection_name="split_parents", embedding_function=OpenAIEmbeddings()
)
# The storage layer for the parent documents
store = InMemoryStore()
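This variant is assembled the same way as the previous one, except that `parent_splitter` is also passed in, so the "parents" stored in the docstore are 2000-character chunks rather than whole files. A sketch of the wiring and a query, again assuming `docs` holds the loaded documents and using an arbitrary query string:

```python
retriever = ParentDocumentRetriever(
    vectorstore=vectorstore,
    docstore=store,
    child_splitter=child_splitter,
    parent_splitter=parent_splitter,
)
retriever.add_documents(docs)

# A query now returns 2000-character parent chunks instead of whole documents.
retrieved_docs = retriever.get_relevant_documents("justice breyer")
print(len(retrieved_docs[0].page_content))
```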
from langchain.retrievers import EnsembleRetriever
from langchain_community.retrievers import BM25Retriever
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
doc_list_1 = [
    "I like apples",
    "I like oranges",
    "Apples and oranges are fruits",
]
# initialize the bm25 retriever and faiss retriever
bm25_retriever = BM25Retriever.from_texts(
    doc_list_1, metadatas=[{"source": 1}] * len(doc_list_1)
)
bm25_retriever.k = 2
doc_list_2 = [
    "You like apples",
    "You like oranges",
]
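The second half of this example is a sketch of how the two retrievers are typically combined: a FAISS retriever is built over `doc_list_2`, and both are handed to `EnsembleRetriever`, which merges their results with reciprocal rank fusion. The `k=2` and the 0.5/0.5 weights here are assumptions for illustration.

```python
embedding = OpenAIEmbeddings()
faiss_vectorstore = FAISS.from_texts(
    doc_list_2, embedding, metadatas=[{"source": 2}] * len(doc_list_2)
)
faiss_retriever = faiss_vectorstore.as_retriever(search_kwargs={"k": 2})

# Combine the sparse (BM25) and dense (FAISS) retrievers; their ranked
# results are fused according to the given weights.
ensemble_retriever = EnsembleRetriever(
    retrievers=[bm25_retriever, faiss_retriever], weights=[0.5, 0.5]
)

docs = ensemble_retriever.get_relevant_documents("apples")
print(docs)
```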
compressed_docs = compression_retriever.get_relevant_documents(
    "What did the president say about Ketanji Jackson Brown"
)
pretty_print_docs(compressed_docs)
Usage of EmbeddingsFilter:
from langchain.retrievers.document_compressors import EmbeddingsFilter
from langchain_openai import OpenAIEmbeddings
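A sketch of how `EmbeddingsFilter` is typically plugged into a `ContextualCompressionRetriever`: documents whose embedding similarity to the query falls below the threshold are dropped, with no LLM call needed, which makes it cheaper and faster than an LLM-based compressor. The `similarity_threshold` value and the base `retriever` (assumed to be a vector-store retriever created earlier) are assumptions.

```python
from langchain.retrievers import ContextualCompressionRetriever

embeddings = OpenAIEmbeddings()
# Keep only documents whose embedding is sufficiently similar to the query.
embeddings_filter = EmbeddingsFilter(embeddings=embeddings, similarity_threshold=0.76)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=embeddings_filter, base_retriever=retriever
)

compressed_docs = compression_retriever.get_relevant_documents(
    "What did the president say about Ketanji Jackson Brown"
)
pretty_print_docs(compressed_docs)
```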