Skip to content

Commit 81e0dcf

Browse files
pavanjava and pavanmantha
authored
- incorporated self-correcting RAG, - all code now supports pdf and txt extensions by default, - other improvements in docs (#17)
Co-authored-by: pavanmantha <pavan.mantha@thevaslabs.io>
1 parent 025b62e commit 81e0dcf

24 files changed

Lines changed: 520 additions & 14 deletions

File tree

bootstraprag/cli.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ def create(project_name, framework, template, observability):
3535
'rag-with-react',
3636
'rag-with-hyde',
3737
'rag-with-flare',
38+
'rag-with-self-correction',
3839
'llama-agents-with-simpleq'
3940
]
4041
elif framework == 'None':

bootstraprag/templates/llamaindex/rag_with_flare/base_rag.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ class BaseRAG:
3333
]
3434

3535
def __init__(self, data_path: str, chunk_size: int = 512, chunk_overlap: int = 200,
36-
required_exts: list[str] = ['.pdf'],
36+
required_exts: list[str] = ['.pdf', '.txt'],
3737
show_progress: bool = False, similarity_top_k: int = 3, max_iterations: int = 5):
3838
# load the local data directory and chunk the data for further processing
3939
self.docs = SimpleDirectoryReader(input_dir=data_path, required_exts=required_exts).load_data(

bootstraprag/templates/llamaindex/rag_with_hyde/base_rag.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ class BaseRAG:
3737
Response, StreamingResponse, AsyncStreamingResponse, PydanticResponse
3838
]
3939

40-
def __init__(self, data_path: str, chunk_size: int = 512, chunk_overlap: int = 200, required_exts: list[str] = ['.pdf'],
40+
def __init__(self, data_path: str, chunk_size: int = 512, chunk_overlap: int = 200, required_exts: list[str] = ['.pdf', '.txt'],
4141
show_progress: bool = False, similarity_top_k: int = 3):
4242
# load the local data directory and chunk the data for further processing
4343
self.docs = SimpleDirectoryReader(input_dir=data_path, required_exts=required_exts).load_data(

bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/base_rag.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ class BaseRAG:
4242
]
4343

4444
def __init__(self, data_path: str, chunk_size: int = 512, chunk_overlap: int = 200,
45-
required_exts: list[str] = ['.pdf'],
45+
required_exts: list[str] = ['.pdf', '.txt'],
4646
show_progress: bool = False, similarity_top_k: int = 3):
4747
# load the local data directory and chunk the data for further processing
4848
self.docs = SimpleDirectoryReader(input_dir=data_path, required_exts=required_exts).load_data(

bootstraprag/templates/llamaindex/rag_with_react/main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,4 @@
1414
break
1515

1616
response = react_with_engine.query(user_query=user_query)
17-
print(response)
17+
print(response)

bootstraprag/templates/llamaindex/rag_with_react/react_agent_with_query_engine.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,12 @@
2323

2424

2525
class ReActWithQueryEngine:
26-
2726
RESPONSE_TYPE = Union[
2827
Response, StreamingResponse, AsyncStreamingResponse, PydanticResponse
2928
]
3029

31-
def __init__(self, input_dir: str, similarity_top_k: int = 3, chunk_size: int = 128, chunk_overlap: int = 100, show_progress: bool = False, no_of_iterations: int = 5):
30+
def __init__(self, input_dir: str, similarity_top_k: int = 3, chunk_size: int = 128, chunk_overlap: int = 100,
31+
show_progress: bool = False, no_of_iterations: int = 5, required_exts: list[str] = ['.pdf', '.txt']):
3232
self.index_loaded = False
3333
self.similarity_top_k = similarity_top_k
3434
self.input_dir = input_dir
@@ -38,6 +38,7 @@ def __init__(self, input_dir: str, similarity_top_k: int = 3, chunk_size: int =
3838
self.query_engine_tools = []
3939
self.show_progress = show_progress
4040
self.no_of_iterations = no_of_iterations
41+
self.required_exts = required_exts
4142

4243
# use your preferred vector embeddings model
4344
logger.info("initializing the OllamaEmbedding")
@@ -58,7 +59,8 @@ def __init__(self, input_dir: str, similarity_top_k: int = 3, chunk_size: int =
5859

5960
# Create a local Qdrant vector store
6061
logger.info("initializing the vector store related objects")
61-
self.client: qdrant_client.QdrantClient = qdrant_client.QdrantClient(url=os.environ['DB_URL'], api_key=os.environ['DB_API_KEY'])
62+
self.client: qdrant_client.QdrantClient = qdrant_client.QdrantClient(url=os.environ['DB_URL'],
63+
api_key=os.environ['DB_API_KEY'])
6264
self.vector_store = QdrantVectorStore(client=self.client, collection_name=os.environ['COLLECTION_NAME'])
6365
self._load_data_and_create_engine()
6466

@@ -72,13 +74,15 @@ def _load_data_and_create_engine(self):
7274

7375
if not self.index_loaded:
7476
# load data
75-
_docs = SimpleDirectoryReader(input_dir=self.input_dir).load_data(show_progress=self.show_progress)
77+
_docs = (SimpleDirectoryReader(input_dir=self.input_dir, required_exts=self.required_exts)
78+
.load_data(show_progress=self.show_progress))
7679

7780
# build and persist index
7881
storage_context = StorageContext.from_defaults(vector_store=self.vector_store)
7982
logger.info("indexing the docs in VectorStoreIndex")
80-
self._index = VectorStoreIndex.from_documents(documents=_docs, storage_context=storage_context, show_progress=self.show_progress)
81-
83+
self._index = VectorStoreIndex.from_documents(documents=_docs, storage_context=storage_context,
84+
show_progress=self.show_progress)
85+
8286
self._engine = self._index.as_query_engine(similarity_top_k=self.similarity_top_k)
8387
self._create_query_engine_tools()
8488

bootstraprag/templates/llamaindex/rag_with_react_with_observability/react_agent_with_query_engine.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ class ReActWithQueryEngine:
3434
]
3535

3636
def __init__(self, input_dir: str, similarity_top_k: int = 3, chunk_size: int = 128, chunk_overlap: int = 100,
37-
show_progress: bool = False, no_of_iterations: int = 5):
37+
show_progress: bool = False, no_of_iterations: int = 5, required_exts: list[str] = ['.pdf', '.txt']):
3838
self.index_loaded = False
3939
self.similarity_top_k = similarity_top_k
4040
self.input_dir = input_dir
@@ -44,6 +44,7 @@ def __init__(self, input_dir: str, similarity_top_k: int = 3, chunk_size: int =
4444
self.query_engine_tools = []
4545
self.show_progress = show_progress
4646
self.no_of_iterations = no_of_iterations
47+
self.required_exts = required_exts
4748

4849
# use your preferred vector embeddings model
4950
logger.info("initializing the OllamaEmbedding")
@@ -79,7 +80,8 @@ def _load_data_and_create_engine(self):
7980

8081
if not self.index_loaded:
8182
# load data
82-
_docs = SimpleDirectoryReader(input_dir=self.input_dir).load_data(show_progress=self.show_progress)
83+
_docs = (SimpleDirectoryReader(input_dir=self.input_dir, required_exts=self.required_exts)
84+
.load_data(show_progress=self.show_progress))
8385

8486
# build and persist index
8587
storage_context = StorageContext.from_defaults(vector_store=self.vector_store)
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
DB_URL='http://localhost:6333'
2+
DB_API_KEY='th3s3cr3tk3y'
3+
COLLECTION_NAME='YOUR_COLLECTION'
4+
5+
OPENAI_API_KEY=''
6+
OPENAI_EMBED_MODEL=''
7+
8+
# use these in case you prefer to experiment with local models.
9+
OLLAMA_BASE_URL='http://localhost:11434'
10+
OLLAMA_LLM_MODEL='llama3.1'
11+
OLLAMA_EMBED_MODEL='nomic-embed-text:latest'
12+
13+
# logger can be controlled using env
14+
CRITICAL = 50
15+
FATAL = 50
16+
ERROR = 40
17+
WARNING = 30
18+
WARN = 30
19+
INFO = 20
20+
DEBUG = 10
21+
NOTSET = 0
Binary file not shown.
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# driver code
2+
from self_correction_core import SelfCorrectingRAG
3+
4+
5+
self_correcting_rag = SelfCorrectingRAG(input_dir='data', show_progress=True, no_of_retries=3)
6+
7+
# Start a loop to continually get input from the user
8+
while True:
9+
# Get a query from the user
10+
user_query = input("Enter your query [type 'bye' to 'exit']: ")
11+
12+
# Check if the user wants to terminate the loop
13+
if user_query.lower() == "bye" or user_query.lower() == "exit":
14+
break
15+
16+
response1 = self_correcting_rag.query_with_retry_query_engine(query=user_query)
17+
print(response1)
18+
19+
response1 = self_correcting_rag.query_with_source_query_engine(query=user_query)
20+
print(response1)
21+
22+
response1 = self_correcting_rag.query_with_guideline_query_engine(query=user_query)
23+
print(response1)

0 commit comments

Comments
 (0)