| |
| |
| from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext, Settings |
| |
| import logging |
| import sys |
| import gradio as gr |
| import os |
|
|
| from pinecone import Pinecone, ServerlessSpec |
| from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext , Settings |
| from llama_index.vector_stores.pinecone import PineconeVectorStore |
| from llama_index.readers.file import PDFReader |
| from llama_index.llms.openai import OpenAI |
| from llama_index.embeddings.openai import OpenAIEmbedding |
| |
# Route log records at INFO level and above to standard output.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)

# Global LlamaIndex defaults, set once here and picked up by the index and
# query-engine construction further down.
# Chunking parameters applied when the loaded documents are split.
Settings.chunk_size = 600
Settings.chunk_overlap = 200

# Models used for embedding text and for generating answers.
Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")
Settings.llm = OpenAI(model="gpt-4o-mini", temperature=0.2)
|
|
| |
# Instruction text for the LLM: it defines the DDS HR chatbot persona, the
# allowed scope, the refusal wording and a few worked examples. It is consumed
# where the query engine is built further down in this script.
# Everything between the triple quotes is part of the runtime string value.
| system_prompt = ''' |
| You are AYesha, the Decoding Data Science (DDS) Enterprise HR Chatbot. Answer questions exclusively using the attached DDS HR Handbook. Base all responses on the most up-to-date information available in the handbook. Only respond to queries directly related to DDS HR policies as outlined in the handbook. |
| |
| - If a question pertains to topics outside DDS HR policies, respond politely, clarifying that you are a human resources bot and only answer DDS HR questions. |
| - For questions you cannot answer (e.g., requests for old policies, salary details, or confidential information), politely decline and direct the user to email connect@decodingdatascience.com. |
| - Never answer questions about anything outside of your scope. |
| - Persist in following these constraints for any follow-up questions. |
| - Before answering, carefully check that the information and query are within the allowed scope. Follow chain-of-thought reasoning: |
| 1. First, reason step-by-step whether the question is covered in the current handbook and is within HR. |
| 2. Only after confirming, produce a final answer. |
| |
| Format answers as concise, professional responses. Do not wrap answers in code blocks or any special formatting. |
| |
| Output requirements: |
| - For allowed HR questions, answer concisely based only on the latest DDS HR handbook information. |
| - For forbidden topics, output: “I’m sorry, I can only answer questions about the latest DDS HR policies. For confidential or other queries, please email connect@decodingdatascience.com.” |
| |
| |
| **Example 1** |
| User: What is the leave encashment policy at DDS? |
| Reasoning: This is an HR policy question found in the latest handbook. |
| Final Answer: [Provide answer summarized from the latest handbook’s section on leave encashment] |
| |
| **Example 2** |
| User: Can you tell me the salary range for Data Scientists? |
| Reasoning: Salary details are confidential and not shared by this bot. |
| Final Answer: I’m sorry, I can only answer questions about the latest DDS HR policies. For confidential or other queries, please email connect@decodingdatascience.com. |
| |
| **Example 3** |
| User: Can you explain what DDS does as a company overall? |
| Reasoning: This is not an HR question, so it cannot be answered. |
| Final Answer: I’m sorry, I only answer DDS HR policy questions as outlined in the handbook. |
| |
| (Real-world examples should be longer and use precise wording from the handbook where appropriate.) |
| |
| **Important instructions:** |
| - Only answer questions directly supported by the latest DDS HR handbook. |
| - Decline politely and redirect to the provided email address for any questions outside scope or for confidential information. |
| - Always reason before concluding. Only present the answer after checking scope and source. |
| |
| Remember: As AYesha, the DDS HR Enterprise Chatbot, you must never provide information outside authorized HR handbook content and always respond respectfully according to these constraints. |
| |
| ''' |
|
|
|
|
| |
|
|
def _require_env(name):
    """Return the value of environment variable *name*.

    Raises:
        RuntimeError: if the variable is unset or empty.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(
            f"Environment variable {name} is not set; export it before "
            "starting the chatbot."
        )
    return value


# Fail fast on missing credentials. This script later deletes and recreates
# the Pinecone index, so discovering a missing key only after that point
# would leave an empty index behind.
OPENAI_API_KEY = _require_env("OPENAI_API_KEY")
PINECONE_API_KEY = _require_env("PINECONE_API_KEY")
|
|
|
|
| |
# Pinecone client, plus the name and vector width of the index used below.
# NOTE(review): 1536 is presumably the output width of the embedding model
# configured in Settings — confirm if that model is ever changed.
index_name = "quickstart"
dimension = 1536
pc = Pinecone(api_key=PINECONE_API_KEY)

# Every run starts from a fresh index: an index that already uses this name
# is dropped before a new one is created.
existing_indexes = [entry["name"] for entry in pc.list_indexes()]
if index_name in existing_indexes:
    pc.delete_index(index_name)

# NOTE(review): the metric is "euclidean"; confirm this is the intended
# similarity measure for the embedding model in use.
serverless_spec = ServerlessSpec(cloud="aws", region="us-east-1")
pc.create_index(
    name=index_name,
    dimension=dimension,
    metric="euclidean",
    spec=serverless_spec,
)

# Handle to the newly created index, handed to the vector store later on.
pinecone_index = pc.Index(index_name)
|
|
| |
# Read only the .pdf files found under ./data, parsing them with PDFReader.
_pdf_loader = SimpleDirectoryReader(
    input_dir="data",
    required_exts=[".pdf"],
    file_extractor={".pdf": PDFReader()},
)
documents = _pdf_loader.load_data()

# Stop here rather than build an index with nothing in it.
if not documents:
    raise ValueError("No PDF documents were loaded from the 'data' folder.")
|
|
| |
# Wrap the Pinecone index in a LlamaIndex vector store and expose it through
# a storage context.
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the vector index from the loaded documents on top of that storage
# context.
index = VectorStoreIndex.from_documents(
    documents=documents,
    storage_context=storage_context,
)
|
|
| |
# Attach the chatbot instructions to the LLM itself so they accompany every
# call the query engine makes.
# NOTE(review): `as_query_engine` does not appear to consume a `system_prompt`
# keyword (extra keyword arguments are forwarded to the retriever and to
# `RetrieverQueryEngine.from_args`, which seem to ignore it), so the prompt
# looks to have been dropped silently before — confirm against the installed
# llama-index version.
Settings.llm.system_prompt = system_prompt
query_engine = index.as_query_engine(llm=Settings.llm)
|
|
| |
def query_doc(prompt):
    """Answer a user question with the module-level query engine.

    Args:
        prompt: The question typed by the user. ``None``, empty and
            whitespace-only values are treated as "no question".

    Returns:
        The answer as a string. A short hint is returned when no question
        was supplied, and ``"Error: <message>"`` when the query fails.
    """
    question = "" if prompt is None else str(prompt).strip()
    if not question:
        # Skip the retrieval/LLM round-trip when there is nothing to ask.
        return "Please enter a question."

    try:
        response = query_engine.query(question)
        return str(response)
    except Exception as e:
        # This is the UI boundary: keep the app responsive, but record the
        # traceback so the failure is not lost.
        logging.getLogger(__name__).exception("Query failed")
        return f"Error: {str(e)}"
|
|
# Single-textbox web UI: the question goes to query_doc and its return value
# is shown in the answer box.
demo = gr.Interface(
    fn=query_doc,
    inputs=gr.Textbox(label="Ask a question about the document"),
    outputs=gr.Textbox(label="Answer"),
    title="DDS Enterprise Chatbot",
    description="Ask questions related to HR for latest Information.",
)

# NOTE(review): share=True asks Gradio for a publicly reachable link —
# confirm that is intended for an internal HR assistant.
demo.launch(share=True)