# If needed in Colab, install first:
# !pip install -U gradio pinecone llama-index llama-index-vector-stores-pinecone llama-index-readers-file pypdf

# --- Imports ---
import logging
import os
import sys

import gradio as gr
from pinecone import Pinecone, ServerlessSpec
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext, Settings
from llama_index.vector_stores.pinecone import PineconeVectorStore
from llama_index.readers.file import PDFReader
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

# --- Logging ---
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logger = logging.getLogger(__name__)

# --- Load API Key from Hugging face environment ---
# Read the keys before any client is built so a missing secret is reported
# up front instead of surfacing later from inside a third-party client.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")

if not PINECONE_API_KEY:
    raise RuntimeError(
        "PINECONE_API_KEY is not set; add it to the environment/secrets "
        "before starting the app."
    )
if not OPENAI_API_KEY:
    # Only a warning: this script never passes the key explicitly, so the
    # OpenAI clients may still be configured some other way.
    logger.warning("OPENAI_API_KEY is not set; OpenAI calls are likely to fail.")

# --- Global LlamaIndex settings ---
Settings.llm = OpenAI(model="gpt-4o-mini", temperature=0.2)
Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")
Settings.chunk_size = 600
Settings.chunk_overlap = 200

# Define a system prompt
# The literal is split into adjacent pieces purely for readability; the
# resulting string value (including its two embedded newlines and the spaces
# before them) is unchanged.
system_prompt = (
    " You are AYesha, the Decoding Data Science (DDS) Enterprise HR Chatbot. "
    "Answer questions exclusively using the attached DDS HR Handbook. "
    "Base all responses on the most up-to-date information available in the handbook. "
    "Only respond to queries directly related to DDS HR policies as outlined in the handbook. "
    "- If a question pertains to topics outside DDS HR policies, respond politely, clarifying that you are a human resources bot and only answer DDS HR questions. "
    "- For questions you cannot answer (e.g., requests for old policies, salary details, or confidential information), politely decline and direct the user to email connect@decodingdatascience.com. "
    "- Never answer questions about anything outside of your scope. "
    "- Persist in following these constraints for any follow-up questions. "
    "- Before answering, carefully check that the information and query are within the allowed scope. "
    "Follow chain-of-thought reasoning: 1. \n"
    "First, reason step-by-step whether the question is covered in the current handbook and is within HR. "
    "2. Only after confirming, produce a final answer. "
    "Format answers as concise, professional responses. "
    "Do not wrap answers in code blocks or any special formatting. "
    "Output requirements: "
    "- For allowed HR questions, answer concisely based only on the latest DDS HR handbook information. "
    "- For forbidden topics, output: “I’m sorry, I can only answer questions about the latest DDS HR policies. For confidential or other queries, please email connect@decodingdatascience.com.” "
    "**Example 1** User: What is the leave encashment policy at DDS? "
    "Reasoning: This is an HR policy question found in the latest handbook. "
    "Final Answer: [Provide answer summarized from the latest handbook’s section on leave encashment] "
    "**Example 2** User: Can you tell me the salary range for Data Scientists? "
    "Reasoning: Salary details are confidential and not shared by this bot. "
    "Final Answer: I’m sorry, I can only answer questions about the latest DDS HR policies. For confidential or other queries, please email connect@decodingdatascience.com. "
    "**Example 3** User: Can you explain what DDS does as a company overall? "
    "Reasoning: This is not an HR question, so it cannot be answered. "
    "Final Answer: I’m sorry, I only answer DDS HR policy questions as outlined in the handbook. "
    "(Real-world examples should be longer and use precise wording from the handbook where appropriate.) "
    "**Important instructions:** "
    "- Only answer questions directly supported by the latest DDS HR handbook. "
    "- Decline politely and redirect to the provided email address for any questions outside scope or for confidential information. "
    "- Always reason before concluding. Only present the answer after checking scope and source. "
    "Remember: As AYesha, the DDS HR Enterprise Chatbot, you must never provide information outside authorized HR handbook content and always respond respectfully according to these constraints. \n"
)

# --- Initialize Pinecone ---
pc = Pinecone(api_key=PINECONE_API_KEY)
index_name = "quickstart"
# Must match the vector size produced by Settings.embed_model.
dimension = 1536

# --- Delete index if it already exists (optional) ---
# NOTE: together with the create/from_documents calls below, this drops and
# rebuilds the index on every start, so every document is re-embedded each run.
existing_indexes = [idx["name"] for idx in pc.list_indexes()]
if index_name in existing_indexes:
    pc.delete_index(index_name)

# --- Create Pinecone index ---
pc.create_index(
    name=index_name,
    dimension=dimension,
    metric="euclidean",
    spec=ServerlessSpec(cloud="aws", region="us-east-1"),
)
pinecone_index = pc.Index(index_name)

# --- Load PDF documents from folder ---
documents = SimpleDirectoryReader(
    input_dir="data",
    required_exts=[".pdf"],
    file_extractor={".pdf": PDFReader()},
).load_data()

if not documents:
    raise ValueError("No PDF documents were loaded from the 'data' folder.")

# --- Create Vector Index ---
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
)

# --- Query Engine ---
# NOTE(review): confirm that as_query_engine() actually forwards
# `system_prompt` in the installed llama-index version; if it does not, the
# prompt defined above would be silently unused.
query_engine = index.as_query_engine(system_prompt=system_prompt)


# --- Gradio App ---
def query_doc(prompt: str) -> str:
    """Run *prompt* through the handbook query engine and return the answer text.

    Args:
        prompt: The user's question, passed unchanged to ``query_engine.query``.

    Returns:
        ``str(response)`` on success. If anything raises, the exception is
        logged with its traceback and an ``"Error: ..."`` string is returned
        instead, so this function itself never raises to the UI callback.
    """
    try:
        response = query_engine.query(prompt)
        return str(response)
    except Exception as e:  # UI boundary: report the failure instead of crashing
        logger.exception("Query failed")
        return f"Error: {str(e)}"


# -------------------------------------------------------------------
# Professional Gradio UI
# Only the Gradio interface is updated below.
# No RAG logic, LLM, embeddings, Pinecone, PDF loading, or prompt rules changed.
# ------------------------------------------------------------------- CUSTOM_CSS = """ .gradio-container { max-width: 1180px !important; margin: 0 auto !important; } .dds-hero { border: 1px solid var(--border-color-primary); background: var(--block-background-fill); border-radius: 22px; padding: 28px; margin-bottom: 18px; } .dds-title { font-size: 2rem; font-weight: 750; letter-spacing: -0.02em; margin-bottom: 8px; } .dds-subtitle { font-size: 1rem; color: var(--body-text-color-subdued); max-width: 880px; line-height: 1.6; } .dds-badges { display: flex; flex-wrap: wrap; gap: 8px; margin-top: 18px; } .dds-badge { border: 1px solid var(--border-color-primary); background: var(--background-fill-secondary); color: var(--body-text-color); border-radius: 999px; padding: 7px 12px; font-size: 0.86rem; } .dds-card { border: 1px solid var(--border-color-primary); background: var(--block-background-fill); border-radius: 18px; padding: 18px; margin-bottom: 12px; } .dds-muted { color: var(--body-text-color-subdued); font-size: 0.92rem; line-height: 1.55; } .dds-small-heading { font-size: 1rem; font-weight: 700; margin-bottom: 8px; } .dds-footer { text-align: center; color: var(--body-text-color-subdued); font-size: 0.86rem; margin-top: 16px; } textarea { border-radius: 14px !important; } button { border-radius: 12px !important; } """ example_questions = [ "What is the leave policy at DDS?", "How can I apply for annual leave?", "What should I do if I have an HR-related concern?", "Can you explain the employee code of conduct?", "What is the process for reporting a workplace issue?" ] def respond(message, history): """ UI wrapper only. Calls the existing query_doc() function without changing backend logic. 
""" if history is None: history = [] message = (message or "").strip() if not message: return history, "" answer = query_doc(message) history = history + [ {"role": "user", "content": message}, {"role": "assistant", "content": answer} ] return history, "" theme = gr.themes.Default( primary_hue="slate", secondary_hue="gray", neutral_hue="gray", spacing_size="md", radius_size="lg", text_size="md" ) with gr.Blocks( theme=theme, css=CUSTOM_CSS, title="DDS HR Enterprise Chatbot", fill_width=True ) as demo: gr.HTML("""