Spaces:
Sleeping
Sleeping
| import os | |
| import time | |
| import logging | |
| import sys | |
| import gradio as gr | |
| from pinecone import Pinecone, ServerlessSpec | |
| from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext, Settings | |
| from llama_index.vector_stores.pinecone import PineconeVectorStore | |
| from llama_index.readers.file import PDFReader | |
| from llama_index.llms.openai import OpenAI | |
| from llama_index.embeddings.openai import OpenAIEmbedding | |
| # ----------------------------- | |
| # Logging | |
| # ----------------------------- | |
# Route INFO-and-above records from every logger to stdout.
logging.basicConfig(
    level=logging.INFO,
    handlers=[logging.StreamHandler(sys.stdout)],
)
# Module-scoped logger used by the functions in this file.
logger = logging.getLogger(__name__)
| # ----------------------------- | |
| # Environment Variables | |
| # Add these in Hugging Face Spaces Secrets | |
| # ----------------------------- | |
# Folder that is scanned for the handbook PDF(s).
DATA_DIR = "data"

# Required secrets: the script refuses to start without them (see checks below).
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")

# Optional settings with defaults.
PINECONE_INDEX_NAME = os.environ.get("PINECONE_INDEX_NAME", "dds-hr-chatbot")
PINECONE_CLOUD = os.environ.get("PINECONE_CLOUD", "aws")
PINECONE_REGION = os.environ.get("PINECONE_REGION", "us-east-1")

# Only the literal word "true" (any letter case) enables re-indexing.
REINDEX_ON_STARTUP = "true" == os.environ.get("REINDEX_ON_STARTUP", "false").lower()

# Fail fast at import time when a secret is unset or empty.
if OPENAI_API_KEY in (None, ""):
    raise ValueError("OPENAI_API_KEY is missing. Please add it in Hugging Face Spaces secrets.")
if PINECONE_API_KEY in (None, ""):
    raise ValueError("PINECONE_API_KEY is missing. Please add it in Hugging Face Spaces secrets.")
| # ----------------------------- | |
| # LlamaIndex Settings | |
| # ----------------------------- | |
# Global LlamaIndex defaults: chat model used to write answers.
Settings.llm = OpenAI(model="gpt-4o-mini", temperature=0.2, api_key=OPENAI_API_KEY)

# Embedding model used for both indexing and querying.
# NOTE(review): the Pinecone index in this file is created with dimension=1536;
# keep the two in sync if the embedding model is ever changed.
Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002", api_key=OPENAI_API_KEY)

# Chunking parameters applied when documents are split (size first, then overlap).
Settings.chunk_size, Settings.chunk_overlap = 600, 200
| # ----------------------------- | |
| # System Prompt | |
| # ----------------------------- | |
# Persona and guard-rail instructions for the assistant. The text is passed to
# the query engine when it is built; wording is part of runtime behaviour, so
# edit with care.
system_prompt = """
You are Ayesha, the Decoding Data Science (DDS) Enterprise HR Chatbot.
Your role is to answer questions using only the uploaded DDS HR Handbook.
Core rules:
- Answer only DDS HR policy questions that are supported by the handbook.
- Do not answer questions outside HR policy scope.
- Do not answer confidential questions, salary questions, legal questions, or old-policy questions.
- If the answer is not available in the handbook, politely say that the information is not available and direct the user to connect@decodingdatascience.com.
- Do not reveal internal reasoning.
- Keep answers concise, professional, and helpful.
- Never invent information.
For forbidden, confidential, unsupported, or out-of-scope topics, respond with:
“I’m sorry, I can only answer questions about the latest DDS HR policies. For confidential or other queries, please email connect@decodingdatascience.com.”
Remember: You are Ayesha, the DDS Enterprise HR Chatbot. You must only answer from the authorized HR handbook content.
"""
| # ----------------------------- | |
| # Pinecone Setup | |
| # ----------------------------- | |
def get_existing_index_names(pc):
    """Return the names of the indexes reported by ``pc.list_indexes()``.

    Two response shapes are supported:

    * an object exposing a callable ``names`` attribute, whose result is
      returned as a list;
    * an iterable of entries that are either dicts with a ``"name"`` key or
      objects with a ``name`` attribute.

    ``list_indexes()`` is called exactly once, and any error it raises
    propagates to the caller instead of being swallowed and retried.

    Args:
        pc: Client object providing ``list_indexes()``.

    Returns:
        list: Index names; entries without a usable (truthy) name are skipped.
    """
    listing = pc.list_indexes()

    names_method = getattr(listing, "names", None)
    if callable(names_method):
        return list(names_method())

    # Fallback shape: pull the name out of each entry, whichever form it has.
    candidates = (
        entry.get("name") if isinstance(entry, dict) else getattr(entry, "name", None)
        for entry in listing
    )
    return [name for name in candidates if name]
def _index_is_ready(description):
    """Return True when an index description reports ``status.ready``.

    ``description.status`` may be a plain dict or an object with a ``ready``
    attribute; a missing flag counts as "not ready".
    """
    status = description.status
    if isinstance(status, dict):
        return bool(status.get("ready", False))
    return bool(getattr(status, "ready", False))


def setup_pinecone_index(ready_timeout=300.0, poll_interval=2.0):
    """Return a handle to the configured Pinecone index, creating it if needed.

    If ``PINECONE_INDEX_NAME`` is not among the existing indexes, a serverless
    cosine index with dimension 1536 is created in ``PINECONE_CLOUD`` /
    ``PINECONE_REGION`` and the function polls until the index reports ready.

    Args:
        ready_timeout: Maximum number of seconds to wait for a newly created
            index to become ready.
        poll_interval: Seconds to sleep between readiness checks.

    Returns:
        The object returned by ``pc.Index(PINECONE_INDEX_NAME)``.

    Raises:
        TimeoutError: The new index did not become ready within
            ``ready_timeout`` seconds (previously this waited forever).
    """
    pc = Pinecone(api_key=PINECONE_API_KEY)

    if PINECONE_INDEX_NAME in get_existing_index_names(pc):
        logger.info(f"Using existing Pinecone index: {PINECONE_INDEX_NAME}")
        return pc.Index(PINECONE_INDEX_NAME)

    logger.info(f"Creating Pinecone index: {PINECONE_INDEX_NAME}")
    pc.create_index(
        name=PINECONE_INDEX_NAME,
        dimension=1536,
        metric="cosine",
        spec=ServerlessSpec(cloud=PINECONE_CLOUD, region=PINECONE_REGION),
    )

    # Monotonic clock: the deadline is immune to wall-clock adjustments.
    deadline = time.monotonic() + ready_timeout
    while not _index_is_ready(pc.describe_index(PINECONE_INDEX_NAME)):
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Pinecone index '{PINECONE_INDEX_NAME}' was not ready "
                f"after {ready_timeout} seconds."
            )
        logger.info("Waiting for Pinecone index to be ready...")
        time.sleep(poll_interval)

    return pc.Index(PINECONE_INDEX_NAME)
| # ----------------------------- | |
| # Load or Create LlamaIndex Query Engine | |
| # ----------------------------- | |
def _load_handbook_documents():
    """Load every PDF found under ``DATA_DIR`` and return the parsed documents.

    Raises:
        ValueError: ``DATA_DIR`` is not a directory, or no documents were
            produced from it.
    """
    # isdir (not exists): a stray *file* named "data" should get the same
    # actionable message as a missing folder.
    if not os.path.isdir(DATA_DIR):
        raise ValueError(
            "The 'data' folder is missing. Please create a data folder and upload your PDF file inside it."
        )

    documents = SimpleDirectoryReader(
        input_dir=DATA_DIR,
        required_exts=[".pdf"],
        file_extractor={".pdf": PDFReader()},
    ).load_data()

    if not documents:
        raise ValueError("No PDF documents were loaded from the 'data' folder.")
    return documents


def build_query_engine():
    """Build the query engine backed by the Pinecone vector store.

    The handbook is (re)indexed when the Pinecone index reports zero vectors
    or when ``REINDEX_ON_STARTUP`` is set; otherwise the existing vectors are
    reused without reading any documents.

    Returns:
        The query engine produced by ``index.as_query_engine`` with
        ``similarity_top_k=5``.

    Raises:
        ValueError: Indexing was required but the handbook could not be loaded.
    """
    pinecone_index = setup_pinecone_index()
    vector_store = PineconeVectorStore(pinecone_index=pinecone_index)

    index_stats = pinecone_index.describe_index_stats()
    # "or 0" also covers a response that carries the key with a None value.
    total_vectors = index_stats.get("total_vector_count", 0) or 0

    if total_vectors == 0 or REINDEX_ON_STARTUP:
        logger.info("Loading documents and creating vector index...")
        # NOTE(review): when REINDEX_ON_STARTUP is set and vectors already
        # exist, nothing here clears them first -- confirm whether re-indexing
        # is expected to replace or to append to the existing vectors.
        storage_context = StorageContext.from_defaults(vector_store=vector_store)
        index = VectorStoreIndex.from_documents(
            _load_handbook_documents(),
            storage_context=storage_context,
        )
        logger.info("Documents indexed successfully.")
    else:
        logger.info("Existing Pinecone vectors found. Loading index from vector store.")
        index = VectorStoreIndex.from_vector_store(vector_store=vector_store)

    # NOTE(review): verify that the installed LlamaIndex version actually
    # applies a `system_prompt` keyword passed to as_query_engine; if it is
    # ignored, the prompt has to be attached to the LLM or a prompt template.
    return index.as_query_engine(
        similarity_top_k=5,
        system_prompt=system_prompt,
    )


# Built once at import time and shared by every request.
query_engine = build_query_engine()
| # ----------------------------- | |
| # Query Function | |
| # ----------------------------- | |
def query_doc(prompt):
    """Answer ``prompt`` with the shared query engine.

    Args:
        prompt: The user's question.

    Returns:
        str: The engine's response converted to text, or a generic apology
        if anything goes wrong. This function never raises, so the UI always
        has something to display.
    """
    try:
        return str(query_engine.query(prompt))
    except Exception as exc:  # UI boundary: report, don't crash the chat.
        # logger.exception records the full traceback, which a plain
        # logger.error(f"...") call would drop.
        logger.exception("Error while answering query: %s", exc)
        return "Sorry, something went wrong while processing your question. Please try again."
| # ----------------------------- | |
| # Example Questions | |
| # ----------------------------- | |
# Canned prompts; one sidebar button is rendered per entry, in this order.
example_questions = [
    "What is the leave policy?",
    "What is the work from home policy?",
    "What is the probation policy?",
    "What are the employee code of conduct rules?",
    "Who should I contact for confidential HR questions?",
]
| # ----------------------------- | |
| # Chat Functions | |
| # ----------------------------- | |
# Greeting shown when a conversation starts or is cleared
# (a list of role/content message dicts).
initial_chat = [
    dict(
        role="assistant",
        content="Hello, I am Ayesha, the DDS Enterprise HR Chatbot. Ask me a question about DDS HR policies.",
    ),
]
def respond(message, chat_history):
    """Handle one chat turn and return the cleared input plus updated history.

    Args:
        message: Text typed by the user; may be ``None`` or blank.
        chat_history: Existing list of ``{"role", "content"}`` messages, or
            ``None`` to start from the initial greeting.

    Returns:
        tuple: ``("", history)`` where ``history`` is a new list. The list
        passed in as ``chat_history`` (and the shared ``initial_chat``
        template) is never modified.
    """
    # Work on a private copy so neither the caller's list nor the module-level
    # greeting dicts can be altered by this turn.
    if chat_history is None:
        history = [dict(entry) for entry in initial_chat]
    else:
        history = list(chat_history)

    # Blank input: nudge the user instead of querying the engine.
    if not message or not message.strip():
        history.append(
            {
                "role": "assistant",
                "content": "Please enter a question about the DDS HR handbook.",
            }
        )
        return "", history

    answer = query_doc(message)
    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": answer})
    return "", history
def clear_chat():
    """Return a fresh list holding the initial greeting, used to reset the chat."""
    return list(initial_chat)
def set_example_question(question: str) -> str:
    """Return ``question`` unchanged.

    Used as a click handler so that an example button's text can be copied
    into the question box.
    """
    return question
| # ----------------------------- | |
| # Professional Gradio UI | |
| # ----------------------------- | |
# Logo displayed in the page header.
DDS_LOGO_URL = (
    "https://raw.githubusercontent.com/Decoding-Data-Science/airesidency/main/"
    "dds-logo-removebg-preview.png"
)

# Stylesheet for the page. The class selectors and the #chatbot / #question_box
# ids correspond to the elem_classes / elem_id values and HTML class
# attributes used by the UI layout in this file.
custom_css = """
body {
background: linear-gradient(135deg, #f8fafc 0%, #eef2ff 45%, #f8fafc 100%);
}
.gradio-container {
font-family: Inter, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
}
.main-container {
max-width: 1250px;
margin: auto;
}
.header-card {
background: rgba(255, 255, 255, 0.95);
border-radius: 24px;
padding: 26px;
box-shadow: 0 16px 40px rgba(15, 23, 42, 0.08);
border: 1px solid #e5e7eb;
margin-bottom: 20px;
}
.sidebar-card {
background: rgba(255, 255, 255, 0.96);
border-radius: 24px;
padding: 24px;
box-shadow: 0 16px 40px rgba(15, 23, 42, 0.08);
border: 1px solid #e5e7eb;
height: 100%;
}
.chat-card {
background: rgba(255, 255, 255, 0.96);
border-radius: 24px;
padding: 22px;
box-shadow: 0 16px 40px rgba(15, 23, 42, 0.08);
border: 1px solid #e5e7eb;
}
.logo-img {
max-width: 175px;
margin-bottom: 8px;
}
.title-text {
font-size: 32px;
font-weight: 850;
color: #111827;
margin-bottom: 8px;
letter-spacing: -0.03em;
}
.subtitle-text {
font-size: 16px;
color: #4b5563;
line-height: 1.65;
max-width: 850px;
}
.badge {
display: inline-block;
background: #eef2ff;
color: #3730a3;
padding: 7px 13px;
border-radius: 999px;
font-size: 13px;
font-weight: 650;
margin-right: 7px;
margin-bottom: 8px;
}
.status-box {
background: #f8fafc;
border: 1px solid #e5e7eb;
padding: 14px;
border-radius: 16px;
font-size: 14px;
color: #374151;
line-height: 1.6;
}
.small-note {
font-size: 13px;
color: #6b7280;
line-height: 1.55;
}
.footer-note {
font-size: 13px;
color: #6b7280;
text-align: center;
margin-top: 18px;
}
#chatbot {
min-height: 540px;
border-radius: 18px;
border: 1px solid #e5e7eb;
}
#question_box textarea {
border-radius: 16px !important;
}
.example-button {
margin-bottom: 8px !important;
border-radius: 14px !important;
white-space: normal !important;
text-align: left !important;
}
.primary-action {
border-radius: 14px !important;
}
.clear-action {
border-radius: 14px !important;
}
"""
# Static HTML / Markdown fragments, kept apart from the layout code below.
_LOGO_HTML = f"""
<img src="{DDS_LOGO_URL}" class="logo-img" alt="DDS Logo">
"""

_TITLE_HTML = """
<div class="title-text">DDS Enterprise HR Chatbot</div>
<div class="subtitle-text">
A professional HR policy assistant built for Decoding Data Science.
Ask questions from the uploaded DDS HR Handbook and get clear, concise answers
based on the available document content.
</div>
<br>
<span class="badge">HR Handbook Q&A</span>
<span class="badge">LlamaIndex</span>
<span class="badge">Pinecone</span>
<span class="badge">OpenAI</span>
<span class="badge">Gradio</span>
"""

_SCOPE_MARKDOWN = """
### What this assistant can help with
This chatbot answers questions only from the uploaded DDS HR Handbook.
**You can ask about:**
- Leave policies
- Work from home rules
- Probation guidelines
- Code of conduct
- Employee handbook policies
- HR contact process
"""

_STATUS_HTML = """
<div class="status-box">
<strong>Scope:</strong> DDS HR policies only<br>
<strong>Data source:</strong> Uploaded HR handbook<br>
<strong>Confidential questions:</strong> Redirected to HR email
</div>
"""

_DISCLAIMER_HTML = """
<hr>
<div class="small-note">
<strong>Important:</strong><br>
This chatbot does not answer salary, confidential, legal, or non-HR questions.
For confidential queries, contact
<strong>connect@decodingdatascience.com</strong>.
</div>
"""

_TIP_MARKDOWN = """
**Tip:** Ask specific questions for better answers.
Example: “What does the handbook say about probation?” instead of “Tell me everything.”
"""

_FOOTER_HTML = """
<div class="footer-note">
Built by Decoding Data Science | Enterprise HR Chatbot Demo
</div>
"""

# Page layout: header card, then a sidebar / chat two-column row, then footer.
with gr.Blocks(title="DDS Enterprise HR Chatbot") as demo:
    with gr.Column(elem_classes=["main-container"]):
        # Header: logo on the left, title and technology badges on the right.
        with gr.Row(elem_classes=["header-card"]):
            with gr.Column(scale=1, min_width=190):
                gr.HTML(_LOGO_HTML)
            with gr.Column(scale=5):
                gr.HTML(_TITLE_HTML)

        with gr.Row():
            # Left sidebar: scope description, example buttons, disclaimer.
            with gr.Column(scale=1, min_width=300, elem_classes=["sidebar-card"]):
                gr.Markdown(_SCOPE_MARKDOWN)
                gr.HTML(_STATUS_HTML)
                gr.Markdown("### Quick questions")
                example_buttons = [
                    gr.Button(
                        label_text,
                        variant="secondary",
                        size="sm",
                        elem_classes=["example-button"],
                    )
                    for label_text in example_questions
                ]
                gr.HTML(_DISCLAIMER_HTML)

            # Right side: conversation, question box and action buttons.
            with gr.Column(scale=3, elem_classes=["chat-card"]):
                chatbot = gr.Chatbot(
                    label="DDS HR Assistant",
                    elem_id="chatbot",
                    value=initial_chat.copy(),
                    height=540,
                )
                user_input = gr.Textbox(
                    label="Ask your HR policy question",
                    placeholder="Example: What is the leave policy?",
                    lines=2,
                    elem_id="question_box",
                )
                with gr.Row():
                    submit_btn = gr.Button(
                        "Ask Question",
                        variant="primary",
                        elem_classes=["primary-action"],
                    )
                    clear_btn = gr.Button(
                        "Clear Chat",
                        variant="secondary",
                        elem_classes=["clear-action"],
                    )
                gr.Markdown(_TIP_MARKDOWN)

        # Event wiring. Clicking "Ask Question" and pressing Enter in the
        # textbox both run respond(), which clears the box and updates the chat.
        for ask_trigger in (submit_btn.click, user_input.submit):
            ask_trigger(
                fn=respond,
                inputs=[user_input, chatbot],
                outputs=[user_input, chatbot],
            )

        clear_btn.click(fn=clear_chat, inputs=None, outputs=chatbot)

        # Each example button copies its own question into the textbox; the
        # question text is supplied to the handler through a per-button State.
        for btn, question in zip(example_buttons, example_questions):
            btn.click(
                fn=set_example_question,
                inputs=gr.State(question),
                outputs=user_input,
            )

        # Footer
        gr.HTML(_FOOTER_HTML)
if __name__ == "__main__":
    # Start the web app only when this file is run as a script.
    soft_theme = gr.themes.Soft(primary_hue="indigo", neutral_hue="slate")
    demo.launch(theme=soft_theme, css=custom_css)