diff --git a/.hintrc b/.hintrc new file mode 100644 index 0000000000..2fa1a2e82a --- /dev/null +++ b/.hintrc @@ -0,0 +1,13 @@ +{ + "extends": [ + "development" + ], + "hints": { + "axe/forms": [ + "default", + { + "select-name": "off" + } + ] + } +} \ No newline at end of file diff --git a/app.py b/app.py index f80bb53c3e..505c4cf3d6 100644 --- a/app.py +++ b/app.py @@ -197,19 +197,29 @@ def should_use_data(): return False + @app.route("/conversation", methods=["GET", "POST"]) def conversation(): message_uuid = str(uuid.uuid4()) - request_body = request.json - return conversation_internal(request_body, message_uuid) - -def conversation_internal(request_body, message_uuid): + request_body = None + file = request.files.get("file", None) + + request_body = {} + for key in request.form: + try: + request_body[key] = json.loads(request.form[key]) + except json.JSONDecodeError: + request_body[key] = request.form[key] + return conversation_internal(request_body, message_uuid, file) + +def conversation_internal(request_body, message_uuid, file=None): try: + print(request_body) use_data = should_use_data() if use_data: - return orchestrator.conversation_with_data(request_body, message_uuid) + return orchestrator.conversation_with_data(request_body, message_uuid, file) else: - return orchestrator.conversation_without_data(request_body, message_uuid) + return orchestrator.conversation_without_data(request_body, message_uuid, file) except Exception as e: logging.exception("Exception in /conversation") return jsonify({"error": str(e)}), 500 diff --git a/appsvc.yaml b/appsvc.yaml new file mode 100644 index 0000000000..5ff3642c13 --- /dev/null +++ b/appsvc.yaml @@ -0,0 +1,5 @@ +version: 1 + +pre-build: | + export POETRY_HTTP_BASIC_TNRDEV_USERNAME=agent + export POETRY_HTTP_BASIC_TNRDEV_PASSWORD=$AZURE_ARTIFACTS_PAT \ No newline at end of file diff --git a/backend/orchestrators/AdvancedOrchestrator.py b/backend/orchestrators/AdvancedOrchestrator.py index be3cfb64cb..55000b79e2 
100644 --- a/backend/orchestrators/AdvancedOrchestrator.py +++ b/backend/orchestrators/AdvancedOrchestrator.py @@ -13,7 +13,7 @@ def __init__(self): self.load_balancer = LoadBalancer() # Post chat info if data configured - def conversation_with_data(self, request_body, message_uuid): + def conversation_with_data(self, request_body, message_uuid, file=None): logging.debug(f"ADV ORCH COVO W/ DATA") # Get a weighted random OpenAIContext object openai_context = self.load_balancer.get_openai_context() @@ -57,7 +57,7 @@ def conversation_with_data(self, request_body, message_uuid): return Response(super().stream_with_data(body, headers, endpoint, message_uuid, history_metadata), mimetype='text/event-stream') # Post chat info if data not configured - def conversation_without_data(self, request_body, message_uuid): + def conversation_without_data(self, request_body, message_uuid, file=None): # Get a weighted random OpenAIContext object openai_context = self.load_balancer.get_openai_context() diff --git a/backend/orchestrators/DefaultOrchestrator.py b/backend/orchestrators/DefaultOrchestrator.py index b71ec11ae3..04693dbe74 100644 --- a/backend/orchestrators/DefaultOrchestrator.py +++ b/backend/orchestrators/DefaultOrchestrator.py @@ -4,7 +4,7 @@ class DefaultOrchestrator(Orchestrator): # Post chat info if data configured - def conversation_with_data(self, request_body, message_uuid): + def conversation_with_data(self, request_body, message_uuid, file=None): # Set up request variables body, headers = super().prepare_body_headers_with_data(request) base_url = super().AZURE_OPENAI_ENDPOINT if super().AZURE_OPENAI_ENDPOINT else f"https://{super().AZURE_OPENAI_RESOURCE}.openai.azure.com/" @@ -32,7 +32,7 @@ def conversation_with_data(self, request_body, message_uuid): return Response(super().stream_with_data(body, headers, endpoint, message_uuid, history_metadata), mimetype='text/event-stream') # Post chat info if data not configured - def conversation_without_data(self, 
request_body, message_uuid): + def conversation_without_data(self, request_body, message_uuid, file=None): # Setup for direct query to OpenAI openai.api_type = "azure" openai.api_base = super().AZURE_OPENAI_ENDPOINT if super().AZURE_OPENAI_ENDPOINT else f"https://{super().AZURE_OPENAI_RESOURCE}.openai.azure.com/" @@ -50,11 +50,20 @@ def conversation_without_data(self, request_body, message_uuid): for message in request_messages: if message: + # if message starts with User Provided File: then skip + if message["content"].startswith("User Provided File: "): + continue messages.append({ "role": message["role"] , - "content": message["content"] + "content": f"{message['content']}" }) + if(file): + messages.append({ + "role": "user", + "content": f"User Provided File: {super().parse_file(file)}" + }) + history_metadata = request_body.get("history_metadata", {}) history_metadata = super().conversation_client.create_conversation_item( request_body, diff --git a/backend/orchestrators/DynamicFormOrchestrator.py b/backend/orchestrators/DynamicFormOrchestrator.py new file mode 100644 index 0000000000..e03bb9aceb --- /dev/null +++ b/backend/orchestrators/DynamicFormOrchestrator.py @@ -0,0 +1,486 @@ +""" +Updated the DefaultOrchestrator to use JSONChat / BasicChat from TNR AI Tools. 
+""" + +import logging +import pydantic +from azure.identity import DefaultAzureCredential, get_bearer_token_provider +import openai +import time +from types import SimpleNamespace +from pydantic import BaseModel, Field +from flask import Response, request, jsonify +import requests +from typing import Any, Dict, List, Tuple +from .Orchestrator import Orchestrator +import flask +from tnr_ai_tools.json_response_utils import ( + get_format_instructions, + parse_json_str_into_validated_dict, +) +from tnr_ai_tools.json_chat import JSONChat +from tnr_ai_tools.basic_chat import BasicChat +from semantic_kernel.contents.chat_history import ChatHistory +from werkzeug.datastructures.file_storage import FileStorage +from typing_extensions import Annotated, Doc +from dataclasses import dataclass + + +@dataclass +class ResponseDataclass: + message: Annotated[str, Doc("The assistant's message content.")] + dynamic_form_data: Annotated[Dict[str, Any], Doc("The updated dynamic form data.")] + + +class ResponsePydantic(BaseModel): + message: str = Field(description="The assistant's message content.") + dynamic_form_data: Any = Field(description="The updated dynamic form data.") + + +class DynamicFormOrchestrator(Orchestrator): + def __init__(self): + """ + Orchestrates conversation for dynamic forms (e.g. profile form data for matcmaker project) + By default, it uses the api key is provided in AZURE_OPENAI_KEY env variable. + Otherwise, it falls back to trying to grab a token credential from AD token provider, associated with your logged-in Azure account. 
+ """ + api_endpoint = ( + super().env_params.AZURE_OPENAI_ENDPOINT + if super().env_params.AZURE_OPENAI_ENDPOINT + else f"https://{super().env_params.AZURE_OPENAI_RESOURCE}.openai.azure.com/" + ) + api_version = "2024-05-01-preview" + self.api_deployment = "gpt-4o" + + api_key = super().env_params.AZURE_OPENAI_KEY + if api_key in [None, ""]: + use_ad_token_provider = True + api_key = None + else: + use_ad_token_provider = False + + prompt_template = "{{$input_text}}" + self.json_chat = JSONChat( + prompt_template=prompt_template, + json_schema_class=ResponseDataclass, + api_endpoint=api_endpoint, + api_version=api_version, + api_deployment=self.api_deployment, + use_ad_token_provider=use_ad_token_provider, + api_key=api_key, + temperature=float(super().env_params.AZURE_OPENAI_TEMPERATURE), + max_tokens=int(super().env_params.AZURE_OPENAI_MAX_TOKENS), + top_p=float(super().env_params.AZURE_OPENAI_TOP_P), + ) + + # For AOAI On Your Data (OYD), i.e. LM inference with data source, we cannot use JSONChat because it abstracts away the AOAI call. Thus, we use the bare-bones AOAI chat completions client for this purpose. + # TODO allow option to pass in api key + azure_ad_token_provider = get_bearer_token_provider( + DefaultAzureCredential(), + "https://cognitiveservices.azure.com/.default", + ) + self.aoai_client = openai.AzureOpenAI( + azure_endpoint=api_endpoint, + api_version=api_version, + api_key=api_key, + azure_ad_token_provider=azure_ad_token_provider, + ) + + def conversation_without_data( + self, + request_body: Dict[str, Any], + message_uuid: str, + file: FileStorage | None = None, + ) -> Tuple[flask.Response, int]: + """ + Invokes LM call using the messages from the request_body, and returns a flask Response (with application/json mimetype) and a status code 200. + Optionally can take in a file that was attached to the message. 
+ + Example of `request_body`: + {'messages': + [ + {'id': '44398a69-ca9a-ed82-948a-c1d3b7b39a0b', 'role': 'user', 'content': 'hi', 'date': '2024-08-20T19:32:35.357Z'}, + {}, + {'id': 'c196a220-54ed-4ec8-9163-999a67070407', 'role': 'assistant', 'content': 'Hello, how may I assist you?', 'date': '2024-08-20T19:32:35.397Z'}, + {'id': 'de388a30-0edd-493d-435b-8d3f3b597328', 'role': 'user', 'content': 'Help me summarize a document', 'date': '2024-08-20T19:32:42.037Z'}, + {}, + {'id': 'ebb1ddaf-dbd8-43d6-a4c4-5c2e5c4631f0', 'role': 'assistant', 'content': 'Sure, please attach a document.', 'date': '2024-08-20T19:32:42.087Z'} + ] + } + """ + # Extract text from file + file_name = None + file_content = None + if file: + file_name = file.filename + file_content = self.parse_file(file) + message_for_attachment = dict( + role="system", + content=f"{file_name}{file_content}", + ) + message_for_context = dict( + role="system", + content=f"The attachment `{file_name}` was attached here in the conversation.", + ) + + # Construct prompt using chat history + chat_history = ChatHistory() + chat_history.add_message( + { + "role": "system", + "content": super().env_params.AZURE_OPENAI_SYSTEM_MESSAGE, + } + ) + if file: + chat_history.add_message(message_for_attachment) + + request_messages = clean_up_messages(request_body["messages"]) + [chat_history.add_message(m) for m in request_messages] + + if file: + chat_history.add_message(message_for_context) + prompt = chat_history.to_prompt() + + # Create conversation item in client + history_metadata = request_body.get("history_metadata", {}) + history_metadata = super().conversation_client.create_conversation_item( + request_body, + super().env_params.AZURE_OPENAI_RESOURCE, + super().env_params.AZURE_OPENAI_MODEL, + super().env_params.AZURE_OPENAI_TEMPERATURE, + history_metadata, + ) + + # Send request to chat completion + response = self.json_chat.generate_response(input_text=prompt) + # TODO: timestamp from AOAI API call is more 
accurate, but this will do for now + gen_timestamp = int(time.time()) + + if not super().env_params.SHOULD_STREAM: + response_obj = { + "id": message_uuid, + "model": self.api_deployment, + "created": gen_timestamp, + "object": "chat.completion", + "choices": [ + { + "messages": [ + { + "role": "assistant", + "content": response["message"], + } + ] + } + ], + "dynamic_form_data": response["dynamic_form_data"], + "history_metadata": history_metadata, + } + self.conversation_client.log_non_stream(response_obj) + return flask.jsonify(response_obj), 200 + + else: + raise Exception("Streaming is not implemented yet") + + def conversation_with_data( + self, + request_body: Dict[str, Any], + message_uuid: str, + file: FileStorage | None = None, + ) -> Tuple[flask.Response, int]: + """ + Invoke AOAI On Your Data (OYD), i.e. an LM call with data source (e.g. Azure AI Search). Also optionally uses file attachment in the conversation context. + """ + # TODO: include file in system messages if file is provided + if file is not None: + raise NotImplementedError + + # Massage args into usable form + messages = [] + messages.extend(clean_up_messages(request_body["messages"])) + + # Create conversation item in client + history_metadata = request_body.get("history_metadata", {}) + history_metadata = super().conversation_client.create_conversation_item( + request_body, + super().env_params.AZURE_OPENAI_RESOURCE, + super().env_params.AZURE_OPENAI_MODEL, + super().env_params.AZURE_OPENAI_TEMPERATURE, + history_metadata, + ) + + # pre-inference step: Add format instructions + format_instructions = get_format_instructions(ResponsePydantic) + system_message = f"{super().env_params.AZURE_OPENAI_SYSTEM_MESSAGE}\n\nOutput format instructions: {format_instructions}" + + # TODO: Implement logic to choose data source type; currently it's hardcoded to azure search + data_source_config = get_data_source_config( + data_source_type="azure_search", + env_dict=self.env_params.__dict__, + 
system_message=system_message, + ) + + response = self.aoai_client.chat.completions.create( + model=self.api_deployment, + messages=messages, + extra_body={"data_sources": [data_source_config]}, + # Note: When using AOAI On Your Data (i.e. using data source), this param `response_format` does not seem to be honored as of 2024/09/03, as seen from quick testing. Regardless, I left it set here to "json_object" because it can't hurt, even though I think it is ignored in the code logic. + response_format=dict(type="json_object"), + temperature=float(super().env_params.AZURE_OPENAI_TEMPERATURE), + max_tokens=int(super().env_params.AZURE_OPENAI_MAX_TOKENS), + top_p=float(super().env_params.AZURE_OPENAI_TOP_P), + ) + + # post-inference step: validate and parse response + response_message_str = response.choices[0].message.content + # Note: As of 2024/09/03, AOAI On Your Data does not seem to switch to JSON output mode even when setting `response_format=dict(type="json_object")` in the chat completion creation. Thus, we need to catch if the output is ever invalid and repair it. + # TODO: One suggestion to do a programmatic json cleanup step (does not require LM call) is to remove the '```json' and '```' delimiters if they exist and any text outside of them. Hopefully this would fix the json output enough sometimes so the repair attempt LM call would not be needed. 
+ try: + structured_response_dict = parse_json_str_into_validated_dict( + json_str=response_message_str, + model_cls=ResponsePydantic, + ) + except pydantic.ValidationError as e: + logging.warn( + f"Attempting to repair the response that was caught with this error: {str(e)}" + ) + + repaired_response = self.aoai_client.chat.completions.create( + model=self.api_deployment, + messages=[ + dict( + role="user", + content=f"Rewrite the following text into parsable JSON form:\n\n{response_message_str}\n\n{format_instructions}", + ), + ], + response_format=dict(type="json_object"), + temperature=float(super().env_params.AZURE_OPENAI_TEMPERATURE), + max_tokens=int(super().env_params.AZURE_OPENAI_MAX_TOKENS), + top_p=float(super().env_params.AZURE_OPENAI_TOP_P), + ) + repaired_response_str = repaired_response.choices[0].message.content + structured_response_dict = parse_json_str_into_validated_dict( + json_str=repaired_response_str, + model_cls=ResponsePydantic, + ) + + # massage data + if not super().env_params.SHOULD_STREAM: + response_obj = { + "id": message_uuid, + "model": response.model, + "created": response.created, + "object": response.object, + "choices": [ + { + "messages": [ + { + "role": "assistant", + "content": structured_response_dict["message"], + } + ] + } + ], + "dynamic_form_data": structured_response_dict["dynamic_form_data"], + "history_metadata": history_metadata, + } + self.conversation_client.log_non_stream(response_obj) + return flask.jsonify(response_obj), 200 + else: + raise Exception("Streaming is not implemented yet") + +def clean_up_messages(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """ + Given a messages list where each message might have fields beyond "role" and "content", or might be empty dicts themselves, clean up the messages such that each message exactly only has "role" and "content". + This is so the messages are clean enough to feed to an inference. 
+ """ + non_empty_messages = [m for m in messages if m != {}] + + new_messages = [] + for m in non_empty_messages: + new_message = dict( + role=m["role"], + content=m["content"], + ) + new_messages.append(new_message) + return new_messages + +def get_simple_azure_search_config( + azure_search_endpoint: str, + azure_search_index: str, +) -> Dict[str, Any]: + """ + Construct a data source config (intended to be ingested by AOAI chat completion inference with data source). + Simplest possible configuration, with least settings needed. + This is data source config of type "azure_search, and uses system assigned managed identity + """ + config = { + "type": "azure_search", + "parameters": { + "endpoint": azure_search_endpoint, + "index_name": azure_search_index, + "authentication": { + "type": "system_assigned_managed_identity", + }, + }, + } + return config + + +ALLOWED_DATA_SOURCE_TYPES = ("azure_search", "azure_cosmos_db") + +# TODO: Most of the code logic here was copy-pasted from 'prepare_body_headers_with_data' method of Orchestrator.py, but I felt it would be cleaner code to de-couple it from the Orchestrator class and instead be a standalone function. This way, it's easier to use and to test. Need to figure out if there's a better file to house this code. +def get_data_source_config( + data_source_type: str, + env_dict: Dict[str, Any], + system_message: str, +) -> Dict[str, Any]: + """ + Create a config dictionary intended to be passed into the `extra_body` parameter as a "data_source", used in the AzureOpenAI API call `client.chat.completions.create`. + """ + if data_source_type not in ALLOWED_DATA_SOURCE_TYPES: + raise ValueError( + f"Specified data_source_type must be one of {ALLOWED_DATA_SOURCE_TYPES}. 
The specified value was: {data_source_type}" + ) + + e = SimpleNamespace(**env_dict) + + config = None + if data_source_type == "azure_search": + # Set query type + query_type = "simple" + if e.AZURE_SEARCH_QUERY_TYPE: + query_type = e.AZURE_SEARCH_QUERY_TYPE + elif ( + e.AZURE_SEARCH_USE_SEMANTIC_SEARCH.lower() == "true" + and e.AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG + ): + query_type = "semantic" + + # Set filter + filter_string = None + userToken = None + if e.AZURE_SEARCH_PERMITTED_GROUPS_COLUMN: + userToken = request.headers.get("X-MS-TOKEN-AAD-ACCESS-TOKEN", "") + if e.DEBUG_LOGGING: + logging.debug( + f"USER TOKEN is {'present' if userToken else 'not present'}" + ) + + filter_string = generateFilterString( + userToken, + azure_search_permitted_groups_column=e.AZURE_SEARCH_PERMITTED_GROUPS_COLUMN, + ) + if e.DEBUG_LOGGING: + logging.debug(f"FILTER: {filter_string}") + + config = { + "type": "azure_search", + "parameters": { + "endpoint": f"https://{e.AZURE_SEARCH_SERVICE}.search.windows.net", + "index_name": e.AZURE_SEARCH_INDEX, + "authentication": { + "type": "system_assigned_managed_identity", + }, + "embedding_dependency": { + "type": "deployment_name", + "deployment_name": e.AZURE_OPENAI_EMBEDDING_NAME, + }, + "fields_mapping": { + "content_fields": ( + parse_multi_columns(e.AZURE_SEARCH_CONTENT_COLUMNS) + if e.AZURE_SEARCH_CONTENT_COLUMNS + else [] + ), + "title_field": ( + e.AZURE_SEARCH_TITLE_COLUMN + if e.AZURE_SEARCH_TITLE_COLUMN + else None + ), + "url_field": ( + e.AZURE_SEARCH_URL_COLUMN if e.AZURE_SEARCH_URL_COLUMN else None + ), + "filepath_field": ( + e.AZURE_SEARCH_FILENAME_COLUMN + if e.AZURE_SEARCH_FILENAME_COLUMN + else None + ), + "vector_fields": ( + parse_multi_columns(e.AZURE_SEARCH_VECTOR_COLUMNS) + if e.AZURE_SEARCH_VECTOR_COLUMNS + else [] + ), + }, + "in_scope": ( + True if e.AZURE_SEARCH_ENABLE_IN_DOMAIN.lower() == "true" else False + ), + "top_n_documents": int(e.AZURE_SEARCH_TOP_K), + "query_type": query_type, + 
"semantic_configuration": ( + e.AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG + if e.AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG + else "" + ), + "role_information": system_message, + "filter": filter_string, + "strictness": int(e.AZURE_SEARCH_STRICTNESS), + }, + } + elif data_source_type == "azure_cosmos_db": + raise NotImplementedError + assert isinstance(config, dict) + return config + + +# TODO: deal with duplicate code in Orchestrator.py +def parse_multi_columns(columns: str) -> list: + if "|" in columns: + return columns.split("|") + else: + return columns.split(",") + + +# TODO: deal with duplicate code in Orchestrator.py +def fetchUserGroups(userToken, nextLink=None, debug_logging: bool = True): + # Recursively fetch group membership + if nextLink: + endpoint = nextLink + else: + endpoint = "https://graph.microsoft.com/v1.0/me/transitiveMemberOf?$select=id" + + headers = {"Authorization": "bearer " + userToken} + try: + r = requests.get(endpoint, headers=headers) + if r.status_code != 200: + if debug_logging: + logging.error(f"Error fetching user groups: {r.status_code} {r.text}") + return [] + + r = r.json() + if "@odata.nextLink" in r: + nextLinkData = fetchUserGroups( + userToken, r["@odata.nextLink"], debug_logging + ) + r["value"].extend(nextLinkData) + + return r["value"] + except Exception as e: + logging.error(f"Exception in fetchUserGroups: {e}") + return [] + + +# TODO: deal with duplicate code in Orchestrator.py +def generateFilterString( + userToken, azure_search_permitted_groups_column: str, debug_logging: bool +): + # Get list of groups user is a member of + userGroups = fetchUserGroups(userToken, debug_logging=debug_logging) + + # Construct filter string + if not userGroups: + logging.debug("No user groups found") + + group_ids = ", ".join([obj["id"] for obj in userGroups]) + return f"{azure_search_permitted_groups_column}/any(g:search.in(g, '{group_ids}'))" + + + diff --git a/backend/orchestrators/Orchestrator.py b/backend/orchestrators/Orchestrator.py index 
6c94978d4b..08b5581c07 100644 --- a/backend/orchestrators/Orchestrator.py +++ b/backend/orchestrators/Orchestrator.py @@ -2,109 +2,170 @@ import os import json import logging +from types import SimpleNamespace import requests import copy from dotenv import load_dotenv from azure.identity import DefaultAzureCredential +from docx import Document +import fitz from backend.conversationtelemetry import ConversationTelemetryClient +from werkzeug.datastructures.file_storage import FileStorage + load_dotenv() -class Orchestrator(ABC): - DEBUG = os.environ.get("DEBUG", "false") - DEBUG_LOGGING = DEBUG.lower() == "true" - @abstractmethod - def conversation_with_data(self, request_body, message_uuid): - pass +def extract_env_params_into_simple_namespace() -> SimpleNamespace: + """Extract env params from ox.environ (choosing default values as needed)""" + p = SimpleNamespace() - @abstractmethod - def conversation_without_data(self, request_body, message_uuid): - pass + p.DEBUG = os.environ.get("DEBUG", "false") + p.DEBUG_LOGGING = p.DEBUG.lower() == "true" # Initialize search variables - DATASOURCE_TYPE = os.environ.get("DATASOURCE_TYPE", "AzureCognitiveSearch") - SEARCH_TOP_K = os.environ.get("SEARCH_TOP_K", 5) - SEARCH_STRICTNESS = os.environ.get("SEARCH_STRICTNESS", 3) - SEARCH_ENABLE_IN_DOMAIN = os.environ.get("SEARCH_ENABLE_IN_DOMAIN", "true") + p.DATASOURCE_TYPE = os.environ.get("DATASOURCE_TYPE", "AzureCognitiveSearch") + p.SEARCH_TOP_K = os.environ.get("SEARCH_TOP_K", 5) + p.SEARCH_STRICTNESS = os.environ.get("SEARCH_STRICTNESS", 3) + p.SEARCH_ENABLE_IN_DOMAIN = os.environ.get("SEARCH_ENABLE_IN_DOMAIN", "true") # Azure OpenAI Settings - AZURE_OPENAI_TEMPERATURE = os.environ.get("AZURE_OPENAI_TEMPERATURE", 0) - AZURE_OPENAI_EMBEDDING_ENDPOINT = os.environ.get("AZURE_OPENAI_EMBEDDING_ENDPOINT") - AZURE_OPENAI_MAX_TOKENS = os.environ.get("AZURE_OPENAI_MAX_TOKENS", 1000) - AZURE_OPENAI_MODEL = os.environ.get("AZURE_OPENAI_MODEL") - AZURE_OPENAI_TOP_P = 
os.environ.get("AZURE_OPENAI_TOP_P", 1.0) - AZURE_OPENAI_ENDPOINT = os.environ.get("AZURE_OPENAI_ENDPOINT") - AZURE_OPENAI_STOP_SEQUENCE = os.environ.get("AZURE_OPENAI_STOP_SEQUENCE") - AZURE_OPENAI_STREAM = os.environ.get("AZURE_OPENAI_STREAM", "true") - AZURE_OPENAI_EMBEDDING_KEY = os.environ.get("AZURE_OPENAI_EMBEDDING_KEY") - AZURE_OPENAI_KEY = os.environ.get("AZURE_OPENAI_KEY") - AZURE_OPENAI_EMBEDDING_NAME = os.environ.get("AZURE_OPENAI_EMBEDDING_NAME", "") - AZURE_OPENAI_RESOURCE = os.environ.get("AZURE_OPENAI_RESOURCE") - AZURE_OPENAI_PREVIEW_API_VERSION = os.environ.get("AZURE_OPENAI_PREVIEW_API_VERSION", "2023-08-01-preview") + p.AZURE_OPENAI_TEMPERATURE = os.environ.get("AZURE_OPENAI_TEMPERATURE", 0) + p.AZURE_OPENAI_EMBEDDING_ENDPOINT = os.environ.get( + "AZURE_OPENAI_EMBEDDING_ENDPOINT" + ) + p.AZURE_OPENAI_MAX_TOKENS = os.environ.get("AZURE_OPENAI_MAX_TOKENS", 1000) + p.AZURE_OPENAI_MODEL = os.environ.get("AZURE_OPENAI_MODEL") + p.AZURE_OPENAI_TOP_P = os.environ.get("AZURE_OPENAI_TOP_P", 1.0) + p.AZURE_OPENAI_ENDPOINT = os.environ.get("AZURE_OPENAI_ENDPOINT") + p.AZURE_OPENAI_STOP_SEQUENCE = os.environ.get("AZURE_OPENAI_STOP_SEQUENCE") + p.AZURE_OPENAI_STREAM = os.environ.get("AZURE_OPENAI_STREAM", "true") + p.AZURE_OPENAI_EMBEDDING_KEY = os.environ.get("AZURE_OPENAI_EMBEDDING_KEY") + p.AZURE_OPENAI_KEY = os.environ.get("AZURE_OPENAI_KEY") + p.AZURE_OPENAI_EMBEDDING_NAME = os.environ.get("AZURE_OPENAI_EMBEDDING_NAME", "") + p.AZURE_OPENAI_RESOURCE = os.environ.get("AZURE_OPENAI_RESOURCE") + p.AZURE_OPENAI_PREVIEW_API_VERSION = os.environ.get( + "AZURE_OPENAI_PREVIEW_API_VERSION", "2023-08-01-preview" + ) # AZURE_OPENAI_SYSTEM_MESSAGE = os.environ.get("AZURE_OPENAI_SYSTEM_MESSAGE", "You are an AI assistant that helps people find information.") - AZURE_OPENAI_SYSTEM_MESSAGE = os.environ.get("AZURE_OPENAI_SYSTEM_MESSAGE") + p.AZURE_OPENAI_SYSTEM_MESSAGE = os.environ.get("AZURE_OPENAI_SYSTEM_MESSAGE") # Azure Search Settings - AZURE_SEARCH_QUERY_TYPE = 
os.environ.get("AZURE_SEARCH_QUERY_TYPE") - AZURE_SEARCH_USE_SEMANTIC_SEARCH = os.environ.get("AZURE_SEARCH_USE_SEMANTIC_SEARCH", "false") - AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG = os.environ.get("AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG", "default") - AZURE_SEARCH_PERMITTED_GROUPS_COLUMN = os.environ.get("AZURE_SEARCH_PERMITTED_GROUPS_COLUMN") - AZURE_SEARCH_SERVICE = os.environ.get("AZURE_SEARCH_SERVICE") - AZURE_SEARCH_KEY = os.environ.get("AZURE_SEARCH_KEY") - AZURE_SEARCH_INDEX = os.environ.get("AZURE_SEARCH_INDEX") - AZURE_SEARCH_CONTENT_COLUMNS = os.environ.get("AZURE_SEARCH_CONTENT_COLUMNS") - AZURE_SEARCH_TITLE_COLUMN = os.environ.get("AZURE_SEARCH_TITLE_COLUMN") - AZURE_SEARCH_URL_COLUMN = os.environ.get("AZURE_SEARCH_URL_COLUMN") - AZURE_SEARCH_FILENAME_COLUMN = os.environ.get("AZURE_SEARCH_FILENAME_COLUMN") - AZURE_SEARCH_VECTOR_COLUMNS = os.environ.get("AZURE_SEARCH_VECTOR_COLUMNS") - AZURE_SEARCH_ENABLE_IN_DOMAIN = os.environ.get("AZURE_SEARCH_ENABLE_IN_DOMAIN", SEARCH_ENABLE_IN_DOMAIN) - AZURE_SEARCH_TOP_K = os.environ.get("AZURE_SEARCH_TOP_K", SEARCH_TOP_K) - AZURE_SEARCH_STRICTNESS = os.environ.get("AZURE_SEARCH_STRICTNESS", SEARCH_STRICTNESS) + p.AZURE_SEARCH_QUERY_TYPE = os.environ.get("AZURE_SEARCH_QUERY_TYPE") + p.AZURE_SEARCH_USE_SEMANTIC_SEARCH = os.environ.get( + "AZURE_SEARCH_USE_SEMANTIC_SEARCH", "false" + ) + p.AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG = os.environ.get( + "AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG", "default" + ) + p.AZURE_SEARCH_PERMITTED_GROUPS_COLUMN = os.environ.get( + "AZURE_SEARCH_PERMITTED_GROUPS_COLUMN" + ) + p.AZURE_SEARCH_SERVICE = os.environ.get("AZURE_SEARCH_SERVICE") + p.AZURE_SEARCH_KEY = os.environ.get("AZURE_SEARCH_KEY") + p.AZURE_SEARCH_INDEX = os.environ.get("AZURE_SEARCH_INDEX") + p.AZURE_SEARCH_CONTENT_COLUMNS = os.environ.get("AZURE_SEARCH_CONTENT_COLUMNS") + p.AZURE_SEARCH_TITLE_COLUMN = os.environ.get("AZURE_SEARCH_TITLE_COLUMN") + p.AZURE_SEARCH_URL_COLUMN = os.environ.get("AZURE_SEARCH_URL_COLUMN") + 
p.AZURE_SEARCH_FILENAME_COLUMN = os.environ.get("AZURE_SEARCH_FILENAME_COLUMN") + p.AZURE_SEARCH_VECTOR_COLUMNS = os.environ.get("AZURE_SEARCH_VECTOR_COLUMNS") + p.AZURE_SEARCH_ENABLE_IN_DOMAIN = os.environ.get( + "AZURE_SEARCH_ENABLE_IN_DOMAIN", p.SEARCH_ENABLE_IN_DOMAIN + ) + p.AZURE_SEARCH_TOP_K = os.environ.get("AZURE_SEARCH_TOP_K", p.SEARCH_TOP_K) + p.AZURE_SEARCH_STRICTNESS = os.environ.get( + "AZURE_SEARCH_STRICTNESS", p.SEARCH_STRICTNESS + ) # Azure CosmosDB - AZURE_COSMOSDB_ENDPOINT = f'https://{os.environ.get("MSR_AZURE_COSMOSDB_ACCOUNT")}.documents.azure.com:443/' - AZURE_COSMOSDB_DATABASE_NAME = os.environ.get("MSR_AZURE_COSMOSDB_DATABASE") - AZURE_COSMOSDB_CONTAINER_NAME = os.environ.get("MSR_AZURE_COSMOSDB_CONVERSATIONS_CONTAINER") + p.AZURE_COSMOSDB_ENDPOINT = f'https://{os.environ.get("MSR_AZURE_COSMOSDB_ACCOUNT")}.documents.azure.com:443/' + p.AZURE_COSMOSDB_DATABASE_NAME = os.environ.get("MSR_AZURE_COSMOSDB_DATABASE") + p.AZURE_COSMOSDB_CONTAINER_NAME = os.environ.get( + "MSR_AZURE_COSMOSDB_CONVERSATIONS_CONTAINER" + ) # CosmosDB Mongo vcore vector db Settings - AZURE_COSMOSDB_MONGO_VCORE_CONNECTION_STRING = os.environ.get("AZURE_COSMOSDB_MONGO_VCORE_CONNECTION_STRING") #This has to be secure string - AZURE_COSMOSDB_MONGO_VCORE_DATABASE = os.environ.get("AZURE_COSMOSDB_MONGO_VCORE_DATABASE") - AZURE_COSMOSDB_MONGO_VCORE_CONTAINER = os.environ.get("AZURE_COSMOSDB_MONGO_VCORE_CONTAINER") - AZURE_COSMOSDB_MONGO_VCORE_INDEX = os.environ.get("AZURE_COSMOSDB_MONGO_VCORE_INDEX") - AZURE_COSMOSDB_MONGO_VCORE_TOP_K = os.environ.get("AZURE_COSMOSDB_MONGO_VCORE_TOP_K", AZURE_SEARCH_TOP_K) - AZURE_COSMOSDB_MONGO_VCORE_STRICTNESS = os.environ.get("AZURE_COSMOSDB_MONGO_VCORE_STRICTNESS", AZURE_SEARCH_STRICTNESS) - AZURE_COSMOSDB_MONGO_VCORE_ENABLE_IN_DOMAIN = os.environ.get("AZURE_COSMOSDB_MONGO_VCORE_ENABLE_IN_DOMAIN", AZURE_SEARCH_ENABLE_IN_DOMAIN) - AZURE_COSMOSDB_MONGO_VCORE_CONTENT_COLUMNS = os.environ.get("AZURE_COSMOSDB_MONGO_VCORE_CONTENT_COLUMNS", "") 
- AZURE_COSMOSDB_MONGO_VCORE_FILENAME_COLUMN = os.environ.get("AZURE_COSMOSDB_MONGO_VCORE_FILENAME_COLUMN") - AZURE_COSMOSDB_MONGO_VCORE_TITLE_COLUMN = os.environ.get("AZURE_COSMOSDB_MONGO_VCORE_TITLE_COLUMN") - AZURE_COSMOSDB_MONGO_VCORE_URL_COLUMN = os.environ.get("AZURE_COSMOSDB_MONGO_VCORE_URL_COLUMN") - AZURE_COSMOSDB_MONGO_VCORE_VECTOR_COLUMNS = os.environ.get("AZURE_COSMOSDB_MONGO_VCORE_VECTOR_COLUMNS") + p.AZURE_COSMOSDB_MONGO_VCORE_CONNECTION_STRING = os.environ.get( + "AZURE_COSMOSDB_MONGO_VCORE_CONNECTION_STRING" + ) # This has to be secure string + p.AZURE_COSMOSDB_MONGO_VCORE_DATABASE = os.environ.get( + "AZURE_COSMOSDB_MONGO_VCORE_DATABASE" + ) + p.AZURE_COSMOSDB_MONGO_VCORE_CONTAINER = os.environ.get( + "AZURE_COSMOSDB_MONGO_VCORE_CONTAINER" + ) + p.AZURE_COSMOSDB_MONGO_VCORE_INDEX = os.environ.get( + "AZURE_COSMOSDB_MONGO_VCORE_INDEX" + ) + p.AZURE_COSMOSDB_MONGO_VCORE_TOP_K = os.environ.get( + "AZURE_COSMOSDB_MONGO_VCORE_TOP_K", p.AZURE_SEARCH_TOP_K + ) + p.AZURE_COSMOSDB_MONGO_VCORE_STRICTNESS = os.environ.get( + "AZURE_COSMOSDB_MONGO_VCORE_STRICTNESS", p.AZURE_SEARCH_STRICTNESS + ) + p.AZURE_COSMOSDB_MONGO_VCORE_ENABLE_IN_DOMAIN = os.environ.get( + "AZURE_COSMOSDB_MONGO_VCORE_ENABLE_IN_DOMAIN", p.AZURE_SEARCH_ENABLE_IN_DOMAIN + ) + p.AZURE_COSMOSDB_MONGO_VCORE_CONTENT_COLUMNS = os.environ.get( + "AZURE_COSMOSDB_MONGO_VCORE_CONTENT_COLUMNS", "" + ) + p.AZURE_COSMOSDB_MONGO_VCORE_FILENAME_COLUMN = os.environ.get( + "AZURE_COSMOSDB_MONGO_VCORE_FILENAME_COLUMN" + ) + p.AZURE_COSMOSDB_MONGO_VCORE_TITLE_COLUMN = os.environ.get( + "AZURE_COSMOSDB_MONGO_VCORE_TITLE_COLUMN" + ) + p.AZURE_COSMOSDB_MONGO_VCORE_URL_COLUMN = os.environ.get( + "AZURE_COSMOSDB_MONGO_VCORE_URL_COLUMN" + ) + p.AZURE_COSMOSDB_MONGO_VCORE_VECTOR_COLUMNS = os.environ.get( + "AZURE_COSMOSDB_MONGO_VCORE_VECTOR_COLUMNS" + ) # Elasticsearch Integration Settings - ELASTICSEARCH_ENDPOINT = os.environ.get("ELASTICSEARCH_ENDPOINT") - ELASTICSEARCH_ENCODED_API_KEY = 
os.environ.get("ELASTICSEARCH_ENCODED_API_KEY") - ELASTICSEARCH_INDEX = os.environ.get("ELASTICSEARCH_INDEX") - ELASTICSEARCH_QUERY_TYPE = os.environ.get("ELASTICSEARCH_QUERY_TYPE", "simple") - ELASTICSEARCH_TOP_K = os.environ.get("ELASTICSEARCH_TOP_K", SEARCH_TOP_K) - ELASTICSEARCH_ENABLE_IN_DOMAIN = os.environ.get("ELASTICSEARCH_ENABLE_IN_DOMAIN", SEARCH_ENABLE_IN_DOMAIN) - ELASTICSEARCH_CONTENT_COLUMNS = os.environ.get("ELASTICSEARCH_CONTENT_COLUMNS") - ELASTICSEARCH_FILENAME_COLUMN = os.environ.get("ELASTICSEARCH_FILENAME_COLUMN") - ELASTICSEARCH_TITLE_COLUMN = os.environ.get("ELASTICSEARCH_TITLE_COLUMN") - ELASTICSEARCH_URL_COLUMN = os.environ.get("ELASTICSEARCH_URL_COLUMN") - ELASTICSEARCH_VECTOR_COLUMNS = os.environ.get("ELASTICSEARCH_VECTOR_COLUMNS") - ELASTICSEARCH_STRICTNESS = os.environ.get("ELASTICSEARCH_STRICTNESS", SEARCH_STRICTNESS) - ELASTICSEARCH_EMBEDDING_MODEL_ID = os.environ.get("ELASTICSEARCH_EMBEDDING_MODEL_ID") - - SHOULD_STREAM = True if AZURE_OPENAI_STREAM.lower() == "true" else False + p.ELASTICSEARCH_ENDPOINT = os.environ.get("ELASTICSEARCH_ENDPOINT") + p.ELASTICSEARCH_ENCODED_API_KEY = os.environ.get("ELASTICSEARCH_ENCODED_API_KEY") + p.ELASTICSEARCH_INDEX = os.environ.get("ELASTICSEARCH_INDEX") + p.ELASTICSEARCH_QUERY_TYPE = os.environ.get("ELASTICSEARCH_QUERY_TYPE", "simple") + p.ELASTICSEARCH_TOP_K = os.environ.get("ELASTICSEARCH_TOP_K", p.SEARCH_TOP_K) + p.ELASTICSEARCH_ENABLE_IN_DOMAIN = os.environ.get( + "ELASTICSEARCH_ENABLE_IN_DOMAIN", p.SEARCH_ENABLE_IN_DOMAIN + ) + p.ELASTICSEARCH_CONTENT_COLUMNS = os.environ.get("ELASTICSEARCH_CONTENT_COLUMNS") + p.ELASTICSEARCH_FILENAME_COLUMN = os.environ.get("ELASTICSEARCH_FILENAME_COLUMN") + p.ELASTICSEARCH_TITLE_COLUMN = os.environ.get("ELASTICSEARCH_TITLE_COLUMN") + p.ELASTICSEARCH_URL_COLUMN = os.environ.get("ELASTICSEARCH_URL_COLUMN") + p.ELASTICSEARCH_VECTOR_COLUMNS = os.environ.get("ELASTICSEARCH_VECTOR_COLUMNS") + p.ELASTICSEARCH_STRICTNESS = os.environ.get( + 
"ELASTICSEARCH_STRICTNESS", p.SEARCH_STRICTNESS + ) + p.ELASTICSEARCH_EMBEDDING_MODEL_ID = os.environ.get( + "ELASTICSEARCH_EMBEDDING_MODEL_ID" + ) + + p.SHOULD_STREAM = True if p.AZURE_OPENAI_STREAM.lower() == "true" else False + + return p + + +class Orchestrator(ABC): + @abstractmethod + def conversation_with_data(self, request_body, message_uuid, file=None): + pass + + @abstractmethod + def conversation_without_data(self, request_body, message_uuid, file=None): + pass + + env_params = extract_env_params_into_simple_namespace() message_uuid = "" conversation_client = ConversationTelemetryClient( - cosmosdb_endpoint=str(AZURE_COSMOSDB_ENDPOINT), + cosmosdb_endpoint=str(env_params.AZURE_COSMOSDB_ENDPOINT), credential=DefaultAzureCredential(), - database_name=str(AZURE_COSMOSDB_DATABASE_NAME), - container_name=str(AZURE_COSMOSDB_CONTAINER_NAME) + database_name=str(env_params.AZURE_COSMOSDB_DATABASE_NAME), + container_name=str(env_params.AZURE_COSMOSDB_CONTAINER_NAME), ) # methods to implement in orchestrator @@ -113,29 +174,31 @@ def fetchUserGroups(self, userToken, nextLink=None): if nextLink: endpoint = nextLink else: - endpoint = "https://graph.microsoft.com/v1.0/me/transitiveMemberOf?$select=id" - - headers = { - 'Authorization': "bearer " + userToken - } - try : + endpoint = ( + "https://graph.microsoft.com/v1.0/me/transitiveMemberOf?$select=id" + ) + + headers = {"Authorization": "bearer " + userToken} + try: r = requests.get(endpoint, headers=headers) if r.status_code != 200: - if self.DEBUG_LOGGING: - logging.error(f"Error fetching user groups: {r.status_code} {r.text}") + if self.env_params.DEBUG_LOGGING: + logging.error( + f"Error fetching user groups: {r.status_code} {r.text}" + ) return [] - + r = r.json() if "@odata.nextLink" in r: nextLinkData = self.fetchUserGroups(userToken, r["@odata.nextLink"]) - r['value'].extend(nextLinkData) - - return r['value'] + r["value"].extend(nextLinkData) + + return r["value"] except Exception as e: 
logging.error(f"Exception in fetchUserGroups: {e}") return [] - # Filter for permitted user groups + # Filter for permitted user groups def generateFilterString(self, userToken): # Get list of groups user is a member of userGroups = self.fetchUserGroups(self, userToken) @@ -144,8 +207,8 @@ def generateFilterString(self, userToken): if not userGroups: logging.debug("No user groups found") - group_ids = ", ".join([obj['id'] for obj in userGroups]) - return f"{self.AZURE_SEARCH_PERMITTED_GROUPS_COLUMN}/any(g:search.in(g, '{group_ids}'))" + group_ids = ", ".join([obj["id"] for obj in userGroups]) + return f"{self.env_params.AZURE_SEARCH_PERMITTED_GROUPS_COLUMN}/any(g:search.in(g, '{group_ids}'))" # Format response as newline delimited json def format_as_ndjson(self, obj: dict) -> str: @@ -157,65 +220,155 @@ def parse_multi_columns(self, columns: str) -> list: else: return columns.split(",") + def parse_file(self, file: FileStorage) -> str: + res = "" + + # if file is plain text, return the text + if file.content_type == "text/plain": + res = file.read().decode("utf-8") + + # if file is docx parse using docx + elif ( + file.content_type + == "application/vnd.openxmlformats-officedocument.wordprocessingml.document" + ): + doc = Document(file) + fullText = [] + for para in doc.paragraphs: + fullText.append(para.text) + res = "\n".join(fullText) + + # if file is pdf, parse using pdf + elif file.content_type == "application/pdf": + document = fitz.open(stream=file.read(), filetype="pdf") + + for page_num in range(len(document)): + page = document[page_num] + res += page.get_text() + + else: + return "The user has provided a non supported file type" + + # check if res is more than 50000 characters + if len(res) > 50000: + return ( + "The user has provided a file with more than the 1000 character limit" + ) + + return res + # Format request body and headers with relevant info based on search type def prepare_body_headers_with_data(self, request, **kwargs): - request_messages
= request.json["messages"] - key=kwargs.get('key', self.AZURE_OPENAI_KEY) + messages_str = request.form.get("messages") + request_messages = json.loads(messages_str) + + file = request.files.get("file", None) + if file: + request_messages.append( + { + "role": "user", + "content": f"File: {self.parse_file(file)}", + } + ) + key = kwargs.get("key", self.env_params.AZURE_OPENAI_KEY) body = { "messages": request_messages, - "temperature": float(self.AZURE_OPENAI_TEMPERATURE), - "max_tokens": int(self.AZURE_OPENAI_MAX_TOKENS), - "top_p": float(self.AZURE_OPENAI_TOP_P), - "stop": self.AZURE_OPENAI_STOP_SEQUENCE.split("|") if self.AZURE_OPENAI_STOP_SEQUENCE else None, - "stream": self.SHOULD_STREAM, - "dataSources": [] + "temperature": float(self.env_params.AZURE_OPENAI_TEMPERATURE), + "max_tokens": int(self.env_params.AZURE_OPENAI_MAX_TOKENS), + "top_p": float(self.env_params.AZURE_OPENAI_TOP_P), + "stop": ( + self.env_params.AZURE_OPENAI_STOP_SEQUENCE.split("|") + if self.env_params.AZURE_OPENAI_STOP_SEQUENCE + else None + ), + "stream": self.env_params.SHOULD_STREAM, + "dataSources": [], } - if self.DATASOURCE_TYPE == "AzureCognitiveSearch": + if self.env_params.DATASOURCE_TYPE == "AzureCognitiveSearch": # Set query type query_type = "simple" - if self.AZURE_SEARCH_QUERY_TYPE: - query_type = self.AZURE_SEARCH_QUERY_TYPE - elif self.AZURE_SEARCH_USE_SEMANTIC_SEARCH.lower() == "true" and self.AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG: + if self.env_params.AZURE_SEARCH_QUERY_TYPE: + query_type = self.env_params.AZURE_SEARCH_QUERY_TYPE + elif ( + self.env_params.AZURE_SEARCH_USE_SEMANTIC_SEARCH.lower() == "true" + and self.env_params.AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG + ): query_type = "semantic" # Set filter filter = None userToken = None - if self.AZURE_SEARCH_PERMITTED_GROUPS_COLUMN: - userToken = request.headers.get('X-MS-TOKEN-AAD-ACCESS-TOKEN', "") - if self.DEBUG_LOGGING: - logging.debug(f"USER TOKEN is {'present' if userToken else 'not present'}") + if 
self.env_params.AZURE_SEARCH_PERMITTED_GROUPS_COLUMN: + userToken = request.headers.get("X-MS-TOKEN-AAD-ACCESS-TOKEN", "") + if self.env_params.DEBUG_LOGGING: + logging.debug( + f"USER TOKEN is {'present' if userToken else 'not present'}" + ) filter = self.generateFilterString(userToken) - if self.DEBUG_LOGGING: + if self.env_params.DEBUG_LOGGING: logging.debug(f"FILTER: {filter}") body["dataSources"].append( { "type": "AzureCognitiveSearch", "parameters": { - "endpoint": f"https://{self.AZURE_SEARCH_SERVICE}.search.windows.net", - "key": self.AZURE_SEARCH_KEY, - "indexName": self.AZURE_SEARCH_INDEX, + "endpoint": f"https://{self.env_params.AZURE_SEARCH_SERVICE}.search.windows.net", + "key": self.env_params.AZURE_SEARCH_KEY, + "indexName": self.env_params.AZURE_SEARCH_INDEX, "fieldsMapping": { - "contentFields": self.parse_multi_columns(self.AZURE_SEARCH_CONTENT_COLUMNS) if self.AZURE_SEARCH_CONTENT_COLUMNS else [], - "titleField": self.AZURE_SEARCH_TITLE_COLUMN if self.AZURE_SEARCH_TITLE_COLUMN else None, - "urlField": self.AZURE_SEARCH_URL_COLUMN if self.AZURE_SEARCH_URL_COLUMN else None, - "filepathField": self.AZURE_SEARCH_FILENAME_COLUMN if self.AZURE_SEARCH_FILENAME_COLUMN else None, - "vectorFields": self.parse_multi_columns(self.AZURE_SEARCH_VECTOR_COLUMNS) if self.AZURE_SEARCH_VECTOR_COLUMNS else [] + "contentFields": ( + self.parse_multi_columns( + self.env_params.AZURE_SEARCH_CONTENT_COLUMNS + ) + if self.env_params.AZURE_SEARCH_CONTENT_COLUMNS + else [] + ), + "titleField": ( + self.env_params.AZURE_SEARCH_TITLE_COLUMN + if self.env_params.AZURE_SEARCH_TITLE_COLUMN + else None + ), + "urlField": ( + self.env_params.AZURE_SEARCH_URL_COLUMN + if self.env_params.AZURE_SEARCH_URL_COLUMN + else None + ), + "filepathField": ( + self.env_params.AZURE_SEARCH_FILENAME_COLUMN + if self.env_params.AZURE_SEARCH_FILENAME_COLUMN + else None + ), + "vectorFields": ( + self.parse_multi_columns( + self.env_params.AZURE_SEARCH_VECTOR_COLUMNS + ) + if 
self.env_params.AZURE_SEARCH_VECTOR_COLUMNS + else [] + ), }, - "inScope": True if self.AZURE_SEARCH_ENABLE_IN_DOMAIN.lower() == "true" else False, - "topNDocuments": int(self.AZURE_SEARCH_TOP_K), + "inScope": ( + True + if self.env_params.AZURE_SEARCH_ENABLE_IN_DOMAIN.lower() + == "true" + else False + ), + "topNDocuments": int(self.env_params.AZURE_SEARCH_TOP_K), "queryType": query_type, - "semanticConfiguration": self.AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG if self.AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG else "", - "roleInformation": self.AZURE_OPENAI_SYSTEM_MESSAGE, + "semanticConfiguration": ( + self.env_params.AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG + if self.env_params.AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG + else "" + ), + "roleInformation": self.env_params.AZURE_OPENAI_SYSTEM_MESSAGE, "filter": filter, - "strictness": int(self.AZURE_SEARCH_STRICTNESS) - } - }) - elif self.DATASOURCE_TYPE == "AzureCosmosDB": + "strictness": int(self.env_params.AZURE_SEARCH_STRICTNESS), + }, + } + ) + elif self.env_params.DATASOURCE_TYPE == "AzureCosmosDB": # Set query type query_type = "vector" @@ -223,23 +376,56 @@ def prepare_body_headers_with_data(self, request, **kwargs): { "type": "AzureCosmosDB", "parameters": { - "connectionString": self.AZURE_COSMOSDB_MONGO_VCORE_CONNECTION_STRING, - "indexName": self.AZURE_COSMOSDB_MONGO_VCORE_INDEX, - "databaseName": self.AZURE_COSMOSDB_MONGO_VCORE_DATABASE, - "containerName": self.AZURE_COSMOSDB_MONGO_VCORE_CONTAINER, + "connectionString": self.env_params.AZURE_COSMOSDB_MONGO_VCORE_CONNECTION_STRING, + "indexName": self.env_params.AZURE_COSMOSDB_MONGO_VCORE_INDEX, + "databaseName": self.env_params.AZURE_COSMOSDB_MONGO_VCORE_DATABASE, + "containerName": self.env_params.AZURE_COSMOSDB_MONGO_VCORE_CONTAINER, "fieldsMapping": { - "contentFields": self.parse_multi_columns(self.AZURE_COSMOSDB_MONGO_VCORE_CONTENT_COLUMNS) if self.AZURE_COSMOSDB_MONGO_VCORE_CONTENT_COLUMNS else [], - "titleField": self.AZURE_COSMOSDB_MONGO_VCORE_TITLE_COLUMN if 
self.AZURE_COSMOSDB_MONGO_VCORE_TITLE_COLUMN else None, - "urlField": self.AZURE_COSMOSDB_MONGO_VCORE_URL_COLUMN if self.AZURE_COSMOSDB_MONGO_VCORE_URL_COLUMN else None, - "filepathField": self.AZURE_COSMOSDB_MONGO_VCORE_FILENAME_COLUMN if self.AZURE_COSMOSDB_MONGO_VCORE_FILENAME_COLUMN else None, - "vectorFields": self.parse_multi_columns(self.AZURE_COSMOSDB_MONGO_VCORE_VECTOR_COLUMNS) if self.AZURE_COSMOSDB_MONGO_VCORE_VECTOR_COLUMNS else [] + "contentFields": ( + self.parse_multi_columns( + self.env_params.AZURE_COSMOSDB_MONGO_VCORE_CONTENT_COLUMNS + ) + if self.env_params.AZURE_COSMOSDB_MONGO_VCORE_CONTENT_COLUMNS + else [] + ), + "titleField": ( + self.env_params.AZURE_COSMOSDB_MONGO_VCORE_TITLE_COLUMN + if self.env_params.AZURE_COSMOSDB_MONGO_VCORE_TITLE_COLUMN + else None + ), + "urlField": ( + self.env_params.AZURE_COSMOSDB_MONGO_VCORE_URL_COLUMN + if self.env_params.AZURE_COSMOSDB_MONGO_VCORE_URL_COLUMN + else None + ), + "filepathField": ( + self.env_params.AZURE_COSMOSDB_MONGO_VCORE_FILENAME_COLUMN + if self.env_params.AZURE_COSMOSDB_MONGO_VCORE_FILENAME_COLUMN + else None + ), + "vectorFields": ( + self.parse_multi_columns( + self.env_params.AZURE_COSMOSDB_MONGO_VCORE_VECTOR_COLUMNS + ) + if self.env_params.AZURE_COSMOSDB_MONGO_VCORE_VECTOR_COLUMNS + else [] + ), }, - "inScope": True if self.AZURE_COSMOSDB_MONGO_VCORE_ENABLE_IN_DOMAIN.lower() == "true" else False, - "topNDocuments": int(self.AZURE_COSMOSDB_MONGO_VCORE_TOP_K), - "strictness": int(self.AZURE_COSMOSDB_MONGO_VCORE_STRICTNESS), + "inScope": ( + True + if self.env_params.AZURE_COSMOSDB_MONGO_VCORE_ENABLE_IN_DOMAIN.lower() + == "true" + else False + ), + "topNDocuments": int( + self.env_params.AZURE_COSMOSDB_MONGO_VCORE_TOP_K + ), + "strictness": int( + self.env_params.AZURE_COSMOSDB_MONGO_VCORE_STRICTNESS + ), "queryType": query_type, - "roleInformation": self.AZURE_OPENAI_SYSTEM_MESSAGE - } + "roleInformation": self.env_params.AZURE_OPENAI_SYSTEM_MESSAGE, + }, } ) @@ -247,49 +433,94 @@ def 
prepare_body_headers_with_data(self, request, **kwargs): body["dataSources"].append( { "messages": request_messages, - "temperature": float(self.AZURE_OPENAI_TEMPERATURE), - "max_tokens": int(self.AZURE_OPENAI_MAX_TOKENS), - "top_p": float(self.AZURE_OPENAI_TOP_P), - "stop": self.AZURE_OPENAI_STOP_SEQUENCE.split("|") if self.AZURE_OPENAI_STOP_SEQUENCE else None, - "stream": self.SHOULD_STREAM, + "temperature": float(self.env_params.AZURE_OPENAI_TEMPERATURE), + "max_tokens": int(self.env_params.AZURE_OPENAI_MAX_TOKENS), + "top_p": float(self.env_params.AZURE_OPENAI_TOP_P), + "stop": ( + self.env_params.AZURE_OPENAI_STOP_SEQUENCE.split("|") + if self.env_params.AZURE_OPENAI_STOP_SEQUENCE + else None + ), + "stream": self.env_params.SHOULD_STREAM, "dataSources": [ { "type": "AzureCognitiveSearch", "parameters": { - "endpoint": self.ELASTICSEARCH_ENDPOINT, - "encodedApiKey": self.ELASTICSEARCH_ENCODED_API_KEY, - "indexName": self.ELASTICSEARCH_INDEX, + "endpoint": self.env_params.ELASTICSEARCH_ENDPOINT, + "encodedApiKey": self.env_params.ELASTICSEARCH_ENCODED_API_KEY, + "indexName": self.env_params.ELASTICSEARCH_INDEX, "fieldsMapping": { - "contentFields": self.parse_multi_columns(self.ELASTICSEARCH_CONTENT_COLUMNS) if self.ELASTICSEARCH_CONTENT_COLUMNS else [], - "titleField": self.ELASTICSEARCH_TITLE_COLUMN if self.ELASTICSEARCH_TITLE_COLUMN else None, - "urlField": self.ELASTICSEARCH_URL_COLUMN if self.ELASTICSEARCH_URL_COLUMN else None, - "filepathField": self.ELASTICSEARCH_FILENAME_COLUMN if self.ELASTICSEARCH_FILENAME_COLUMN else None, - "vectorFields": self.parse_multi_columns(self.ELASTICSEARCH_VECTOR_COLUMNS) if self.ELASTICSEARCH_VECTOR_COLUMNS else [] + "contentFields": ( + self.parse_multi_columns( + self.env_params.ELASTICSEARCH_CONTENT_COLUMNS + ) + if self.env_params.ELASTICSEARCH_CONTENT_COLUMNS + else [] + ), + "titleField": ( + self.env_params.ELASTICSEARCH_TITLE_COLUMN + if self.env_params.ELASTICSEARCH_TITLE_COLUMN + else None + ), + "urlField": ( + 
self.env_params.ELASTICSEARCH_URL_COLUMN + if self.env_params.ELASTICSEARCH_URL_COLUMN + else None + ), + "filepathField": ( + self.env_params.ELASTICSEARCH_FILENAME_COLUMN + if self.env_params.ELASTICSEARCH_FILENAME_COLUMN + else None + ), + "vectorFields": ( + self.parse_multi_columns( + self.env_params.ELASTICSEARCH_VECTOR_COLUMNS + ) + if self.env_params.ELASTICSEARCH_VECTOR_COLUMNS + else [] + ), }, - "inScope": True if self.ELASTICSEARCH_ENABLE_IN_DOMAIN.lower() == "true" else False, - "topNDocuments": int(self.ELASTICSEARCH_TOP_K), - "queryType": self.ELASTICSEARCH_QUERY_TYPE, - "roleInformation": self.AZURE_OPENAI_SYSTEM_MESSAGE, - "embeddingEndpoint": self.AZURE_OPENAI_EMBEDDING_ENDPOINT, - "embeddingKey": self.AZURE_OPENAI_EMBEDDING_KEY, - "embeddingModelId": self.ELASTICSEARCH_EMBEDDING_MODEL_ID, - "strictness": int(self.ELASTICSEARCH_STRICTNESS) - } + "inScope": ( + True + if self.env_params.ELASTICSEARCH_ENABLE_IN_DOMAIN.lower() + == "true" + else False + ), + "topNDocuments": int( + self.env_params.ELASTICSEARCH_TOP_K + ), + "queryType": self.env_params.ELASTICSEARCH_QUERY_TYPE, + "roleInformation": self.env_params.AZURE_OPENAI_SYSTEM_MESSAGE, + "embeddingEndpoint": self.env_params.AZURE_OPENAI_EMBEDDING_ENDPOINT, + "embeddingKey": self.env_params.AZURE_OPENAI_EMBEDDING_KEY, + "embeddingModelId": self.env_params.ELASTICSEARCH_EMBEDDING_MODEL_ID, + "strictness": int( + self.env_params.ELASTICSEARCH_STRICTNESS + ), + }, } - ] + ], } ) else: - raise Exception(f"DATASOURCE_TYPE is not configured or unknown: {self.DATASOURCE_TYPE}") + raise Exception( + f"DATASOURCE_TYPE is not configured or unknown: {self.env_params.DATASOURCE_TYPE}" + ) if "vector" in query_type.lower(): - if self.AZURE_OPENAI_EMBEDDING_NAME: - body["dataSources"][0]["parameters"]["embeddingDeploymentName"] = self.AZURE_OPENAI_EMBEDDING_NAME + if self.env_params.AZURE_OPENAI_EMBEDDING_NAME: + body["dataSources"][0]["parameters"][ + "embeddingDeploymentName" + ] = 
self.env_params.AZURE_OPENAI_EMBEDDING_NAME else: - body["dataSources"][0]["parameters"]["embeddingEndpoint"] = self.AZURE_OPENAI_EMBEDDING_ENDPOINT - body["dataSources"][0]["parameters"]["embeddingKey"] = self.AZURE_OPENAI_EMBEDDING_KEY - - if self.DEBUG_LOGGING: + body["dataSources"][0]["parameters"][ + "embeddingEndpoint" + ] = self.env_params.AZURE_OPENAI_EMBEDDING_ENDPOINT + body["dataSources"][0]["parameters"][ + "embeddingKey" + ] = self.env_params.AZURE_OPENAI_EMBEDDING_KEY + + if self.env_params.DEBUG_LOGGING: body_clean = copy.deepcopy(body) if body_clean["dataSources"][0]["parameters"].get("key"): body_clean["dataSources"][0]["parameters"]["key"] = "*****" @@ -297,62 +528,62 @@ def prepare_body_headers_with_data(self, request, **kwargs): body_clean["dataSources"][0]["parameters"]["connectionString"] = "*****" if body_clean["dataSources"][0]["parameters"].get("embeddingKey"): body_clean["dataSources"][0]["parameters"]["embeddingKey"] = "*****" - + logging.debug(f"REQUEST BODY: {json.dumps(body_clean, indent=4)}") headers = { - 'Content-Type': 'application/json', - 'api-key': key, - "x-ms-useragent": "GitHubSampleWebApp/PublicAPI/3.0.0" + "Content-Type": "application/json", + "api-key": key, + "x-ms-useragent": "GitHubSampleWebApp/PublicAPI/3.0.0", } return body, headers # Format chat response with no streaming output def formatApiResponseNoStreaming(self, rawResponse): - if 'error' in rawResponse: + if "error" in rawResponse: return {"error": rawResponse["error"]} response = { "id": rawResponse["id"], "model": rawResponse["model"], "created": rawResponse["created"], "object": rawResponse["object"], - "choices": [{ - "messages": [] - }], + "choices": [{"messages": []}], } toolMessage = { "role": "tool", - "content": rawResponse["choices"][0]["message"]["context"]["messages"][0]["content"] + "content": rawResponse["choices"][0]["message"]["context"]["messages"][0][ + "content" + ], } assistantMessage = { "role": "assistant", - "content": 
rawResponse["choices"][0]["message"]["content"] + "content": rawResponse["choices"][0]["message"]["content"], } response["choices"][0]["messages"].append(toolMessage) response["choices"][0]["messages"].append(assistantMessage) return response - + # Format chat response with streaming output def formatApiResponseStreaming(self, rawResponse): - if 'error' in rawResponse: + if "error" in rawResponse: return {"error": rawResponse["error"]} response = { "id": rawResponse["id"], "model": rawResponse["model"], "created": rawResponse["created"], "object": rawResponse["object"], - "choices": [{ - "messages": [] - }], + "choices": [{"messages": []}], } if rawResponse["choices"][0]["delta"].get("context"): messageObj = { "delta": { "role": "tool", - "content": rawResponse["choices"][0]["delta"]["context"]["messages"][0]["content"] + "content": rawResponse["choices"][0]["delta"]["context"][ + "messages" + ][0]["content"], } } response["choices"][0]["messages"].append(messageObj) @@ -380,10 +611,12 @@ def formatApiResponseStreaming(self, rawResponse): response["choices"][0]["messages"].append(messageObj) return response - + # Stream chat response with appropriate role referencing data source - @conversation_client.log_stream - def stream_with_data(self, body, headers, endpoint, message_uuid, history_metadata={}): + @conversation_client.log_stream + def stream_with_data( + self, body, headers, endpoint, message_uuid, history_metadata={} + ): s = requests.Session() try: with s.post(endpoint, json=body, headers=headers, stream=True) as r: @@ -393,50 +626,62 @@ def stream_with_data(self, body, headers, endpoint, message_uuid, history_metada "model": "", "created": 0, "object": "", - "choices": [{ - "messages": [] - }], + "choices": [{"messages": []}], "apim-request-id": "", - 'history_metadata': history_metadata + "history_metadata": history_metadata, } if line: - if self.AZURE_OPENAI_PREVIEW_API_VERSION == '2023-06-01-preview': - lineJson = 
json.loads(line.lstrip(b'data:').decode('utf-8')) + if ( + self.env_params.AZURE_OPENAI_PREVIEW_API_VERSION + == "2023-06-01-preview" + ): + lineJson = json.loads(line.lstrip(b"data:").decode("utf-8")) else: try: - rawResponse = json.loads(line.lstrip(b'data:').decode('utf-8')) + rawResponse = json.loads( + line.lstrip(b"data:").decode("utf-8") + ) lineJson = self.formatApiResponseStreaming(rawResponse) except json.decoder.JSONDecodeError: continue - if 'error' in lineJson: + if "error" in lineJson: yield self.format_as_ndjson(lineJson) response["id"] = message_uuid response["model"] = lineJson["model"] response["created"] = lineJson["created"] response["object"] = lineJson["object"] - response["apim-request-id"] = r.headers.get('apim-request-id') + response["apim-request-id"] = r.headers.get("apim-request-id") - role = lineJson["choices"][0]["messages"][0]["delta"].get("role") + role = lineJson["choices"][0]["messages"][0]["delta"].get( + "role" + ) if role == "tool": - response["choices"][0]["messages"].append(lineJson["choices"][0]["messages"][0]["delta"]) + response["choices"][0]["messages"].append( + lineJson["choices"][0]["messages"][0]["delta"] + ) yield self.format_as_ndjson(response) - elif role == "assistant": - if response['apim-request-id'] and self.DEBUG_LOGGING: - logging.debug(f"RESPONSE apim-request-id: {response['apim-request-id']}") - response["choices"][0]["messages"].append({ - "role": "assistant", - "content": "" - }) + elif role == "assistant": + if ( + response["apim-request-id"] + and self.env_params.DEBUG_LOGGING + ): + logging.debug( + f"RESPONSE apim-request-id: {response['apim-request-id']}" + ) + response["choices"][0]["messages"].append( + {"role": "assistant", "content": ""} + ) yield self.format_as_ndjson(response) else: - deltaText = lineJson["choices"][0]["messages"][0]["delta"]["content"] + deltaText = lineJson["choices"][0]["messages"][0]["delta"][ + "content" + ] if deltaText != "[DONE]": - 
response["choices"][0]["messages"].append({ - "role": "assistant", - "content": deltaText - }) + response["choices"][0]["messages"].append( + {"role": "assistant", "content": deltaText} + ) yield self.format_as_ndjson(response) except Exception as e: yield self.format_as_ndjson({"error" + str(e)}) @@ -447,7 +692,7 @@ def stream_without_data(self, response, message_uuid, history_metadata={}): responseText = "" for line in response: if line["choices"]: - deltaText = line["choices"][0]["delta"].get('content') + deltaText = line["choices"][0]["delta"].get("content") else: deltaText = "" if deltaText and deltaText != "[DONE]": @@ -458,12 +703,9 @@ def stream_without_data(self, response, message_uuid, history_metadata={}): "model": line["model"], "created": line["created"], "object": line["object"], - "choices": [{ - "messages": [{ - "role": "assistant", - "content": responseText - }] - }], - "history_metadata": history_metadata + "choices": [ + {"messages": [{"role": "assistant", "content": responseText}]} + ], + "history_metadata": history_metadata, } - yield self.format_as_ndjson(response_obj) \ No newline at end of file + yield self.format_as_ndjson(response_obj) diff --git a/backend/tests/test_dynamic_form_orchestrator.py b/backend/tests/test_dynamic_form_orchestrator.py new file mode 100644 index 0000000000..c9428f820d --- /dev/null +++ b/backend/tests/test_dynamic_form_orchestrator.py @@ -0,0 +1,79 @@ +""" +Test DynamicFormOrchestrator +""" + +import sys +import os +from dotenv import load_dotenv + +load_dotenv() +from pathlib import Path + +# Hacky way to allow imports to python modules in backend folder, in the scenario where this repo is not properly packaged as a python package +sys.path.append(str(Path(__file__).parent.parent.parent.absolute())) + +from backend.orchestrators.DynamicFormOrchestrator import ( + DynamicFormOrchestrator, + get_simple_azure_search_config, + get_data_source_config, +) +from backend.orchestrators.Orchestrator import 
extract_env_params_into_simple_namespace +from azure.identity import DefaultAzureCredential, get_bearer_token_provider +import openai + + +def simple_chat_completion(data_source_config): + """Convenience function to use the data_source_config to feed into a fixed chat completion calling""" + # Construct AOAI chat completion from scratch, then inject the data source config + azure_endpoint = "https://msrchat-aoai.openai.azure.com/" + api_version = "2024-05-01-preview" + api_deployment = "gpt-4o" + azure_ad_token_provider = get_bearer_token_provider( + DefaultAzureCredential(), + "https://cognitiveservices.azure.com/.default", + ) + + aoai_client = openai.AzureOpenAI( + azure_endpoint=azure_endpoint, + api_version=api_version, + azure_ad_token_provider=azure_ad_token_provider, + ) + + response = aoai_client.chat.completions.create( + model=api_deployment, + messages=[ + { + "role": "user", + "content": "Describe Microsoft research in 10 words. ", + }, + ], + extra_body={"data_sources": [data_source_config]}, + ) + return response + + +def test_get_simple_azure_search_config(): + search_endpoint = f"https://msrchatss.search.windows.net" + search_index = "msrchatindex" + data_source_config = get_simple_azure_search_config( + azure_search_endpoint=search_endpoint, + azure_search_index=search_index, + ) + response = simple_chat_completion(data_source_config) + print() + print(response.choices[0].message.content) + + +def test_get_data_source_config(): + env_namespace = extract_env_params_into_simple_namespace() + env_dict = env_namespace.__dict__ + + data_source_config = get_data_source_config( + data_source_type="azure_search", + env_dict=env_dict, + system_message="", + ) + + response = simple_chat_completion(data_source_config) + print() + print(response.choices[0].message.content) diff --git a/build.sh b/build.sh new file mode 100644 index 0000000000..2fabb6244a --- /dev/null +++ b/build.sh @@ -0,0 +1,17 @@ +#!/bin/bash +# Ensure that the required environment 
variables are set +if [ -z "$AZURE_ARTIFACTS_PAT" ]; then + echo "AZURE_ARTIFACTS_PAT environment variable is not set. Exiting." + exit 1 +fi + +# Export HTTP basic-auth credentials for Poetry (TNRDEV source) as environment variables +export POETRY_HTTP_BASIC_TNRDEV_USERNAME=agent +export POETRY_HTTP_BASIC_TNRDEV_PASSWORD=$AZURE_ARTIFACTS_PAT + +set -x # Enable debugging + +echo "Username: $POETRY_HTTP_BASIC_TNRDEV_USERNAME" +echo "Password: $POETRY_HTTP_BASIC_TNRDEV_PASSWORD" + +echo "pip authenticated to Azure Artifacts successfully." \ No newline at end of file diff --git a/frontend/src/api/api.ts b/frontend/src/api/api.ts index 12efabf690..f88b1b9008 100644 --- a/frontend/src/api/api.ts +++ b/frontend/src/api/api.ts @@ -2,15 +2,19 @@ import { UserInfo, ConversationRequest, Conversation, ChatMessage, CosmosDBHealt import { chatHistorySampleData } from "../constants/chatHistory"; import { SpeechConfig, AutoDetectSourceLanguageConfig, AudioConfig, SpeechRecognizer, ResultReason } from "microsoft-cognitiveservices-speech-sdk"; -export async function conversationApi(options: ConversationRequest, abortSignal: AbortSignal): Promise { +export async function conversationApi(options: ConversationRequest, abortSignal: AbortSignal, file?: File): Promise { + console.log("conversationApi", options, file); + const formData = new FormData(); + + if (file) { + formData.append("file", file); + } + + formData.append("messages", JSON.stringify(options.messages)); + const response = await fetch("/conversation", { method: "POST", - headers: { - "Content-Type": "application/json" - }, - body: JSON.stringify({ - messages: options.messages - }), + body: formData, signal: abortSignal }); diff --git a/frontend/src/components/Answer/Answer.tsx b/frontend/src/components/Answer/Answer.tsx index ffb26478b9..5a062bb93a 100644 --- a/frontend/src/components/Answer/Answer.tsx +++ b/frontend/src/components/Answer/Answer.tsx @@ -276,9 +276,6 @@ export const Answer = ({ })} } - - AI-generated content may be incorrect - { diff --git
a/frontend/src/components/DynamicForm/DynamicForm.tsx b/frontend/src/components/DynamicForm/DynamicForm.tsx new file mode 100644 index 0000000000..54d4ad9c5f --- /dev/null +++ b/frontend/src/components/DynamicForm/DynamicForm.tsx @@ -0,0 +1,56 @@ +import * as React from 'react'; +import { DynamicFormStyles } from './DynamicFormStyles'; +import { IDynamicFormField } from './DynamicFormModels'; +import { DynamicFormField } from './DynamicFormField'; +import { Button, Card, Title3 } from '@fluentui/react-components'; + + +export interface IDynamicFormProps { + formTitle: string; + fields: IDynamicFormField[]; + onClearAllClick?: () => void; +} + +export const DynamicForm: React.FunctionComponent = (props: React.PropsWithChildren) => { + const styles = DynamicFormStyles(); + + const onFieldChange = (value: string, field: IDynamicFormField) => { + field.value = value; + }; + + const onSubmitClick = () => { + const fields = props.fields.map((field) => `${field.label}: ${field.value}`).join('\n'); + const blob = new Blob([fields], { type: 'text/plain' }); + const url = URL.createObjectURL(blob); + const a = document.createElement + ('a'); + a.href + = url; + let fileName = String(props.fields.find((field) => field.name === 'fullName')?.value) || 'userprofile'; + fileName = fileName.replace(/[^a-zA-Z0-9]/g, '') + 'Profile.txt'; + a.download = fileName; + a.click(); + URL.revokeObjectURL(url); + } + + return ( + + {props.formTitle} + { + props.fields + .sort((a, b) => (a.order !== undefined && b.order !== undefined) ? a.order - b.order : 0) + .map((field: IDynamicFormField) => ( + onFieldChange(value, field)} + /> + )) + } +
+ + +
+
+ ); +}; \ No newline at end of file diff --git a/frontend/src/components/DynamicForm/DynamicFormData.ts b/frontend/src/components/DynamicForm/DynamicFormData.ts new file mode 100644 index 0000000000..d72c6a8222 --- /dev/null +++ b/frontend/src/components/DynamicForm/DynamicFormData.ts @@ -0,0 +1,52 @@ +import { DynamicFieldType, IDynamicFormField } from "./DynamicFormModels"; + +export const DynamicFormData: IDynamicFormField[] = [ + { + name: "name", + label: "Name", + type: DynamicFieldType.text, + required: true, + order: 1, + value: "John Doe", + }, + { + name: "professionalBackground", + label: "Professional background", + type: DynamicFieldType.textarea, + required: true, + order: 2, + value: "I have a degree in Computer Science and have been working as a software developer for 5 years.", + }, + { + name: "workStylePreferences", + label: "Work Style Preferences", + type: DynamicFieldType.textarea, + required: true, + order: 3, + value: "I prefer working in a quiet environment with minimal distractions.", + }, + { + name: "socialLifestylePreferences", + label: "Social and Lifestyle Preferences", + type: DynamicFieldType.textarea, + required: true, + order: 4, + value: "I enjoy spending time with friends and family, and I like to travel.", + }, + { + name: "preferencesAndValues", + label: "Preferences and Values", + type: DynamicFieldType.textarea, + required: true, + order: 5, + value: "I value honesty, integrity, and hard work.", + }, + { + name: "funFacts", + label: "Fun Facts", + type: DynamicFieldType.textarea, + required: true, + order: 6, + value: "I value honesty, integrity, and hard work.", + }, +]; \ No newline at end of file diff --git a/frontend/src/components/DynamicForm/DynamicFormField.tsx b/frontend/src/components/DynamicForm/DynamicFormField.tsx new file mode 100644 index 0000000000..e554aa2cb2 --- /dev/null +++ b/frontend/src/components/DynamicForm/DynamicFormField.tsx @@ -0,0 +1,102 @@ +import * as React from 'react'; +import { 
DynamicFieldType, IDynamicFormField } from './DynamicFormModels'; +import { Field, Input, Select, SpinButton, Textarea, TextareaOnChangeData } from '@fluentui/react-components'; +import { set } from 'lodash'; + +export interface IDynamicFormFieldProps { + field: IDynamicFormField; + onChange?: (value: string) => void; +} + +export const DynamicFormField: React.FunctionComponent = (props: React.PropsWithChildren) => { + const [value, setValue] = React.useState(props.field.value); + React.useEffect(() => { + setValue(props.field.value); + }, [props.field.value]); + + const getField = (field: IDynamicFormField) => { + switch (field.type) { + case DynamicFieldType.text: + return ( + + , data?: any) => { + if (props.onChange) { + props.onChange(data?.value as string); + setValue(data?.value); + } + }} + /> + + ); + + case DynamicFieldType.number: + return ( + + + + ); + + case DynamicFieldType.date: + return ( + + ); + + case DynamicFieldType.select: + return ( + + + + ); + case DynamicFieldType.textarea: + return ( + +