Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ return render_template('page.html', settings=public_settings)

## Version Management

- It's important to update the version at the end of every plan
- Version is stored in `config.py`: `VERSION = "X.XXX.XXX"`
- When incrementing, only change the third segment (e.g., `0.238.024` -> `0.238.025`)
- Include the current version in functional test file headers and documentation files
Expand All @@ -83,7 +84,7 @@ return render_template('page.html', settings=public_settings)

## Release Notes

After completing code changes, offer to update `docs/explanation/release_notes.md`.
After completing plans and code changes, offer to update `docs/explanation/release_notes.md`.

- Add entries under the current version from `config.py`
- If the version was bumped, create a new section at the top: `### **(vX.XXX.XXX)**`
Expand Down
12 changes: 8 additions & 4 deletions application/single_app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@
EXECUTOR_TYPE = 'thread'
EXECUTOR_MAX_WORKERS = 30
SESSION_TYPE = 'filesystem'
VERSION = "0.239.002"
VERSION = "0.239.007"

SECRET_KEY = os.getenv('SECRET_KEY', 'dev-secret-key-change-in-production')

Expand Down Expand Up @@ -257,6 +257,8 @@ def get_redis_cache_infrastructure_endpoint(redis_hostname: str) -> str:
storage_account_user_documents_container_name = "user-documents"
storage_account_group_documents_container_name = "group-documents"
storage_account_public_documents_container_name = "public-documents"
storage_account_personal_chat_container_name = "personal-chat"
storage_account_group_chat_container_name = "group-chat"

# Initialize Azure Cosmos DB client
cosmos_endpoint = os.getenv("AZURE_COSMOS_ENDPOINT")
Expand Down Expand Up @@ -745,9 +747,11 @@ def initialize_clients(settings):
# This addresses the issue where the application assumes containers exist
if blob_service_client:
for container_name in [
storage_account_user_documents_container_name,
storage_account_group_documents_container_name,
storage_account_public_documents_container_name
storage_account_user_documents_container_name,
storage_account_group_documents_container_name,
storage_account_public_documents_container_name,
storage_account_personal_chat_container_name,
storage_account_group_chat_container_name
]:
try:
container_client = blob_service_client.get_container_client(container_name)
Expand Down
101 changes: 100 additions & 1 deletion application/single_app/functions_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,7 @@ def generate_embedding(
embedding_model = selected_embedding_model['deploymentName']

while True:
random_delay = random.uniform(0.5, 2.0)
random_delay = random.uniform(0.05, 0.2)
time.sleep(random_delay)

try:
Expand Down Expand Up @@ -385,3 +385,102 @@ def generate_embedding(

except Exception as e:
raise

def generate_embeddings_batch(
    texts,
    batch_size=16,
    max_retries=5,
    initial_delay=1.0,
    delay_multiplier=2.0
):
    """Generate embeddings for multiple texts in batches.

    Azure OpenAI embeddings API accepts a list of strings as input,
    so one API call can embed up to ``batch_size`` texts. This reduces
    per-call overhead and inter-call delay significantly compared to
    calling the single-text endpoint once per chunk.

    Args:
        texts: List of text strings to embed.
        batch_size: Number of texts per API call (default 16).
        max_retries: Max retries on rate limit errors, per batch.
        initial_delay: Initial retry delay in seconds.
        delay_multiplier: Multiplier for exponential backoff.

    Returns:
        list of (embedding, token_usage) tuples, one per input text.
        token_usage is a dict of approximate per-text token counts —
        the API reports usage per call, so counts are apportioned
        across the batch — or None when the response carries no usage.

    Raises:
        RateLimitError: if a batch still hits the rate limit after
            max_retries attempts.
        ValueError: if no embedding deployment is configured in settings.
    """
    # Empty input: nothing to do, no client construction needed.
    if not texts:
        return []

    embedding_client, embedding_model = _get_embedding_client_and_model()

    results = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        results.extend(
            _embed_one_batch(embedding_client, embedding_model, batch,
                             max_retries, initial_delay, delay_multiplier)
        )
    return results


def _get_embedding_client_and_model():
    """Build the AzureOpenAI embeddings client and resolve the deployment name from settings."""
    settings = get_settings()

    if settings.get('enable_embedding_apim', False):
        # APIM front door: deployment name comes straight from settings.
        embedding_model = settings.get('azure_apim_embedding_deployment')
        embedding_client = AzureOpenAI(
            api_version=settings.get('azure_apim_embedding_api_version'),
            azure_endpoint=settings.get('azure_apim_embedding_endpoint'),
            api_key=settings.get('azure_apim_embedding_subscription_key'))
        return embedding_client, embedding_model

    if settings.get('azure_openai_embedding_authentication_type') == 'managed_identity':
        token_provider = get_bearer_token_provider(DefaultAzureCredential(), cognitive_services_scope)
        embedding_client = AzureOpenAI(
            api_version=settings.get('azure_openai_embedding_api_version'),
            azure_endpoint=settings.get('azure_openai_embedding_endpoint'),
            azure_ad_token_provider=token_provider
        )
    else:
        embedding_client = AzureOpenAI(
            api_version=settings.get('azure_openai_embedding_api_version'),
            azure_endpoint=settings.get('azure_openai_embedding_endpoint'),
            api_key=settings.get('azure_openai_embedding_key')
        )

    embedding_model_obj = settings.get('embedding_model', {})
    if embedding_model_obj and embedding_model_obj.get('selected'):
        embedding_model = embedding_model_obj['selected'][0]['deploymentName']
    else:
        # Previously this path left `embedding_model` unbound and failed later
        # with a NameError; fail fast with a clear message instead.
        raise ValueError("No embedding model selected in settings['embedding_model']")

    return embedding_client, embedding_model


def _embed_one_batch(embedding_client, embedding_model, batch,
                     max_retries, initial_delay, delay_multiplier):
    """Embed one batch with jittered pacing and exponential backoff on rate limits."""
    retries = 0
    current_delay = initial_delay

    while True:
        # Small jitter so concurrent workers don't hit the API in lockstep.
        time.sleep(random.uniform(0.05, 0.2))

        try:
            response = embedding_client.embeddings.create(
                model=embedding_model,
                input=batch
            )
        except RateLimitError:
            retries += 1
            if retries > max_retries:
                raise
            # Exponential backoff with multiplicative jitter.
            time.sleep(current_delay * random.uniform(1.0, 1.5))
            current_delay *= delay_multiplier
            continue

        usage = getattr(response, 'usage', None)
        batch_results = []
        for idx, item in enumerate(response.data):
            token_usage = None
            if usage:
                # Usage is reported per call; apportion evenly and give the
                # remainder to the first item so per-text counts sum to the
                # call totals (plain integer division dropped the remainder).
                token_usage = {
                    'prompt_tokens': _apportion(usage.prompt_tokens, len(batch), idx),
                    'total_tokens': _apportion(usage.total_tokens, len(batch), idx),
                    'model_deployment_name': embedding_model
                }
            batch_results.append((item.embedding, token_usage))
        return batch_results


def _apportion(total, parts, index):
    """Split `total` evenly across `parts` items, assigning the remainder to index 0."""
    share, remainder = divmod(total, parts)
    return share + (remainder if index == 0 else 0)
Loading