-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathadd_url_fields.py
More file actions
132 lines (112 loc) · 3.64 KB
/
add_url_fields.py
File metadata and controls
132 lines (112 loc) · 3.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#!/usr/bin/env python3
"""
Add URL tracking fields to Azure Search index
"""
import os
import requests
import sys
# Load configuration from the Streamlit app's .env file (ui/.env) without
# requiring python-dotenv: parse simple KEY=VALUE lines by hand.
env_path = os.path.join(os.path.dirname(__file__), "ui", ".env")
env_vars = {}
if os.path.exists(env_path):
    with open(env_path) as f:
        for raw_line in f:
            line = raw_line.strip()
            # Skip blanks and comments. Stripping first means comments that
            # are indented with whitespace are also recognized (the raw
            # startswith('#') check would have treated them as data).
            if not line or line.startswith('#') or '=' not in line:
                continue
            key, value = line.split('=', 1)
            # Tolerate spaces around '=' (e.g. "KEY = value").
            env_vars[key.strip()] = value.strip()

SEARCH_ENDPOINT = env_vars.get("SEARCH_ENDPOINT")
SEARCH_KEY = env_vars.get("SEARCH_KEY")
SEARCH_INDEX_NAME = env_vars.get("SEARCH_INDEX_NAME", "segments")

print(f"Endpoint: {SEARCH_ENDPOINT}")
print(f"Index: {SEARCH_INDEX_NAME}")
# Mask the admin key; show only the last 4 chars for operator sanity-checking.
print(f"Key: {'*' * 10}{SEARCH_KEY[-4:] if SEARCH_KEY else 'NOT FOUND'}")
print()

if not SEARCH_ENDPOINT or not SEARCH_KEY:
    print("ERROR: Missing SEARCH_ENDPOINT or SEARCH_KEY in .env")
    sys.exit(1)

# Azure AI Search REST API version used for all index operations below.
API_VERSION = "2024-07-01"
def get_index():
    """Fetch the current index definition from Azure AI Search.

    Uses the module-level SEARCH_ENDPOINT / SEARCH_INDEX_NAME / SEARCH_KEY
    configuration loaded from ui/.env.

    Returns:
        dict | None: the full index-definition JSON on HTTP 200, or None on
        any other status (the status and response body are printed so the
        operator can diagnose the failure).
    """
    url = f"{SEARCH_ENDPOINT}/indexes/{SEARCH_INDEX_NAME}?api-version={API_VERSION}"
    headers = {"api-key": SEARCH_KEY}
    print(f"Fetching index: {url}")
    # Explicit timeout: requests defaults to waiting forever, which would
    # hang this one-shot script on any network issue.
    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code == 200:
        return response.json()
    print(f"Failed to get index: {response.status_code}")
    print(response.text)
    return None
def update_index(index_def):
    """PUT the (modified) index definition back to Azure AI Search.

    Args:
        index_def: the full index-definition dict, typically the object
            returned by get_index() with new fields appended.

    Returns:
        bool: True on HTTP 200/201, False otherwise (the status and
        response body are printed on failure).
    """
    url = f"{SEARCH_ENDPOINT}/indexes/{SEARCH_INDEX_NAME}?api-version={API_VERSION}"
    headers = {
        "Content-Type": "application/json",
        "api-key": SEARCH_KEY
    }
    # Explicit timeout so a stalled connection cannot hang the script;
    # index updates can take a moment, so allow a generous window.
    response = requests.put(url, headers=headers, json=index_def, timeout=60)
    if response.status_code in (200, 201):
        print("✅ Index updated successfully!")
        return True
    print(f"❌ Failed to update: {response.status_code}")
    print(response.text)
    return False
def main():
    """Fetch the index, append any missing URL-tracking fields, and save it."""
    print("Fetching current index...")
    index = get_index()
    if not index:
        sys.exit(1)

    existing_fields = {f["name"] for f in index.get("fields", [])}
    print(f"Existing fields: {existing_fields}")
    print()

    def _field(name, edm_type, **overrides):
        # Base spec for a simple, non-searchable metadata field; individual
        # fields override only the flags that differ.
        spec = {
            "name": name,
            "type": edm_type,
            "searchable": False,
            "filterable": True,
            "retrievable": True,
            "sortable": False,
            "facetable": False,
            "key": False,
        }
        spec.update(overrides)
        return spec

    new_fields = [
        _field("source_url", "Edm.String"),
        _field("source_type", "Edm.String", facetable=True),
        _field("processed_at", "Edm.DateTimeOffset", sortable=True),
    ]

    added = 0
    for field in new_fields:
        if field["name"] in existing_fields:
            print(f"⚠️ Already exists: {field['name']}")
            continue
        print(f"➕ Adding: {field['name']}")
        index["fields"].append(field)
        added += 1

    if added == 0:
        print("\n✅ All fields already present!")
        return

    print(f"\n💾 Saving with {added} new fields...")
    if update_index(index):
        print("\n🎉 SUCCESS! URL tracking fields added.")
        print("\nNext steps:")
        print("1. Restart your Streamlit app")
        print("2. Go to 'System Diagnostics' page")
        print("3. Click 'Check Index Schema' to verify")


if __name__ == "__main__":
    main()