-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprototype.py
More file actions
161 lines (133 loc) · 6.18 KB
/
prototype.py
File metadata and controls
161 lines (133 loc) · 6.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
import ast

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.spatial.distance import cosine
# Define constants for file paths and sheet names
# Workbook paths are relative, so they resolve against the directory the
# script is run from.
PIECES_FILE_PATH = "Data/Pieces.xlsx"
PURCHASES_FILE_PATH = "Data/Purchases.xlsx"
USERS_FILE_PATH = "Data/Users.xlsx"
# Sheet names passed to pd.read_excel(sheet_name=...).
# NOTE(review): ROOMS_SHEET, CATEGORIES_SHEET and AESTHETICS_SHEET are not
# referenced anywhere in the code visible in this file section.
PIECES_SHEET = "Pieces"
ROOMS_SHEET = "Rooms"
CATEGORIES_SHEET = "Categories"
AESTHETICS_SHEET = "Aesthetics"
ORDER_ITEMS_SHEET = "Order Items"
ORDERS_TABLE_SHEET = "Orders Table"
USERS_SHEET = "Users"
# Define constants for attribute names
# These double as column labels of the inventory frame and as keys of the
# `weights` dictionary.
ROOM_TYPE = "Room Type"
AESTHETIC = "Aesthetic"
CATEGORY = "Category"
PRICE = "Price"
COLOR = "Color"
# Define a color compatibility dictionary with prioritized rankings
# Key: colour of the purchased item. Value: compatible colours ordered from
# best to worst match. Position matters: color_score() gives 0.9 to a colour
# found in the first three entries, 0.7 within the first six and 0.5 for any
# later entry. A purchased colour that is not a key here (for example "Tan"
# and "Light Blue" only appear inside the lists) is looked up with an empty
# list as fallback.
color_matching = {
    "Black": ["Black", "White", "Gray", "Mahogany", "Walnut", "Beige", "Clear", "Natural", "Oak"],
    "White": ["White", "Beige", "Gray", "Black", "Clear", "Oak", "Natural", "Walnut"],
    "Gray": ["Gray", "Black", "White", "Beige", "Blue", "Mahogany", "Walnut", "Oak"],
    "Brown": ["Brown", "Beige", "Tan", "Walnut", "Mahogany", "Natural", "Oak", "Cherry"],
    "Blue": ["Blue", "Light Blue", "Gray", "White", "Beige", "Teal", "Black"],
    "Mahogany": ["Mahogany", "Brown", "Walnut", "Beige", "Cherry", "Black", "Natural"],
    "Beige": ["Beige", "White", "Brown", "Gray", "Walnut", "Natural", "Oak", "Clear"],
    "Clear": ["Clear", "White", "Gray", "Black", "Beige", "Natural", "Oak"],
    "Oak": ["Oak", "Walnut", "Natural", "Beige", "White", "Gray", "Mahogany", "Black"],
    "Walnut": ["Walnut", "Brown", "Mahogany", "Beige", "Oak", "Natural", "Black"],
    "Cherry": ["Cherry", "Mahogany", "Brown", "Beige", "Walnut", "Black"],
    "Teal": ["Teal", "Blue", "Gray", "White", "Beige", "Black"],
    "Natural": ["Natural", "Oak", "Walnut", "Beige", "White", "Clear", "Brown"]
}
# Load data
# Every workbook is read eagerly at import time; a missing file or sheet
# aborts the script here.
inventory = pd.read_excel(PIECES_FILE_PATH, sheet_name=PIECES_SHEET)
# Both purchase sheets live in the same workbook. Passing a list of sheet
# names makes pandas open the file once and return a dict keyed by sheet
# name, instead of parsing the workbook twice.
purchase_sheets = pd.read_excel(
    PURCHASES_FILE_PATH,
    sheet_name=[ORDER_ITEMS_SHEET, ORDERS_TABLE_SHEET],
)
order_items = purchase_sheets[ORDER_ITEMS_SHEET]
orders_table = purchase_sheets[ORDERS_TABLE_SHEET]
users = pd.read_excel(USERS_FILE_PATH, sheet_name=USERS_SHEET)
# Attribute weights
# Relative importance of each attribute; the five values add up to 1.0.
# weighted_vectorize() multiplies the room, aesthetic, category and price
# components by their weight. COLOR is not part of that feature vector:
# colour is blended in separately when candidates are scored.
weights = {
    ROOM_TYPE: 0.4,
    AESTHETIC: 0.3,
    CATEGORY: 0.05,
    PRICE: 0.1,
    COLOR: 0.15
}
# Normalize numerical data (Price)
def normalize(series):
    """Min-max scale *series* so its smallest value is 0.0 and largest is 1.0.

    Args:
        series: Numeric pandas Series (any object exposing ``min``/``max``,
            ``astype`` and element-wise arithmetic behaves the same way).

    Returns:
        A new series of the same length. When all values are identical the
        spread is zero; every entry is then returned as 0.0 rather than the
        NaN a 0/0 division would produce. Missing values stay missing.
    """
    lowest = series.min()
    spread = series.max() - lowest
    if spread == 0:
        # Constant column: each value sits exactly at the minimum, so the
        # scaled value is 0.0. Dividing here would yield NaN for every row.
        return (series - lowest).astype(float)
    return (series - lowest) / spread
# Rescale both price columns in place so prices from the inventory and from
# past orders are expressed on the same scale.
inventory[PRICE] = inventory[PRICE].pipe(normalize)
order_items["Unit Price"] = order_items["Unit Price"].pipe(normalize)
# Map every distinct categorical value found in the inventory to an integer
# code counted from 1. Code 0 is left free: weighted_vectorize() falls back
# to it for values missing from these tables.
room_types_dict = {
    label: code
    for code, label in enumerate(inventory[ROOM_TYPE].unique(), 1)
}
aesthetics_dict = {
    label: code
    for code, label in enumerate(inventory[AESTHETIC].unique(), 1)
}
categories_dict = {
    label: code
    for code, label in enumerate(inventory[CATEGORY].unique(), 1)
}
def color_score(purchased_color, inventory_color):
    """Rate how well *inventory_color* goes with *purchased_color*.

    Returns 1.0 for an identical colour, 0.9 / 0.7 / 0.5 when the candidate
    colour sits in the first three / first six / any position of the
    purchased colour's ranked compatibility list, and 0.0 otherwise
    (including when the purchased colour has no list at all).
    """
    ranked = color_matching.get(purchased_color, [])
    if inventory_color == purchased_color:
        return 1.0
    # Tiers are checked from the tightest prefix outwards, so the first hit
    # is the best score the candidate qualifies for.
    for cutoff, score in ((3, 0.9), (6, 0.7), (len(ranked), 0.5)):
        if inventory_color in ranked[:cutoff]:
            return score
    return 0.0
# Turn one item row into a weighted numeric feature vector
def weighted_vectorize(row):
    """Encode *row* as a 4-element array: room, aesthetic, category, price.

    Each categorical value is replaced by its integer code (0 when the value
    is unknown), divided by the largest code of its table and multiplied by
    the attribute weight. The price is multiplied by its weight, or set to 0
    when it is missing.
    """
    encoders = (
        (ROOM_TYPE, room_types_dict),
        (AESTHETIC, aesthetics_dict),
        (CATEGORY, categories_dict),
    )
    components = [
        table.get(row.get(attribute), 0) / max(table.values()) * weights[attribute]
        for attribute, table in encoders
    ]
    if pd.notna(row[PRICE]):
        components.append(float(row[PRICE]) * weights[PRICE])
    else:
        components.append(0)
    return np.array(components)
# Cosine similarity adjusted by a colour bonus and a distance penalty
def compute_similarity(vector1, vector2, purchased_color, inventory_color):
    """Score how alike two item vectors are, taking colour into account.

    Starts from the cosine similarity of the two vectors, adds 0.05 (capped
    at 1.0) when *inventory_color* appears in the compatibility list of
    *purchased_color*, then subtracts 0.2 times the Euclidean distance
    between the vectors. The result is never below 0.
    """
    first = np.array(vector1, dtype=float)
    second = np.array(vector2, dtype=float)
    score = 1 - cosine(first, second)

    # Small bonus only, so colour cannot outweigh the attribute match.
    compatible_colors = color_matching.get(purchased_color, [])
    if inventory_color in compatible_colors:
        score = min(score + 0.05, 1.0)

    # Vectors pointing the same way but far apart still lose some score.
    penalty = np.linalg.norm(first - second) * 0.2
    return max(0, score - penalty)
# Process each user's last purchases
def _parse_purchase_history(raw_history):
    """Turn a "Purchase History" cell into a list of purchased item IDs.

    Args:
        raw_history: The cell value. A string is parsed as a Python literal
            (presumably a list such as "[3, 17, 42]" -- confirm against the
            Users sheet); a list or tuple is used as-is; a blank string or a
            missing value (None/NaN) means no purchases.

    Returns:
        The item IDs as a list, in the order stored in the cell.

    Raises:
        ValueError, TypeError, SyntaxError: If the cell is not a valid
            literal, or does not hold a list/tuple.
    """
    if isinstance(raw_history, (list, tuple)):
        return list(raw_history)
    if isinstance(raw_history, str):
        if not raw_history.strip():
            return []
        # SECURITY: this cell comes straight from a spreadsheet. It used to
        # be passed to eval(), which would execute any expression stored in
        # the file. literal_eval only accepts plain literals.
        parsed = ast.literal_eval(raw_history)
        if not isinstance(parsed, (list, tuple)):
            raise ValueError(
                f"expected a list of item IDs, got {type(parsed).__name__}"
            )
        return list(parsed)
    if pd.isna(raw_history):
        return []
    raise ValueError(f"unsupported purchase history value: {raw_history!r}")


# Candidate features do not depend on the user, so encode the whole
# inventory once instead of once per user.
catalog = [
    (item["ID"], weighted_vectorize(item), item[COLOR])
    for _, item in inventory.iterrows()
]

# Colour takes its configured share of the final score; the attribute
# similarity takes the remainder.
color_share = weights[COLOR]
attribute_share = 1 - color_share

for _, user in users.iterrows():
    user_id = user["User ID"]
    try:
        purchase_history = _parse_purchase_history(user["Purchase History"])
    except (ValueError, TypeError, SyntaxError) as error:
        print(f"Skipping User ID {user_id}: unreadable purchase history ({error})")
        continue
    if not purchase_history:
        print(f"No purchase history for User ID {user_id}")
        continue

    # --- Get Last Purchased Item Only ---
    last_purchase_id = purchase_history[-1]
    last_purchased_item = inventory[inventory["ID"] == last_purchase_id]
    if last_purchased_item.empty:
        print(f"Last purchased item not found for User ID {user_id}")
        continue
    last_row = last_purchased_item.iloc[0]
    last_vector = weighted_vectorize(last_row)
    last_color = last_row[COLOR]

    # Set membership keeps the "already bought" check constant-time.
    already_purchased = set(purchase_history)

    recommendations = []
    for candidate_id, candidate_vector, candidate_color in catalog:
        if candidate_id in already_purchased:
            continue
        base_similarity = 1 - cosine(last_vector, candidate_vector)
        # An undefined cosine (NaN) counts as no similarity; negative values
        # are floored at zero.
        if np.isnan(base_similarity):
            base_similarity = 0.0
        base_similarity = max(0.0, base_similarity)

        # Apply color influence
        final_score = (
            base_similarity * attribute_share
            + color_score(last_color, candidate_color) * color_share
        )
        recommendations.append((candidate_id, final_score))

    # sorted() is stable, so equally scored items keep inventory order.
    top_recommendations = sorted(
        recommendations, key=lambda pair: pair[1], reverse=True
    )[:10]
    print(f"\nTop Recommendations for User ID {user_id}:")
    print(top_recommendations)

    # Plot Results
    if top_recommendations:
        product_ids = [item_id for item_id, _ in top_recommendations]
        scores = [score for _, score in top_recommendations]
        plt.figure(figsize=(10, 6))
        plt.bar(range(len(product_ids)), scores, tick_label=product_ids, color='skyblue')
        plt.title(f'Top 10 Recommendations for User {user_id}')
        plt.xlabel('Product ID')
        plt.ylabel('Similarity Score')
        plt.tight_layout()
        # Shows one window per user; execution resumes when it is closed
        # (with an interactive backend).
        plt.show()
    else:
        print(f"No recommendations found for User ID {user_id}.")