-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstatic_graphs.py
More file actions
488 lines (401 loc) · 19.5 KB
/
static_graphs.py
File metadata and controls
488 lines (401 loc) · 19.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import plotly.express as px
from data_cleaner import DataCleaner
from sklearn.preprocessing import StandardScaler
# Load the raw CSV and pass it through the project's DataCleaner; every figure
# in this script reads from the resulting `df`.
df_raw = pd.read_csv('car_prices.csv')
df = DataCleaner(df_raw).clean_data()
# Column reference for `df` (a bare string literal used as a note; it has no
# effect at runtime).
"""
Numerical features: ['year', 'condition', 'odometer', 'mmr', 'sellingprice']
Categorical features: ['make', 'model', 'trim', 'body', 'transmission', 'state', 'color', 'interior', 'seller', 'saledate', 'salemonth', 'saleday', 'saleyear']
"""
# 1.========= Line Plot: Total Sales Volume Over Time =========
# Sum of selling prices for each value of the 'year' column, drawn as one line.
sales_volume = df.groupby('year')['sellingprice'].sum()
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(sales_volume.index, sales_volume.values, color='skyblue', linewidth=2)
ax.set_title('Total Sales Volume Over Time', fontsize=20, fontfamily='serif', color='blue')
ax.set_xlabel('Year', fontsize=15, fontfamily='serif', color='darkred')
ax.set_ylabel('Total Sales Volume', fontsize=15, fontfamily='serif', color='darkred')
ax.grid(True)
plt.show()
# 2.========= Histogram Plot with KDE: Distribution of Selling Prices =========
# 30-bin histogram of selling price with a KDE curve overlaid.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(df['sellingprice'], kde=True, color='skyblue', bins=30, ax=ax)
ax.set_title('Distribution of Selling Prices', fontsize=20, fontfamily='serif', color='blue')
ax.set_xlabel('Selling Price', fontsize=15, fontfamily='serif', color='darkred')
ax.set_ylabel('Frequency', fontsize=15, fontfamily='serif', color='darkred')
ax.grid(True)
fig.tight_layout()
plt.show()
# 3.========= KDE Plot: Distribution of Odometer Reading =========
# Filled kernel density estimate of the odometer readings.
plt.figure(figsize=(10, 6))
# `fill=True` replaces the deprecated `shade=True` keyword and matches the
# kdeplot call used for the contour plot later in this script.
sns.kdeplot(df['odometer'], color='skyblue', fill=True)
plt.title('Distribution of Odometer Readings', fontsize=20, fontfamily='serif', color='blue')
plt.xlabel('Odometer Reading', fontsize=15, fontfamily='serif', color='darkred')
plt.ylabel('Density', fontsize=15, fontfamily='serif', color='darkred')
plt.grid(True)
plt.tight_layout()
plt.show()
# 4.========= Bar Plot (Grouped): Average Selling Prices by Body and Condition =========
# One bar group per body type, one bar per condition value; error bars disabled.
fig, ax = plt.subplots(figsize=(10, 6))
# NOTE(review): newer seaborn releases spell `ci=None` as `errorbar=None` - confirm
# the installed version before switching.
bar_plot = sns.barplot(data=df, x='body', y='sellingprice', hue='condition', ci=None, ax=ax)
ax.set_title('Average Selling Prices by Body and Condition')
ax.set_xlabel('Body')
ax.set_ylabel('Average Selling Price')
plt.xticks(rotation=45)
# Anchor the legend just outside the right edge of the axes.
legend = plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
legend.set_title('Condition')
fig.tight_layout()
plt.show()
# 5.========= Count Plot: Frequency of Different Vehicle body =========
# Number of rows per body type.
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=df, x='body', ax=ax)
ax.set_title('Count plot of Different Vehicle Body')
ax.set_xlabel('Body')
ax.set_ylabel('Frequency')
# Tilt the category labels so long body names stay readable.
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
plt.show()
# 6.========= Pie Chart: Market Share of Top 10 Vehicle Makes =========
# Share of rows for the ten most frequent makes (shares are relative to those
# ten makes only, since the pie is drawn from the truncated counts).
top_10_makes = df['make'].value_counts().head(10)
fig, ax = plt.subplots(figsize=(10, 6))
top_10_makes.plot.pie(autopct='%1.2f%%', ax=ax)
ax.set_title('Market Share of Top 10 Vehicle Makes', fontsize=20, fontfamily='serif', color='blue')
fig.tight_layout()
plt.show()
# 7.========= Scatter Plot with Regression Line: Selling Price vs. Odometer Readings =========
# Scatter of odometer against selling price with a red fitted regression line.
fig, ax = plt.subplots(figsize=(10, 6))
sns.regplot(data=df, x='odometer', y='sellingprice', color='skyblue',
            line_kws={'color': 'red'}, ax=ax)
ax.set_title('Selling Price vs. Odometer Readings', fontsize=20, fontfamily='serif', color='blue')
ax.set_xlabel('Odometer Reading', fontsize=15, fontfamily='serif', color='darkred')
ax.set_ylabel('Selling Price', fontsize=15, fontfamily='serif', color='darkred')
ax.grid(True)
fig.tight_layout()
plt.show()
# 8.========= Pair Plot: Relationships Between Selling Price, MMR, Odometer, and Year =========
# Pairwise scatter/histogram grid over four numeric columns.
selected_columns = df[['sellingprice', 'mmr', 'odometer', 'year']]
sns.pairplot(selected_columns)
# pairplot makes its own figure; grab it to reserve headroom for the title.
pair_figure = plt.gcf()
pair_figure.subplots_adjust(top=0.9)
pair_figure.suptitle('Relationships Between Selling Price, MMR, Odometer, and Year', fontsize=20, fontfamily='serif', color='blue')
plt.show()
# 9.========= Heatmap with Color Bar: Correlation Between Numerical Variables =========
# Pairwise correlation of the five numeric columns, annotated to two decimals.
corr_matrix = df[['year', 'condition', 'odometer', 'mmr', 'sellingprice']].corr()
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt=".2f", ax=ax)
ax.set_title('Correlation Between Numerical Variables', fontsize=20, fontfamily='serif', color='blue')
fig.tight_layout()
plt.show()
# 10.========= Box Plot: Selling Price Distribution by Vehicle Condition =========
# One box of selling prices per condition value.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=df, x='condition', y='sellingprice', palette='coolwarm', ax=ax)
ax.set_title('Selling Price Distribution by Vehicle Condition', fontsize=20, fontfamily='serif', color='blue')
ax.set_xlabel('Vehicle Condition', fontsize=15, fontfamily='serif', color='darkred')
ax.set_ylabel('Selling Price', fontsize=15, fontfamily='serif', color='darkred')
ax.grid(True)
fig.tight_layout()
plt.show()
# 11.========= Violin Plot: Selling Price Distribution by Year of Manufacture =========
# One violin of selling prices per value of the 'year' column.
fig, ax = plt.subplots(figsize=(15, 8))
sns.violinplot(data=df, x='year', y='sellingprice', palette='coolwarm', ax=ax)
ax.set_title('Selling Price Distribution by Year of Manufacture', fontsize=20, fontfamily='serif', color='blue')
ax.set_xlabel('Year of Manufacture', fontsize=15, fontfamily='serif', color='darkred')
ax.set_ylabel('Selling Price', fontsize=15, fontfamily='serif', color='darkred')
ax.grid(True)
fig.tight_layout()
plt.show()
# 12.========= Strip Plot: Selling Prices Across Different Body Types =========
# Individual selling prices drawn as points, one column per body type.
fig, ax = plt.subplots(figsize=(15, 8))
sns.stripplot(data=df, x='body', y='sellingprice', palette='coolwarm', ax=ax)
ax.set_title('Selling Prices Across Different Body Types', fontsize=20, fontfamily='serif', color='blue')
ax.set_xlabel('Body Type', fontsize=15, fontfamily='serif', color='darkred')
ax.set_ylabel('Selling Price', fontsize=15, fontfamily='serif', color='darkred')
ax.grid(True)
fig.tight_layout()
plt.show()
# 13.========= Area Plot: Stacked Sales Volume by Top 5 Makes Over Time =========
# Per-make, per-year sum of selling prices; the five makes with the largest
# overall total are stacked in one area chart.
sales_volume = df.groupby(['make', 'year'])['sellingprice'].sum()
sales_volume_df = sales_volume.unstack(level=0)  # rows: year, columns: make
total_sales_volume = sales_volume_df.sum()
top_5_makes = total_sales_volume.nlargest(5).index
sales_volume_top_5 = sales_volume_df[top_5_makes]
# DataFrame.plot opens a new figure unless it is handed an Axes, which used to
# leave the figure created here empty and the chart at the default size.
fig, ax = plt.subplots(figsize=(10, 6))
sales_volume_top_5.plot.area(stacked=True, ax=ax)
ax.set_title('Stacked Sales Volume by Top 5 Makes Over Time', fontsize=15, fontfamily='serif', color='blue')
ax.set_xlabel('Year', fontsize=12, fontfamily='serif', color='darkred')
ax.set_ylabel('Sales Volume', fontsize=12, fontfamily='serif', color='darkred')
ax.grid(True)
plt.show()
# 14.========= Stacked Bar Plot: Proportion of Sales by Condition for Each Year =========
# Row counts per (year, condition), normalised so each year's bar sums to 1.
pivot_table = df.pivot_table(index='year', columns='condition', values='sellingprice',
                             aggfunc='count', fill_value=0)
row_totals = pivot_table.sum(axis=1)
proportions = pivot_table.divide(row_totals, axis=0)
ax = proportions.plot.bar(stacked=True, figsize=(15, 8))
ax.set_title('Proportion of Sales by Condition for Each Year', fontsize=20, fontfamily='serif', color='blue')
ax.set_xlabel('Year', fontsize=15, fontfamily='serif', color='darkred')
ax.set_ylabel('Proportion', fontsize=15, fontfamily='serif', color='darkred')
ax.grid(True)
ax.figure.tight_layout()
plt.show()
# 15.========= Joint Plot: KDE and Scatter Representation of Selling Price vs. MMR =========
# Scatter of MMR against selling price with 30-bin filled marginal histograms.
joint_grid = sns.jointplot(x='mmr', y='sellingprice', data=df, kind='scatter', color='skyblue',
                           marginal_kws=dict(bins=30, fill=True))
joint_grid.fig.suptitle('Selling Price vs. MMR', fontsize=20, fontfamily='serif', color='blue')
# jointplot builds a multi-axes figure, so pyplot's "current axes" is not the
# central scatter panel. Label and grid the joint axes explicitly instead of
# going through plt.xlabel / plt.ylabel / plt.grid.
joint_grid.set_axis_labels('MMR', 'Selling Price', fontsize=15, fontfamily='serif', color='darkred')
joint_grid.ax_joint.grid(True)
joint_grid.fig.tight_layout()
plt.show()
# 16.========= Hexbin Plot: Density of Data Points for Selling Price vs. MMR =========
# Hexagonal 2-D histogram of MMR against selling price, with a count colour bar.
fig, ax = plt.subplots(figsize=(10, 6))
hex_bins = ax.hexbin(df['mmr'], df['sellingprice'], gridsize=30, cmap='Blues')
cb = fig.colorbar(hex_bins, ax=ax, label='count in bin')
ax.set_title('Density of Data Points for Selling Price vs. MMR', fontsize=20, fontfamily='serif', color='blue')
ax.set_xlabel('MMR', fontsize=15, fontfamily='serif', color='darkred')
ax.set_ylabel('Selling Price', fontsize=15, fontfamily='serif', color='darkred')
ax.grid(True)
# Show only the lower half of each axis range.
ax.set_xlim(0, df['mmr'].max() / 2)
ax.set_ylim(0, df['sellingprice'].max() / 2)
fig.tight_layout()
plt.show()
# 17.========= 3D Scatter Plot: Year, Selling Price, and Odometer Readings =========
# Interactive plotly 3-D scatter, coloured by the 'year' column.
fig = px.scatter_3d(df, x='year', y='sellingprice', z='odometer', color='year')
# One font spec shared by the three axis titles. The axis-level `titlefont`
# attribute is deprecated in plotly in favour of `title.font`, so each axis
# title is given as a {text, font} object.
axis_title_font = dict(family="Serif", size=15, color="darkred")
fig.update_layout(
    title={
        'text': 'Year, Selling Price, and Odometer Readings',
        'font': dict(
            family="Serif",
            size=20,
            color='blue',
        ),
    },
    scene=dict(
        xaxis=dict(title=dict(text='Year', font=axis_title_font)),
        yaxis=dict(title=dict(text='Selling Price', font=axis_title_font)),
        zaxis=dict(title=dict(text='Odometer Reading', font=axis_title_font)),
    ),
)
fig.show()
# 18.========= Swarm Plot: Selling Prices by Make for Top 5 Selling Makes =========
# Restrict the plot to the five makes with the most rows, as the section title
# states; the unfiltered frame puts every make on the x-axis.
# NOTE(review): swarmplot positions every point individually and can be very
# slow on large frames - consider sampling `swarm_data` if this does not finish.
top_5_selling_makes = df['make'].value_counts().nlargest(5).index
swarm_data = df[df['make'].isin(top_5_selling_makes)]
plt.figure(figsize=(10, 6))
sns.swarmplot(x='make', y='sellingprice', data=swarm_data)
plt.title('Selling Prices by Make', fontsize=20, fontfamily='serif', color='blue')
plt.xlabel('Make', fontsize=15, fontfamily='serif', color='darkred')
plt.ylabel('Selling Price', fontsize=15, fontfamily='serif', color='darkred')
plt.grid(True)
plt.tight_layout()
# Save before show(): once the window is closed, savefig would write a blank
# figure.
plt.savefig('C:\\Github\\InformationVisualization_TermProject\\staticgraphs\\18_selling_prices_top_5_makes.png')
plt.show()
# 19.========= QQ-Plot: Compare Selling Price Distribution to Normal Distribution =========
# Probability plot of selling price against scipy's default (normal) distribution.
import matplotlib.pyplot as plt
from scipy.stats import probplot
fig = plt.figure(figsize=(10, 6))
probplot(df['sellingprice'], plot=plt)
ax = plt.gca()
# These calls replace the title and axis labels that probplot sets by default.
ax.set_title('QQ-Plot: Compare Selling Price Distribution to Normal Distribution',
             fontsize=20, fontfamily='serif', color='blue')
ax.set_xlabel('Theoretical Quantiles', fontsize=15, fontfamily='serif', color='darkred')
ax.set_ylabel('Ordered Values', fontsize=15, fontfamily='serif', color='darkred')
ax.grid(True)
fig.tight_layout()
# Two-decimal tick labels; each axis gets its own formatter instance.
for axis in (ax.xaxis, ax.yaxis):
    axis.set_major_formatter(plt.FuncFormatter('{:.2f}'.format))
plt.show()
# 20.========= Dist Plot: Compare Selling Price Distributions for Different Transmission Types =========
# One KDE curve of selling price per transmission type, overlaid on shared axes.
# Missing transmission values are skipped: comparing against NaN selects no rows.
transmission_types = df['transmission'].dropna().unique()
plt.figure(figsize=(10, 6))
for transmission in transmission_types:
    subset = df[df['transmission'] == transmission]
    # sns.distplot is deprecated; kdeplot draws the same KDE-only curve that
    # distplot(hist=False, kde=True) produced.
    sns.kdeplot(subset['sellingprice'], linewidth=3, label=transmission)
plt.title('Selling Price Distributions for Different Transmission Types', fontsize=20, fontfamily='serif', color='blue')
plt.xlabel('Selling Price', fontsize=15, fontfamily='serif', color='darkred')
plt.ylabel('Density', fontsize=15, fontfamily='serif', color='darkred')
plt.grid(True)
plt.tight_layout()
plt.legend(prop={'size': 10}, title='Transmission Type')
# Save before show(): once the window is closed, savefig would write a blank
# figure.
plt.savefig('C:\\Github\\InformationVisualization_TermProject\\staticgraphs\\20_selling_price_distribution_transmission.png')
plt.show()
# TODO - change
# 21.========= Cluster Map: Hierarchical Clustering of Vehicles Based on Numerical Features =========
# Standardise the five numeric columns, then draw a hierarchically clustered heatmap.
import seaborn as sns
numerical_features = df[['year', 'condition', 'odometer', 'mmr', 'sellingprice']]
scaler = StandardScaler()
scaled_features = scaler.fit_transform(numerical_features)
df_scaled = pd.DataFrame(scaled_features, columns=numerical_features.columns)
# NOTE(review): rows are clustered too, so cost grows quickly with the number
# of rows, and `standard_scale=1` rescales columns that StandardScaler has
# already standardised - confirm both are intended.
cluster_grid = sns.clustermap(df_scaled, cmap='coolwarm', standard_scale=1)
# Save through the grid object and before show(): plt.savefig after show()
# does not capture the cluster map.
cluster_grid.savefig('C:\\Github\\InformationVisualization_TermProject\\staticgraphs\\21_cluster_map.png')
plt.show()
# 22.========= Contour Plot: Density of Sales in the Space of Year and Selling Price =========
# Filled bivariate KDE over ('year', 'sellingprice').
import seaborn as sns
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(10, 6))
sns.kdeplot(data=df, x='year', y='sellingprice', fill=True, ax=ax)
ax.set_title('Density of Sales in the Space of Year and Selling Price', fontsize=20, fontfamily='serif', color='blue')
ax.set_xlabel('Year', fontsize=15, fontfamily='serif', color='darkred')
ax.set_ylabel('Selling Price', fontsize=15, fontfamily='serif', color='darkred')
ax.grid(True)
fig.tight_layout()
# Two-decimal tick labels; each axis gets its own formatter instance.
for axis in (ax.xaxis, ax.yaxis):
    axis.set_major_formatter(plt.FuncFormatter('{:.2f}'.format))
plt.show()
# Pick from Mahima's code
# 23.========= Rug Plot: Distribution of Individual Data Points for MMR Values =========
# TODO - change
# 24.========= Boxen Plot: Selling Prices Across Different States (Top 10 States by Sales Volume) =========
# ---------------------------SUBPLOTS-----------------------------------
# 25.========= Pricing Dynamics and Market Value =========
"""
Line plot: Average selling price vs. average MMR over time
Scatter plot: Selling price vs. MMR, colored by vehicle condition
Box plot: Selling price distribution by make (top 10 makes)
Bar plot: Average price difference (Selling price - MMR) by vehicle age group
"""
avg_price_mmr_year = df.groupby('year')[['sellingprice', 'mmr']].mean()
# "Top 10" here means the ten makes with the highest mean selling price.
top_10_makes = df.groupby('make')['sellingprice'].mean().nlargest(10)
df['age'] = 2024 - df['year']  # Assuming the current year is 2024
# Vectorised form of the former groupby('age').apply(lambda ...): subtract once,
# then average per age. Same result, without a Python-level call per group.
avg_price_diff_age = (df['sellingprice'] - df['mmr']).groupby(df['age']).mean()

fig, axs = plt.subplots(2, 2, figsize=(20, 20))

# Top-left: average selling price vs. average MMR per year.
axs[0, 0].plot(avg_price_mmr_year.index, avg_price_mmr_year['sellingprice'], label='Selling Price')
axs[0, 0].plot(avg_price_mmr_year.index, avg_price_mmr_year['mmr'], label='MMR')
axs[0, 0].set_title('Average Selling Price vs. Average MMR Over Time')
axs[0, 0].set_xlabel('Year')
axs[0, 0].set_ylabel('Price')
axs[0, 0].legend()

# Top-right: selling price vs. MMR, coloured by vehicle condition.
sns.scatterplot(x='mmr', y='sellingprice', hue='condition', data=df, ax=axs[0, 1])
axs[0, 1].set_title('Selling Price vs. MMR')
axs[0, 1].set_xlabel('MMR')
axs[0, 1].set_ylabel('Selling Price')

# Bottom-left: selling price distribution for the selected ten makes.
sns.boxplot(x='make', y='sellingprice', data=df[df['make'].isin(top_10_makes.index)], ax=axs[1, 0])
axs[1, 0].set_title('Selling Price Distribution by Make (Top 10 Makes)')
axs[1, 0].set_xlabel('Make')
axs[1, 0].set_ylabel('Selling Price')

# Bottom-right: mean (selling price - MMR) per vehicle age.
sns.barplot(x=avg_price_diff_age.index, y=avg_price_diff_age.values, ax=axs[1, 1])
axs[1, 1].set_title('Average Price Difference (Selling Price - MMR) by Vehicle Age Group')
axs[1, 1].set_xlabel('Age Group')
axs[1, 1].set_ylabel('Average Price Difference')

plt.tight_layout()
plt.show()
# 26.========= Sales Volume Analysis =========
"""
Line plot: Total sales volume over years
Stacked bar plot: Sales volume by vehicle condition for each year
Pie chart: Market share of top 5 makes
Bar plot: Sales volume by month (to show seasonality)
"""
# Aggregates feeding the four panels.
sales_volume_years = df.groupby('year')['sellingprice'].sum()
sales_volume_condition_year = df.groupby(['year', 'condition'])['sellingprice'].sum().unstack()
top_5_makes = df['make'].value_counts().nlargest(5)
sales_volume_month = df.groupby('salemonth')['sellingprice'].sum()

fig, axs = plt.subplots(2, 2, figsize=(20, 20))
yearly_ax, condition_ax = axs[0, 0], axs[0, 1]
share_ax, monthly_ax = axs[1, 0], axs[1, 1]

# Top-left: summed selling price per year.
yearly_ax.plot(sales_volume_years.index, sales_volume_years.values, color='skyblue', linewidth=2)
yearly_ax.set(title='Total Sales Volume Over Years', xlabel='Year', ylabel='Total Sales Volume')

# Top-right: per-year sums stacked by condition.
sales_volume_condition_year.plot(kind='bar', stacked=True, ax=condition_ax)
condition_ax.set(title='Sales Volume by Vehicle Condition for Each Year', xlabel='Year', ylabel='Sales Volume')

# Bottom-left: row-count share among the five most frequent makes.
share_ax.pie(top_5_makes, labels=top_5_makes.index, autopct='%1.1f%%')
share_ax.set_title('Market Share of Top 5 Makes')

# Bottom-right: summed selling price per sale month.
monthly_ax.bar(sales_volume_month.index, sales_volume_month.values, color='skyblue')
monthly_ax.set(title='Sales Volume by Month', xlabel='Month', ylabel='Sales Volume')

fig.tight_layout()
plt.show()
# 27.========= Vehicle Characteristics and Their Impact =========
"""
Scatter plot: Year vs. Odometer reading, colored by selling price
Histogram: Distribution of vehicle ages in the dataset
Bar plot: Average MMR by vehicle condition
Heatmap: Correlation between numerical features (year, odometer, selling price, MMR)
"""
df['age'] = 2024 - df['year']  # Assuming the current year is 2024

fig, axs = plt.subplots(2, 2, figsize=(20, 15))
scatter_ax, age_ax = axs[0, 0], axs[0, 1]
mmr_ax, corr_ax = axs[1, 0], axs[1, 1]

# Top-left: year vs. odometer, hue mapped to selling price.
sns.scatterplot(data=df, x='year', y='odometer', hue='sellingprice', ax=scatter_ax)
scatter_ax.set(title='Year vs. Odometer Reading', xlabel='Year', ylabel='Odometer Reading')

# Top-right: histogram of the derived 'age' column (no KDE overlay).
sns.histplot(df['age'], kde=False, ax=age_ax)
age_ax.set(title='Distribution of Vehicle Ages', xlabel='Age', ylabel='Frequency')

# Bottom-left: mean MMR per condition value.
avg_mmr_condition = df.groupby('condition')['mmr'].mean()
sns.barplot(x=avg_mmr_condition.index, y=avg_mmr_condition.values, ax=mmr_ax)
mmr_ax.set(title='Average MMR by Vehicle Condition', xlabel='Condition', ylabel='Average MMR')

# Bottom-right: annotated correlation matrix of four numeric columns.
numerical_features = df[['year', 'odometer', 'sellingprice', 'mmr']]
correlation = numerical_features.corr()
sns.heatmap(correlation, annot=True, ax=corr_ax)
corr_ax.set_title('Correlation Between Numerical Features')

fig.tight_layout()
plt.show()
# 28.========= Regional Analysis and Seasonal Trends =========
"""
1. Bar plot: Top 10 states by average selling price
2. Bar plot: Top 10 states by sales volume
3. Heatmap: Monthly sales volume by year
4. Line plot: Average selling price trend for top 5 states over time
"""
# Aggregates feeding the four panels.
price_by_state = df.groupby('state')['sellingprice']
avg_price_states = price_by_state.mean().nlargest(10)
sales_volume_states = price_by_state.sum().nlargest(10)
sales_volume_month_year = df.groupby(['saleyear', 'salemonth'])['sellingprice'].sum().unstack()
# The five states with the most rows; their order drives the line order below.
top_5_states = df['state'].value_counts().nlargest(5).index
in_top_states = df['state'].isin(top_5_states)
avg_price_trend_top_5_states = (
    df[in_top_states].groupby(['saleyear', 'state'])['sellingprice'].mean().unstack()
)

fig, axs = plt.subplots(2, 2, figsize=(20, 20))
avg_ax, volume_ax = axs[0, 0], axs[0, 1]
heat_ax, trend_ax = axs[1, 0], axs[1, 1]

# Top-left: ten states with the highest mean selling price.
sns.barplot(x=avg_price_states.index, y=avg_price_states.values, ax=avg_ax)
avg_ax.set(title='Top 10 States by Average Selling Price', xlabel='State', ylabel='Average Selling Price')

# Top-right: ten states with the highest summed selling price.
sns.barplot(x=sales_volume_states.index, y=sales_volume_states.values, ax=volume_ax)
volume_ax.set(title='Top 10 States by Sales Volume', xlabel='State', ylabel='Sales Volume')

# Bottom-left: summed selling price per (sale year, sale month).
sns.heatmap(sales_volume_month_year, annot=True, ax=heat_ax)
heat_ax.set(title='Monthly Sales Volume by Year', xlabel='Month', ylabel='Year')

# Bottom-right: one mean-price line per top-5 state across sale years.
for state in top_5_states:
    trend_ax.plot(avg_price_trend_top_5_states.index, avg_price_trend_top_5_states[state], label=state)
trend_ax.set(title='Average Selling Price Trend for Top 5 States Over Time',
             xlabel='Year', ylabel='Average Selling Price')
trend_ax.legend()

fig.tight_layout()
plt.show()