-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
157 lines (136 loc) · 4.32 KB
/
main.py
File metadata and controls
157 lines (136 loc) · 4.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# main.py
import warnings
from sklearn.exceptions import InconsistentVersionWarning
import os
import uvicorn
# 1) Silence scikit‑learn version mismatch warnings
warnings.filterwarnings("ignore", category=InconsistentVersionWarning)
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import pandas as pd
import joblib
from sklearn.pipeline import Pipeline
# 2) FastAPI application with CORS enabled for the known frontend origins.
app = FastAPI()

# Browser origins allowed to call this API (local dev + production).
ALLOWED_ORIGINS = [
    "http://localhost:3000",
    "https://meta-predict-web.vercel.app",  # adjust for your frontend
]

app.add_middleware(
    CORSMiddleware,
    allow_origins=ALLOWED_ORIGINS,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
@app.get("/debug")
async def debug_files():
    """Diagnostic endpoint: list files in the current working directory.

    Useful for verifying that the model artifact was deployed next to the
    app. Returns the directory listing and the working-directory path.
    """
    # `os` is already imported at module level — the redundant
    # function-local `import os` has been removed.
    return {
        "files": os.listdir(),
        "cwd": os.getcwd(),
    }
@app.get("/")
async def health_check():
    """Liveness probe: confirms the service is running."""
    payload = {"status": "up"}
    return payload
# 3) Load the trained pipeline once at startup (fails fast if the model
#    artifact is missing from the working directory).
pipeline = joblib.load("logistic_pipeline_model.pkl")

# 4) The fitted ColumnTransformer, used both for name extraction below and
#    for per-request explanation in /predict.
preproc = pipeline.named_steps["preprocessing"]


def _extract_feature_names(column_transformer):
    """Return output-column feature names for a fitted ColumnTransformer.

    Walks ``transformers_`` in order (matching the column order of the
    transformed matrix) and derives names for each sub-transformer:
    dropped/remainder entries contribute nothing, ``passthrough`` re-emits
    the input column names, one-hot encoders expand to ``<col>_<category>``,
    and any transformer exposing ``get_feature_names_out`` is asked
    directly. Falls back to the input column names when nothing better is
    available.

    NOTE(review): the one-hot naming assumes the encoder was fitted without
    ``drop=...``; dropped categories would make this list longer than the
    transformed matrix — confirm against the training code.
    """

    def _one_hot_names(cols, categories):
        # Manual one-hot names: <input_col>_<category>, category order as fitted.
        return [f"{col}_{cat}" for col, cats in zip(cols, categories) for cat in cats]

    def _names_out(transformer, cols):
        # Some estimators accept input names, some take no argument.
        try:
            return list(transformer.get_feature_names_out(cols))
        except Exception:
            return list(transformer.get_feature_names_out())

    names = []
    for name, transformer, cols in column_transformer.transformers_:
        # Dropped columns and the (unused) remainder produce no output.
        if transformer == "drop" or name == "remainder":
            continue
        # Passthrough: output columns are the input columns, unchanged.
        if transformer == "passthrough":
            names.extend(cols)
            continue
        # Nested Pipeline: prefer a fitted one-hot step inside it.
        if isinstance(transformer, Pipeline):
            ohe = next(
                (s for s in transformer.named_steps.values() if hasattr(s, "categories_")),
                None,
            )
            if ohe is not None:
                names.extend(_one_hot_names(cols, ohe.categories_))
            else:
                # Otherwise ask the pipeline's final step for names.
                final = transformer.steps[-1][1]
                if hasattr(final, "get_feature_names_out"):
                    names.extend(_names_out(final, cols))
                else:
                    names.extend(cols)
            continue
        # Bare OneHotEncoder outside a Pipeline.
        if hasattr(transformer, "categories_"):
            names.extend(_one_hot_names(cols, transformer.categories_))
            continue
        # Any other transformer that can report its output names.
        if hasattr(transformer, "get_feature_names_out"):
            names.extend(_names_out(transformer, cols))
            continue
        # Fallback: assume the columns pass through unchanged.
        names.extend(cols)
    return names


feature_names = _extract_feature_names(preproc)
# 5) Define Pydantic schema
class InputData(BaseModel):
    """Request body for /predict: one record of raw, untransformed values.

    Field names must match the column names the preprocessing
    ColumnTransformer was fitted on.
    """
    Albuminuria: int       # integer-coded albuminuria level
    WaistCirc: float       # waist circumference — units not shown here; confirm vs training data
    UricAcid: float        # uric acid measurement
    BloodGlucose: float    # blood glucose measurement
    HDL: float             # HDL cholesterol
    Triglycerides: float   # triglycerides
    Age: int               # age — presumably years; confirm vs training data
    Sex: str               # categorical; one-hot encoded by the pipeline
    Marital: str           # marital status (categorical)
    UrAlbCr: float         # urine albumin/creatinine ratio — presumably; confirm
    Race: str              # race (categorical)
# 6) Prediction endpoint
@app.post("/predict")
def predict(data: InputData):
    """Predict the class for one record and explain the prediction.

    Returns the predicted class, the positive-class probability, and the
    five preprocessed features with the largest absolute contribution
    (feature value x logistic-regression coefficient) to the score.
    """
    # a) Single-row DataFrame from the JSON body.
    #    NOTE(review): .dict() is the pydantic-v1 spelling; switch to
    #    .model_dump() if the project moves to pydantic v2.
    df = pd.DataFrame([data.dict()])

    # b) Class prediction and positive-class probability.
    pred = pipeline.predict(df)[0]
    proba = pipeline.predict_proba(df)[0][1]

    # c) Transform the raw input and grab the classifier coefficients.
    X_proc = preproc.transform(df)
    # A OneHotEncoder inside the ColumnTransformer can make transform()
    # return a scipy sparse matrix, on which `X_proc[0] * coefs` would use
    # matrix semantics instead of elementwise — densify first.
    if hasattr(X_proc, "toarray"):
        X_proc = X_proc.toarray()
    coefs = pipeline.named_steps["classifier"].coef_[0]

    # d) Per-feature contribution to the decision function (value * weight).
    contribs = X_proc[0] * coefs

    # e) Top-5 contributions by absolute value, keeping the signed values.
    contrib_df = pd.DataFrame({
        "feature": feature_names,
        "contribution": contribs,
    })
    contrib_df["abs_val"] = contrib_df.contribution.abs()
    top5 = (
        contrib_df
        .nlargest(5, "abs_val")
        .loc[:, ["feature", "contribution"]]
        .to_dict(orient="records")
    )

    # f) JSON-serializable response.
    return {
        "prediction": int(pred),
        "probability": float(proba),
        "contributions": top5,
    }
if __name__ == "__main__":
    # Local/dev entry point; honors the platform-provided PORT when set.
    listen_port = int(os.environ.get("PORT", 8000))
    uvicorn.run("main:app", host="0.0.0.0", port=listen_port)