-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_load_data.py
More file actions
92 lines (78 loc) · 2.66 KB
/
test_load_data.py
File metadata and controls
92 lines (78 loc) · 2.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import os

import numpy as np
import pandas as pd
import pymysql
# ========== 1. Load JSON ==========
df_applicant = pd.read_json("applicant_info.json", lines=True)
df_financial = pd.read_json("financial_info.json", lines=True)
df_loan = pd.read_json("loan_info.json", lines=True)
print("✅ JSON files loaded")
print("Applicant columns:", df_applicant.columns.tolist())
print("Financial columns:", df_financial.columns.tolist())
print("Loan columns:", df_loan.columns.tolist())
# ========== 2. Preprocess (NaN → None) ==========
def clean_df(df):
df = df.replace({np.nan: None})
return df
df_applicant = clean_df(df_applicant)
df_financial = clean_df(df_financial)
df_loan = clean_df(df_loan)
# ========== 3. Connect to MySQL (NO DB selected yet) ==========
conn = pymysql.connect(
host="localhost",
user="root", # 🔧 change if needed
password="pratik", # 🔧 change if needed
autocommit=True
)
cursor = conn.cursor()
print("✅ Connected to MySQL server")
# ========== 4. Create Database & Use ==========
cursor.execute("CREATE DATABASE IF NOT EXISTS loan_db")
cursor.execute("USE loan_db")
# ========== 5. Create Tables ==========
cursor.execute("""
CREATE TABLE IF NOT EXISTS applicant_info (
Loan_ID VARCHAR(50) PRIMARY KEY,
Gender VARCHAR(10),
Married VARCHAR(10),
Dependents VARCHAR(10),
Education VARCHAR(20),
Self_Employed VARCHAR(10)
)
""")
cursor.execute("""
CREATE TABLE IF NOT EXISTS financial_info (
Loan_ID VARCHAR(50) PRIMARY KEY,
ApplicantIncome FLOAT,
CoapplicantIncome FLOAT,
LoanAmount FLOAT,
Loan_Amount_Term FLOAT,
Credit_History FLOAT
)
""")
cursor.execute("""
CREATE TABLE IF NOT EXISTS loan_info (
Loan_ID VARCHAR(50) PRIMARY KEY,
Property_Area VARCHAR(20),
Loan_Status VARCHAR(10)
)
""")
print("✅ Tables created (if not exist)")
# ========== 6. Insert Data ==========
def insert_data(df, table_name):
df = df.where(pd.notnull(df), None) # replace NaN → None
cols = ",".join([f"`{col}`" for col in df.columns])
placeholders = ",".join(["%s"] * len(df.columns))
insert_sql = f"REPLACE INTO `{table_name}` ({cols}) VALUES ({placeholders})"
clean_data = []
for row in df.itertuples(index=False, name=None):
clean_row = [None if (isinstance(val, float) and np.isnan(val)) else val for val in row]
clean_data.append(tuple(clean_row))
cursor.executemany(insert_sql, clean_data)
conn.commit()
print(f"✅ Inserted {len(df)} rows into {table_name}")
insert_data(df_applicant, "applicant_info")
insert_data(df_financial, "financial_info")
insert_data(df_loan, "loan_info")
cursor.close()
conn.close()
print("🎉 All data successfully loaded into MySQL!")