Customer segmentation on TM SQLite data (from fcc-synthetic-tm).
It computes per-customer behavioral features and clusters customers with KMeans.
conda create -n tmcseg python=3.11 -y
conda activate tmcseg
python --version
pip install -e .
- Extract features
python -m tm_custseg extract-features --db data\fcc_tm.sqlite --out-csv data\customer_features.csv
- Pick k, then segment with the suggested k
python -m tm_custseg suggest-k --db data\fcc_tm.sqlite --method kmeans --k 4 --kmin 2 --kmax 10 --out-csv data\k_sweep_active.csv
- Run segmentation
python -m tm_custseg segment --db data\fcc_tm.sqlite --k 4 --overwrite --min-tx 1
- Inspect profiles
python -m tm_custseg profile --db data\fcc_tm.sqlite --k 4
- Export segments + features (CSV)
python -m tm_custseg export --db data\fcc_tm.sqlite --k 4 --out-csv data\segments_k4.csv
- PCA scatter CSV
python -m tm_custseg pca-2d --db data\fcc_tm.sqlite --k 4 --out-csv data\pca2d_k4.csv
- Tiny HTML report
python -m tm_custseg report --db data\fcc_tm.sqlite --k 4 --out-html data\seg_report_k4.html --pca-csv data\pca2d_k4.csv
- Explain clusters with RF + permutation importance
python -m tm_custseg explain --db data\fcc_tm.sqlite --k 4 --out-csv data\importance_k4_cv5.csv --mode pi_cv --cv 5
- Visualize importance
python -m tm_custseg viz-importance --db data\fcc_tm.sqlite --k 4 --out-png data\importance_k4_grouped.png --cv 5
- Counts & amounts: n_tx, sum_amt_major, avg_amt_major, max_amt_major
- Composition: pct_card, pct_atm, pct_cash, pct_in
- Activity / network: active_days, daily_rate, unique_cp, cp_ratio
- Stored in table: customer_features.
- customer_segments: (customer_id, method='kmeans', k, segment, distance, created_ts_utc)
- segment_centers: cluster centers per segment in original feature space (JSON)
- Optional CSV exports (features/segments)
# from repo root
conda create -n tmcseg python=3.11 -y
conda activate tmcseg
pip install -e .
python -m tm_custseg extract-features --db data\fcc_tm.sqlite --out-csv data\customer_features.csv
python -m tm_custseg segment --db data\fcc_tm.sqlite --k 4 --overwrite --min-tx 1
python -m tm_custseg suggest-k --db data\fcc_tm.sqlite --method kmeans --k 4 --kmin 2 --kmax 10 --out-csv data\k_sweep_active.csv
python -m tm_custseg profile --db data\fcc_tm.sqlite --k 4
python -m tm_custseg export --db data\fcc_tm.sqlite --k 4 --out-csv data\segments_k4.csv
python -m tm_custseg pca-2d --db data\fcc_tm.sqlite --k 4 --out-csv data\pca2d_k4.csv
python -m tm_custseg report --db data\fcc_tm.sqlite --k 4 --out-html data\seg_report_k4.html --pca-csv data\pca2d_k4.csv
python -m tm_custseg explain --db data\fcc_tm.sqlite --k 4 --out-csv data\importance_k4_cv5.csv --mode pi_cv --cv 5
python -m tm_custseg viz-importance --db data\fcc_tm.sqlite --k 4 --out-png data\importance_k4_grouped.png --cv 5