Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions configs/dvc_remote.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Pipeline file with a single stage, track_raw_data.
# The stage command runs `dvc add` on data/raw/MachineLearningRating_v3.txt;
# that same path is listed under deps, and the matching .dvc file under outs.
# NOTE(review): dvc.yaml declares a stage with the same name whose command
# targets data/processed/insurance_cleaned.csv instead - confirm which path
# this stage is meant to track.
stages:
track_raw_data:
cmd: dvc add data/raw/MachineLearningRating_v3.txt
deps:
- data/raw/MachineLearningRating_v3.txt
outs:
- data/raw/MachineLearningRating_v3.txt.dvc
4 changes: 4 additions & 0 deletions dvc.lock
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
schema: '2.0'
stages:
track_raw_data:
cmd: dvc add data/processed/insurance_cleaned.csv
3 changes: 3 additions & 0 deletions dvc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Single-stage pipeline: track_raw_data runs `dvc add` on
# data/processed/insurance_cleaned.csv. No deps or outs are declared.
# NOTE(review): the stage name says "raw" but the tracked path is under
# data/processed - confirm the name or the path.
stages:
track_raw_data:
cmd: dvc add data/processed/insurance_cleaned.csv
13 changes: 13 additions & 0 deletions eda.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Pipeline for the EDA report: the `process` stage runs the report generator
# against the raw data file and declares the report and plot files it writes.
stages:
  process:
    # NOTE(review): the command and deps below use `config/data.yaml`, while
    # the project scaffold creates `configs/data.yaml` - confirm the directory.
    cmd: python src/insurance_analytics/reports/report_generator.py --config config/data.yaml
    deps:
      # NOTE(review): this script differs from the one executed by `cmd`
      # (src/insurance_analytics/reports/report_generator.py) - confirm which
      # file should be tracked as the code dependency.
      - scripts/eda_report_generator.py
      - data/raw/MachineLearningRating_v3.txt
      - config/data.yaml
    # Stage outputs must be listed under `outs`; `out` is not a stage field.
    outs:
      - reports/eda_reports.md
      - reports/plots/hist_totalpremium.png
      - reports/plots/province_count.png
      - reports/plots/loss_ratio_by_province.png
      - reports/plots/claims_vs_premium_vehicle.png
164 changes: 164 additions & 0 deletions init_project.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
#!/usr/bin/env bash
#
# Scaffolds the Insurance Risk Analytics project layout: walks a list of
# directories and a list of files, creates whichever are missing, and prints
# a summary of what was created. Paths that already exist are only reported.
set -euo pipefail

# Opening banner.
printf '%s\n' \
  "" \
  "---------------------------------------------" \
  " Initializing Insurance Risk Analytics Project" \
  "---------------------------------------------" \
  ""

# Colour escape sequences. They are stored with a literal backslash and are
# only turned into real escapes when printed with escape processing enabled.
GREEN='\e[32m'
YELLOW='\e[33m'
CYAN='\e[36m'
RESET='\e[0m'

# Paths created during this run; filled in by the helpers, read by the summary.
declare -a created_dirs=()
declare -a created_files=()

# ----------------------------------------
# Helpers
# ----------------------------------------
#######################################
# Create a directory (including parents) if it does not exist yet and place
# a .gitkeep file inside it.
# Globals:
#   GREEN, CYAN, RESET (read) - colour escape sequences for the status line
#   created_dirs (appended)   - receives the path when a directory is created
# Arguments:
#   $1 - directory path to ensure
# Outputs:
#   One status line on stdout.
# Returns:
#   0 if the directory already existed or was created; the failing command's
#   status if mkdir or touch fails (nothing is recorded in that case).
#######################################
ensure_dir() {
  local dir_path=$1

  if [[ -d "$dir_path" ]]; then
    # Only the colour codes go through %b; the path is printed verbatim with
    # %s so backslashes in it are not interpreted as escape sequences.
    printf '%b%s%b %s\n' "$CYAN" "Directory exists:" "$RESET" "$dir_path"
    return 0
  fi

  # `--` keeps a path that starts with '-' from being parsed as an option.
  mkdir -p -- "$dir_path" || return
  touch -- "$dir_path/.gitkeep" || return
  created_dirs+=("$dir_path")
  printf '%b%s%b %s\n' "$GREEN" "Created directory:" "$RESET" "$dir_path"
}

#######################################
# Create an empty file (and its parent directory) if the file does not
# exist yet.
# Globals:
#   GREEN, CYAN, RESET (read) - colour escape sequences for the status line
#   created_files (appended)  - receives the path when a file is created
# Arguments:
#   $1 - file path to ensure
# Outputs:
#   One status line on stdout.
# Returns:
#   0 if the file already existed or was created; the failing command's
#   status if dirname, mkdir or touch fails (nothing is recorded then).
#######################################
ensure_file() {
  local file_path=$1

  if [[ -f "$file_path" ]]; then
    # Only the colour codes go through %b; the path is printed verbatim with
    # %s so backslashes in it are not interpreted as escape sequences.
    printf '%b%s%b %s\n' "$CYAN" "File exists:" "$RESET" "$file_path"
    return 0
  fi

  # Declared separately so a dirname failure is not masked by `local`.
  local parent_dir
  parent_dir=$(dirname -- "$file_path") || return
  mkdir -p -- "$parent_dir" || return
  touch -- "$file_path" || return
  created_files+=("$file_path")
  printf '%b%s%b %s\n' "$GREEN" "Created file:" "$RESET" "$file_path"
}

# ----------------------------------------
# DIRECTORIES
# ----------------------------------------
# Brace expansion keeps sibling directories on one line; the expanded list
# is in the order the directories are processed.
dirs=(
  .github/workflows
  configs
  data/{raw,interim,processed}
  docs
  notebooks/{eda,modeling}
  scripts
  src/insurance_analytics
  src/insurance_analytics/{data,preprocessing,eda,models,viz,utils}
  tests/{unit,integration}
)

for dir_path in "${dirs[@]}"; do
  ensure_dir "$dir_path"
done

# ----------------------------------------
# FILES
# ----------------------------------------
# Brace expansion groups files that share a directory and extension; the
# expanded list is in the order the files are processed.
files=(
  # CI workflows
  .github/workflows/{ci,codeql}.yml

  # Configuration
  configs/{data,modeling}.yaml

  # Documentation
  docs/{README_project_overview,EDA_report_template,Modeling_report_template}.md

  # Runner scripts
  scripts/{run_eda,run_modeling}.sh

  # Package root
  src/insurance_analytics/{__init__,config}.py

  # Subpackages
  src/insurance_analytics/data/{__init__,load_data,versioning}.py
  src/insurance_analytics/preprocessing/{__init__,cleaner,feature_engineering}.py
  src/insurance_analytics/eda/{__init__,exploration,visualization}.py
  src/insurance_analytics/models/{__init__,linear_regression,random_forest,xgboost_model,evaluation,interpretability}.py
  src/insurance_analytics/viz/{__init__,plots}.py
  src/insurance_analytics/utils/{__init__,helpers,io_utils}.py

  # Tests
  tests/unit/test_{cleaner,feature_engineering,load_data,models}.py
  tests/integration/test_{eda_pipeline,model_pipeline}.py

  # Top-level project files
  requirements.txt
  requirements-dev.txt
  pyproject.toml
  README.md
  .gitignore
)

for file_path in "${files[@]}"; do
  ensure_file "$file_path"
done

# Completion message. %b expands the backslash escapes stored in the colour
# variables.
printf '\n'
printf '%b\n' "${GREEN}Project structure for Insurance Risk Analytics initialized successfully.${RESET}"
printf '\n'

# Summary
printf '%s\n' "---------------------------------------------"
printf '%b\n' "${YELLOW}Summary${RESET}"
printf '%s\n' "---------------------------------------------"
printf 'Directories created: %s\n' "${#created_dirs[@]}"
printf 'Files created: %s\n' "${#created_files[@]}"

# Each list is expanded only when it is non-empty, so an empty array is never
# dereferenced while `set -u` is active.
if [[ ${#created_dirs[@]} -gt 0 ]]; then
  printf '\n%s\n' "New directories:"
  for new_dir in "${created_dirs[@]}"; do
    printf ' - %s\n' "$new_dir"
  done
fi

if [[ ${#created_files[@]} -gt 0 ]]; then
  printf '\n%s\n' "New files:"
  for new_file in "${created_files[@]}"; do
    printf ' - %s\n' "$new_file"
  done
fi

printf '\n'
printf '%b\n' "${GREEN}All done.${RESET}"
75 changes: 23 additions & 52 deletions notebooks/exploration/eda.ipynb

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion src/insurance_analytics/utils/io_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ def write_csv(df, path):
try:
os.makedirs(os.path.dirname(path), exist_ok=True)
df.to_csv(path, index=False)
print(f"[IO] CSV file is saved sucessfully")
except Exception as e:
print(f"[IO] Failed to write CSV: {e}")

Expand Down
Loading