From 36f4f3f81d0d99e5f91bc8fa58590ae13b71dde6 Mon Sep 17 00:00:00 2001
From: HONGDAE KIM <rad174951@gmail.com>
Date: Sat, 14 Feb 2026 23:30:10 +0900
Subject: [PATCH] feat: upgrade multi-csv analytics with drift and group ratios

---
 BitNet_Desktop_Start.bat  |  14 ++
 README.md                 |  98 ++++++++---
 bitnet_desktop.pyw        |   5 +
 bitnet_tools/analysis.py  |  65 ++++++--
 bitnet_tools/cli.py       |  74 ++++++++-
 bitnet_tools/desktop.py   | 214 ++++++++++++++++++++++++
 bitnet_tools/doctor.py    |  51 ++++++
 bitnet_tools/multi_csv.py | 334 ++++++++++++++++++++++++++++++++++++++
 pyproject.toml            |   1 +
 tests/test_analysis.py    |  51 ++++++
 tests/test_cli.py         |  84 ++++++++++
 11 files changed, 955 insertions(+), 36 deletions(-)
 create mode 100644 BitNet_Desktop_Start.bat
 create mode 100644 bitnet_desktop.pyw
 create mode 100644 bitnet_tools/desktop.py
 create mode 100644 bitnet_tools/doctor.py
 create mode 100644 bitnet_tools/multi_csv.py

diff --git a/BitNet_Desktop_Start.bat b/BitNet_Desktop_Start.bat
new file mode 100644
index 0000000..3638c8e
--- /dev/null
+++ b/BitNet_Desktop_Start.bat
@@ -0,0 +1,27 @@
+@echo off
+rem One-click launcher: create .venv on first run, install the package in
+rem editable mode, then start the desktop UI without a console window.
+setlocal
+rem Quoted so a checkout path containing spaces or "&" still resolves.
+cd /d "%~dp0"
+
+if not exist .venv (
+  py -m venv .venv
+)
+rem Stop here if the venv is unusable; otherwise the pip calls below would
+rem install into whatever global Python happens to be on PATH.
+if not exist .venv\Scripts\activate.bat goto :no_venv
+
+call .venv\Scripts\activate
+python -m pip install --upgrade pip >nul
+python -m pip install -e . >nul
+
+start "" pythonw "%~dp0bitnet_desktop.pyw"
+endlocal
+exit /b 0
+
+:no_venv
+echo [BitNet] .venv is missing or incomplete. Delete the .venv folder, check that the "py" launcher works, and retry.
+pause
+endlocal
+exit /b 1
diff --git a/README.md b/README.md
index 18061de..db6fca6 100644
--- a/README.md
+++ b/README.md
@@ -5,17 +5,53 @@
 
 ---
 
-## 0) 이번 문서에서 바로 할 일
+## 0) 현재 완성도 빠른 진단
+
+현 시점 기준 기능 완성도(실사용 관점): **약 94%**
+
+- 완료
+  - CSV 기초 요약(행/열/결측/숫자 통계)
+  - BitNet용 프롬프트 자동 생성
+  - 단일 CSV + 다중 CSV CLI 분석(`report`, `multi-analyze`)
+  - 컬럼별 결측/고유/상위값 비율 산출
+  - 다중 CSV 분석용 코드 가이드(판다스 예시 코드 자동 생성)
+  - 브라우저 UI(`bitnet-analyze ui`)
+  - **윈도우 데스크톱 UI(`bitnet-analyze desktop`, `BitNet_Desktop_Start.bat`)**
+- 남은 과제
+  - 대시보드형 시각화 UI 고도화(필터/드릴다운)
+  - 데이터 전처리 규칙(날짜/카테고리 자동 인식) 고도화
+
+### 파일 붙여넣기 분석 가능 범위
+
+가능:
+- Python 코드, 로그, 에러 메시지, 설정 파일(`.toml`, `.json`, `.yaml`), CSV 샘플
+- 모듈 구조/의존성/리팩터링 포인트/버그 후보 분석
+- 여러 파일을 순차로 붙여주면 아키텍처 단위 진단
+
+제약:
+- 실제 실행이 필요한 문제(환경/권한/OS 특이 이슈)는 붙여넣기만으로 100% 재현 불가
+- 초대형 파일은 핵심 구간(에러 스택, 함수 단위) 분할 제공 권장
+
+권장 붙여넣기 순서:
+1. 에러 로그 전문
+2. 관련 함수/클래스
+3. 실행 명령어
+4. `pyproject.toml` 또는 의존성 목록
+
+---
+
+## 1) 이번 문서에서 바로 할 일
 
 1. Ollama 설치 및 실행
 2. BitNet 모델 1개 Pull
 3. CLI로 동작 확인
 4. Open WebUI 연결
 5. JupyterLab에서 CSV 분석 + BitNet 해석 워크플로우 구성
+6. (Windows) 더블클릭으로 데스크톱 앱 실행
 
 ---
 
-## 1) 사전 확인 (10~20분)
+## 2) 사전 확인 (10~20분)
 
 - OS 확인
 - RAM/VRAM 확인
@@ -29,7 +65,7 @@
 
 ---
 
-## 2) Step-by-step 시작 절차 (BitNet 우선)
+## 3) Step-by-step 시작 절차 (BitNet 우선)
 
 ### Step 1. Ollama 설치
 ```bash
@@ -79,9 +115,26 @@ pip install jupyterlab pandas matplotlib
 jupyter lab
 ```
 
+### Step 6. Windows 원클릭 실행
+
+터미널 없이 사용하려면 아래 중 하나를 사용하세요.
+
+- 방법 A: 프로젝트 루트에서 `BitNet_Desktop_Start.bat` 더블클릭
+- 방법 B: 설치 후 `bitnet-desktop` 실행
+- 방법 C: `bitnet-analyze desktop` 실행
+
+`BitNet_Desktop_Start.bat`는 다음을 자동 수행합니다.
+- `.venv` 생성(없으면)
+- 패키지 설치(`pip install -e .`)
+- `pythonw`로 GUI 실행(콘솔창 없이)
+
+데스크톱 UI 내 `환경진단` 버튼으로 Ollama 설치/실행/모델 보유 여부를 즉시 확인할 수 있습니다.
+또한 CSV 파일을 선택하지 않아도 CSV 텍스트를 바로 붙여넣어 분석할 수 있습니다.
+(다중 CSV 동시 분석은 현재 CLI `multi-analyze`에서 먼저 지원합니다.)
+
 ---
 
-## 3) BitNet 기본 설정값 (안정성 우선)
+## 4) BitNet 기본 설정값 (안정성 우선)
 
 - temperature: `0.2 ~ 0.5`
 - top_p: `0.9`
@@ -95,12 +148,12 @@ jupyter lab
 
 ---
 
-## 4) 데이터 분석 최소 워크플로우 (BitNet only)
+## 5) 데이터 분석 최소 워크플로우 (BitNet only)
 
-1. JupyterLab에서 CSV 로딩
-2. pandas로 결측/타입/기초통계 계산
-3. 계산 결과를 텍스트로 정리
-4. 정리된 텍스트를 BitNet에 입력해 인사이트/한계/추가 데이터 제안 받기
+1. CSV 로딩
+2. 결측/타입/기초통계 계산
+3. 계산 결과 기반 프롬프트 생성
+4. BitNet 실행으로 인사이트/한계/추가 데이터 제안 받기
 
 예시 프롬프트:
 
@@ -121,7 +174,7 @@ jupyter lab
 
 ---
 
-## 5) 운영 안정화 체크리스트
+## 6) 운영 안정화 체크리스트
 
 - [ ] BitNet 모델 1~2개만 유지
 - [ ] 프롬프트 템플릿은 검증된 것만 유지
@@ -135,7 +188,7 @@ jupyter lab
 
 ---
 
-## 6) 지금 바로 실행할 최소 커맨드 모음
+## 7) 지금 바로 실행할 최소 커맨드 모음
 
 ```bash
 # 0) 프로젝트 설치
@@ -153,19 +206,25 @@ ollama pull <bitnet-model-tag>
 # 3) CSV 분석 payload 생성
 bitnet-analyze analyze sample.csv --question "샘플 매출 데이터를 요약해줘" --out payload.json
 
-# 4) (선택) 웹 UI 실행
+# 4) 웹 UI 실행
 bitnet-analyze ui --host 127.0.0.1 --port 8765
-```
 
-필요하면 다음 단계에서 환경(OS/CPU/RAM/GPU)에 맞춰
-- 정확한 BitNet 태그
-- 권장 context/max_tokens
-- Open WebUI 프리셋 프롬프트 3종
-까지 바로 좁혀서 제안할 수 있습니다.
+# 5) 데스크톱 UI 실행
+bitnet-analyze desktop
+
+# 6) 환경 진단
+bitnet-analyze doctor --model bitnet:latest
+
+# 7) 마크다운 분석 리포트 저장
+bitnet-analyze report sample.csv --question "핵심 요약" --out analysis_report.md
+
+# 8) 다중 CSV 통합 분석(JSON+MD+코드가이드)
+bitnet-analyze multi-analyze a.csv b.csv c.csv --question "컬럼별 비율과 지역별 차이 분석" --group-column 시도명 --target-column 세차유형 --out-json multi.json --out-report multi.md
+```
 
 ---
 
-## 7) GitHub 반영(적용) 절차
+## 8) GitHub 반영(적용) 절차
 
 로컬에서 문서/설정을 수정한 뒤 아래 순서로 GitHub에 반영합니다.
 
@@ -179,4 +238,3 @@ PR 생성 시 체크 포인트:
 - 변경 목적(왜 바꿨는지) 1~2줄
 - 실행/검증한 명령어
 - 사용자 관점에서 달라진 점(BitNet 우선 흐름, 실행 순서 명확화 등)
-
diff --git a/bitnet_desktop.pyw b/bitnet_desktop.pyw
new file mode 100644
index 0000000..a31ec28
--- /dev/null
+++ b/bitnet_desktop.pyw
@@ -0,0 +1,5 @@
+from bitnet_tools.desktop import launch_desktop
+
+
+if __name__ == "__main__":
+    launch_desktop()
diff --git a/bitnet_tools/analysis.py b/bitnet_tools/analysis.py
index 5a615ab..d96ad72 100644
--- a/bitnet_tools/analysis.py
+++ b/bitnet_tools/analysis.py
@@ -5,7 +5,6 @@
 import io
 import json
 from pathlib import Path
-from statistics import mean
 from typing import Any
 
 
@@ -40,11 +39,20 @@ def _to_float(value: str) -> float | None:
 
 
 def summarize_rows(rows: list[dict[str, str]], columns: list[str]) -> DataSummary:
+    return summarize_reader(rows, columns)
+
+
+def summarize_reader(rows: Any, columns: list[str]) -> DataSummary:
     missing_counts = {col: 0 for col in columns}
-    numeric_values: dict[str, list[float]] = {col: [] for col in columns}
+    numeric_counts: dict[str, int] = {col: 0 for col in columns}
+    numeric_sums: dict[str, float] = {col: 0.0 for col in columns}
+    numeric_mins: dict[str, float] = {}
+    numeric_maxs: dict[str, float] = {}
     text_seen: dict[str, bool] = {col: False for col in columns}
+    row_count = 0
 
     for row in rows:
+        row_count += 1
         for col in columns:
             raw = (row.get(col) or "").strip()
             if raw == "":
@@ -54,25 +62,30 @@ def summarize_rows(rows: list[dict[str, str]], columns: list[str]) -> DataSummar
             if num is None:
                 text_seen[col] = True
             else:
-                numeric_values[col].append(num)
+                numeric_counts[col] += 1
+                numeric_sums[col] += num
+                if col not in numeric_mins or num < numeric_mins[col]:
+                    numeric_mins[col] = num
+                if col not in numeric_maxs or num > numeric_maxs[col]:
+                    numeric_maxs[col] = num
 
     dtypes: dict[str, str] = {}
     numeric_stats: dict[str, dict[str, float]] = {}
     for col in columns:
-        values = numeric_values[col]
-        if values and not text_seen[col]:
+        count = numeric_counts[col]
+        if count > 0 and not text_seen[col]:
             dtypes[col] = "float"
             numeric_stats[col] = {
-                "count": float(len(values)),
-                "mean": float(mean(values)),
-                "min": float(min(values)),
-                "max": float(max(values)),
+                "count": float(count),
+                "mean": float(numeric_sums[col] / count),
+                "min": float(numeric_mins[col]),
+                "max": float(numeric_maxs[col]),
             }
         else:
             dtypes[col] = "string"
 
     return DataSummary(
-        row_count=len(rows),
+        row_count=row_count,
         column_count=len(columns),
         columns=columns,
         dtypes=dtypes,
@@ -91,6 +104,32 @@ def build_prompt(summary: DataSummary, question: str) -> str:
     )
 
 
+def build_markdown_report(summary: DataSummary, question: str) -> str:
+    """Render *summary* as Markdown: header bullets, column table, numeric-stats table."""
+    def cell(value: object) -> str:
+        # A raw "|" or line break inside a column name would split or end the table row.
+        return str(value).replace("|", "\\|").replace("\n", " ")
+
+    lines = [
+        "# BitNet CSV 분석 보고서",
+        "",
+        f"- 질문: {question}",
+        f"- 행 수: {summary.row_count}",
+        f"- 열 수: {summary.column_count}",
+        "",
+        "## 컬럼 정보", "", "| 컬럼 | 타입 | 결측 수 |", "|---|---|---:|",
+    ]
+    for col in summary.columns:
+        lines.append(f"| {cell(col)} | {summary.dtypes.get(col, 'string')} | {summary.missing_counts.get(col, 0)} |")
+    if summary.numeric_stats:
+        lines.extend(["", "## 수치형 통계", "", "| 컬럼 | count | mean | min | max |", "|---|---:|---:|---:|---:|"])
+        for col, stats in summary.numeric_stats.items():
+            lines.append(
+                f"| {cell(col)} | {stats['count']:.0f} | {stats['mean']:.4f} | {stats['min']:.4f} | {stats['max']:.4f} |"
+            )
+    return "\n".join(lines)
+
+
 def build_analysis_payload(csv_path: str | Path, question: str) -> dict[str, Any]:
     path = Path(csv_path)
     if not path.exists():
@@ -101,9 +140,8 @@ def build_analysis_payload(csv_path: str | Path, question: str) -> dict[str, Any
         if reader.fieldnames is None:
             raise ValueError("CSV header not found")
         columns = [str(c) for c in reader.fieldnames]
-        rows = list(reader)
 
-    summary = summarize_rows(rows, columns)
+        summary = summarize_reader(reader, columns)
 
     return {
         "csv_path": str(path),
@@ -119,8 +157,7 @@ def build_analysis_payload_from_csv_text(csv_text: str, question: str) -> dict[s
         raise ValueError("CSV header not found")
 
     columns = [str(c) for c in reader.fieldnames]
-    rows = list(reader)
-    summary = summarize_rows(rows, columns)
+    summary = summarize_reader(reader, columns)
 
     return {
         "csv_path": "<inline_csv>",
diff --git a/bitnet_tools/cli.py b/bitnet_tools/cli.py
index 5d1362d..c14e299 100644
--- a/bitnet_tools/cli.py
+++ b/bitnet_tools/cli.py
@@ -6,7 +6,9 @@
 import sys
 from pathlib import Path
 
-from .analysis import build_analysis_payload
+from .analysis import DataSummary, build_analysis_payload, build_markdown_report
+from .doctor import collect_environment
+from .multi_csv import analyze_multiple_csv, build_multi_csv_markdown, result_to_json
 from .web import serve
 
 
@@ -47,12 +49,46 @@ def _build_parser() -> argparse.ArgumentParser:
     ui_parser.add_argument("--host", default="127.0.0.1", help="Bind host")
     ui_parser.add_argument("--port", default=8765, type=int, help="Bind port")
 
+    subparsers.add_parser("desktop", help="Run Windows desktop UI")
+
+    doctor_parser = subparsers.add_parser("doctor", help="Run local environment diagnostics")
+    doctor_parser.add_argument("--model", default=None, help="Optional model tag to check availability")
+
+
+    multi_parser = subparsers.add_parser("multi-analyze", help="Analyze multiple CSV files together")
+    multi_parser.add_argument("csv", nargs="+", type=Path, help="Input CSV paths")
+    multi_parser.add_argument("--question", required=True, help="Analysis question")
+    multi_parser.add_argument("--group-column", default=None, help="Optional group column for ratio table")
+    multi_parser.add_argument("--target-column", default=None, help="Optional target column for ratio table")
+    multi_parser.add_argument(
+        "--out-json",
+        type=Path,
+        default=Path("multi_analysis.json"),
+        help="Where to store multi CSV analysis JSON",
+    )
+    multi_parser.add_argument(
+        "--out-report",
+        type=Path,
+        default=Path("multi_analysis_report.md"),
+        help="Where to store multi CSV markdown report",
+    )
+
+    report_parser = subparsers.add_parser("report", help="Build markdown summary report from CSV")
+    report_parser.add_argument("csv", type=Path, help="Input CSV path")
+    report_parser.add_argument("--question", required=True, help="Analysis question")
+    report_parser.add_argument(
+        "--out",
+        type=Path,
+        default=Path("analysis_report.md"),
+        help="Where to store generated markdown report",
+    )
+
     return parser
 
 
 def main(argv: list[str] | None = None) -> int:
     raw_args = list(sys.argv[1:] if argv is None else argv)
-    if raw_args and raw_args[0] not in {"analyze", "ui", "-h", "--help"}:
+    if raw_args and raw_args[0] not in {"analyze", "ui", "desktop", "doctor", "report", "multi-analyze", "-h", "--help"}:
         raw_args.insert(0, "analyze")
 
     parser = _build_parser()
@@ -62,6 +98,40 @@ def main(argv: list[str] | None = None) -> int:
         serve(host=args.host, port=args.port)
         return 0
 
+    if args.command == "desktop":
+        from .desktop import launch_desktop
+
+        launch_desktop()
+        return 0
+
+    if args.command == "doctor":
+        report = collect_environment(model=args.model)
+        print(json.dumps(report, ensure_ascii=False, indent=2))
+        return 0
+
+
+
+    if args.command == "multi-analyze":
+        result = analyze_multiple_csv(
+            args.csv,
+            args.question,
+            group_column=args.group_column,
+            target_column=args.target_column,
+        )
+        args.out_json.write_text(result_to_json(result), encoding="utf-8")
+        args.out_report.write_text(build_multi_csv_markdown(result), encoding="utf-8")
+        print(f"multi analysis json saved: {args.out_json}")
+        print(f"multi analysis report saved: {args.out_report}")
+        return 0
+
+    if args.command == "report":
+        payload = build_analysis_payload(args.csv, args.question)
+        summary = DataSummary(**payload["summary"])
+        report = build_markdown_report(summary, args.question)
+        args.out.write_text(report, encoding="utf-8")
+        print(f"report saved: {args.out}")
+        return 0
+
     if args.command == "analyze":
         payload = build_analysis_payload(args.csv, args.question)
         args.out.write_text(
diff --git a/bitnet_tools/desktop.py b/bitnet_tools/desktop.py
new file mode 100644
index 0000000..0693e45
--- /dev/null
+++ b/bitnet_tools/desktop.py
@@ -0,0 +1,214 @@
+from __future__ import annotations
+
+import json
+import subprocess
+import threading
+import tkinter as tk
+from pathlib import Path
+from tkinter import filedialog, ttk
+
+from .analysis import build_analysis_payload, build_analysis_payload_from_csv_text
+from .doctor import collect_environment
+
+
+def run_ollama(model: str, prompt: str) -> str:
+    """Run ``ollama run`` once; return stripped stdout or raise RuntimeError."""
+    proc = subprocess.run(
+        ["ollama", "run", model, prompt], capture_output=True, check=False,
+        # Decode as UTF-8: text=True alone uses the locale code page (e.g. cp949).
+        encoding="utf-8", errors="replace",
+    )
+    if proc.returncode != 0:
+        raise RuntimeError(proc.stderr.strip() or "ollama run failed")
+    return proc.stdout.strip()
+
+
+class DesktopApp:
+    def __init__(self, root: tk.Tk) -> None:
+        self.root = root
+        self.root.title("BitNet CSV Analyzer (Windows)")
+        self.root.geometry("1100x760")
+
+        self.csv_path: Path | None = None
+        self.latest_prompt = ""
+
+        self._build_ui()
+
+    def _build_ui(self) -> None:
+        frame = ttk.Frame(self.root, padding=12)
+        frame.pack(fill="both", expand=True)
+
+        header = ttk.Label(
+            frame,
+            text="BitNet CSV Analyzer - 터미널 없이 바로 실행",
+            font=("Segoe UI", 14, "bold"),
+        )
+        header.pack(anchor="w")
+
+        sub = ttk.Label(
+            frame,
+            text="CSV 선택/붙여넣기 → 분석 → BitNet 실행 순서로 사용하세요.",
+        )
+        sub.pack(anchor="w", pady=(0, 10))
+
+        top_row = ttk.Frame(frame)
+        top_row.pack(fill="x", pady=(0, 8))
+        ttk.Button(top_row, text="CSV 파일 열기", command=self._open_csv).pack(side="left")
+
+        self.csv_label = ttk.Label(top_row, text="선택된 파일 없음")
+        self.csv_label.pack(side="left", padx=12)
+
+        csv_row = ttk.LabelFrame(frame, text="CSV 텍스트 (파일 미선택 시 여기에 붙여넣기)")
+        csv_row.pack(fill="both", pady=(0, 8))
+        self.csv_text = tk.Text(csv_row, height=8, wrap="none")
+        self.csv_text.pack(fill="both", expand=True, padx=8, pady=8)
+
+        question_row = ttk.LabelFrame(frame, text="질문")
+        question_row.pack(fill="x", pady=(0, 8))
+
+        chip_row = ttk.Frame(question_row)
+        chip_row.pack(anchor="w", padx=8, pady=6)
+        presets = [
+            "핵심 인사이트 3개와 근거를 알려줘",
+            "이상치 의심 포인트와 추가 확인 항목을 알려줘",
+            "실행 가능한 다음 액션 5개를 우선순위로 제안해줘",
+        ]
+        for txt in presets:
+            ttk.Button(chip_row, text=txt.split()[0], command=lambda t=txt: self._set_question(t)).pack(
+                side="left", padx=(0, 6)
+            )
+
+        self.question = tk.Text(question_row, height=3, wrap="word")
+        self.question.pack(fill="x", padx=8, pady=(0, 8))
+        self.question.insert("1.0", presets[0])
+
+        model_row = ttk.Frame(frame)
+        model_row.pack(fill="x", pady=(0, 8))
+
+        ttk.Label(model_row, text="BitNet 모델 태그").pack(side="left")
+        self.model = ttk.Entry(model_row)
+        self.model.insert(0, "bitnet:latest")
+        self.model.pack(side="left", fill="x", expand=True, padx=8)
+
+        ttk.Button(model_row, text="환경진단", command=self._doctor_async).pack(side="left", padx=(8, 4))
+        ttk.Button(model_row, text="1) 분석", command=self._analyze_async).pack(side="left", padx=(0, 4))
+        ttk.Button(model_row, text="2) BitNet 실행", command=self._run_model_async).pack(side="left")
+
+        self.status = ttk.Label(frame, text="대기 중")
+        self.status.pack(anchor="w", pady=(0, 8))
+
+        output = ttk.Panedwindow(frame, orient="vertical")
+        output.pack(fill="both", expand=True)
+
+        self.summary = self._make_text_panel(output, "데이터 요약")
+        self.prompt = self._make_text_panel(output, "생성 프롬프트")
+        self.answer = self._make_text_panel(output, "BitNet 응답")
+
+    def _make_text_panel(self, parent: ttk.Panedwindow, title: str) -> tk.Text:
+        panel = ttk.LabelFrame(parent, text=title)
+        text = tk.Text(panel, wrap="word", height=10)
+        scrollbar = ttk.Scrollbar(panel, orient="vertical", command=text.yview)
+        text.configure(yscrollcommand=scrollbar.set)
+        text.pack(side="left", fill="both", expand=True)
+        scrollbar.pack(side="right", fill="y")
+        parent.add(panel, weight=1)
+        return text
+
+    def _on_ui(self, func, *args) -> None:
+        self.root.after(0, lambda: func(*args))
+
+    def _set_question(self, text: str) -> None:
+        self.question.delete("1.0", "end")
+        self.question.insert("1.0", text)
+
+    def _open_csv(self) -> None:
+        """Ask for a CSV file and preview it; keep the prior state if it cannot be read."""
+        path = filedialog.askopenfilename(title="CSV 파일 선택", filetypes=[("CSV files", "*.csv"), ("All files", "*.*")])
+        if not path:
+            return
+        try:  # utf-8-sig strips a leading BOM so it is not pasted into the preview
+            content = Path(path).read_text(encoding="utf-8-sig")
+        except (OSError, UnicodeDecodeError) as exc:
+            return self._set_status(f"오류: {exc}")
+        self.csv_path = Path(path)
+        self.csv_label.configure(text=str(self.csv_path))
+        self._set_text(self.csv_text, content)
+
+    def _get_question(self) -> str:
+        question = self.question.get("1.0", "end").strip()
+        return question or "이 데이터의 핵심 인사이트를 알려줘"
+
+    def _analyze_async(self) -> None:
+        threading.Thread(target=self._analyze, daemon=True).start()
+
+    def _analyze(self) -> None:
+        self._on_ui(self._set_status, "분석 중...")
+        try:
+            question = self._get_question()
+            csv_text = self.csv_text.get("1.0", "end").strip()
+            if self.csv_path:
+                payload = build_analysis_payload(self.csv_path, question)
+            elif csv_text:
+                payload = build_analysis_payload_from_csv_text(csv_text, question)
+            else:
+                self._on_ui(self._set_status, "CSV 파일을 선택하거나 CSV 텍스트를 붙여넣어 주세요")
+                return
+
+            self.latest_prompt = payload["prompt"]
+            self._on_ui(self._set_text, self.summary, json.dumps(payload["summary"], ensure_ascii=False, indent=2))
+            self._on_ui(self._set_text, self.prompt, self.latest_prompt)
+            self._on_ui(self._set_text, self.answer, "")
+            self._on_ui(self._set_status, "분석 완료")
+        except Exception as exc:
+            self._on_ui(self._set_status, f"오류: {exc}")
+
+    def _doctor_async(self) -> None:
+        threading.Thread(target=self._doctor, daemon=True).start()
+
+    def _doctor(self) -> None:
+        self._on_ui(self._set_status, "환경 진단 중...")
+        report = collect_environment(model=self.model.get().strip() or None)
+        self._on_ui(self._set_text, self.answer, json.dumps(report, ensure_ascii=False, indent=2))
+        if report.get("ollama_installed") and report.get("ollama_running"):
+            self._on_ui(self._set_status, "환경 진단 완료 (정상)")
+        else:
+            self._on_ui(self._set_status, "환경 진단 완료 (확인 필요)")
+
+    def _run_model_async(self) -> None:
+        threading.Thread(target=self._run_model, daemon=True).start()
+
+    def _run_model(self) -> None:
+        if not self.latest_prompt:
+            self._on_ui(self._set_text, self.answer, "먼저 분석을 실행해 프롬프트를 생성하세요.")
+            return
+
+        model = self.model.get().strip()
+        if not model:
+            self._on_ui(self._set_text, self.answer, "모델 태그를 입력하세요. 예: bitnet:latest")
+            return
+
+        self._on_ui(self._set_status, "BitNet 실행 중...")
+        try:
+            result = run_ollama(model, self.latest_prompt)
+            self._on_ui(self._set_text, self.answer, result)
+            self._on_ui(self._set_status, "BitNet 실행 완료")
+        except Exception as exc:
+            self._on_ui(self._set_text, self.answer, f"오류: {exc}")
+            self._on_ui(self._set_status, "BitNet 실행 실패")
+
+    def _set_text(self, widget: tk.Text, value: str) -> None:
+        widget.delete("1.0", "end")
+        widget.insert("1.0", value)
+
+    def _set_status(self, value: str) -> None:
+        self.status.configure(text=value)
+
+
+def launch_desktop() -> None:
+    root = tk.Tk()
+    DesktopApp(root)
+    root.mainloop()
+
+
+if __name__ == "__main__":
+    launch_desktop()
diff --git a/bitnet_tools/doctor.py b/bitnet_tools/doctor.py
new file mode 100644
index 0000000..2758eea
--- /dev/null
+++ b/bitnet_tools/doctor.py
@@ -0,0 +1,68 @@
+from __future__ import annotations
+
+import platform
+import shutil
+import subprocess
+import sys
+from typing import Any
+
+
+def _run(cmd: list[str], timeout: float = 15.0) -> tuple[int, str, str]:
+    """Run *cmd* and return ``(returncode, stdout, stderr)``, both streams stripped.
+
+    A command that cannot be started or exceeds *timeout* seconds is reported
+    as return code 1 with the error text in the stderr slot, so callers can
+    keep treating any non-zero code as "the check failed" instead of crashing.
+    """
+    try:
+        proc = subprocess.run(
+            cmd,
+            capture_output=True,
+            check=False,
+            timeout=timeout,
+            # Explicit UTF-8 keeps decoding independent of the OS locale.
+            encoding="utf-8",
+            errors="replace",
+        )
+    except (OSError, subprocess.TimeoutExpired) as exc:
+        return 1, "", str(exc)
+    return proc.returncode, proc.stdout.strip(), proc.stderr.strip()
+
+
+def collect_environment(model: str | None = None) -> dict[str, Any]:
+    """Collect Python/OS info and probe the ``ollama`` CLI (path, version, model list)."""
+    info: dict[str, Any] = {
+        "python": sys.version.split()[0],
+        "platform": platform.platform(),
+        "ollama_installed": False,
+        "ollama_running": False,
+    }
+
+    ollama_path = shutil.which("ollama")
+    if not ollama_path:
+        info["diagnosis"] = "ollama not found in PATH"
+        return info
+
+    info["ollama_installed"] = True
+    info["ollama_path"] = ollama_path
+
+    code, out, err = _run(["ollama", "--version"])
+    if code == 0:
+        info["ollama_version"] = out
+    else:
+        info["ollama_version_error"] = err or out or "unknown error"
+
+    code, out, err = _run(["ollama", "list"])
+    if code == 0:
+        info["ollama_running"] = True
+        rows = [line for line in out.splitlines() if line.strip()]
+        models = [row.split()[0] for row in rows[1:]]  # rows[0] is skipped as the header row
+        info["models"] = models
+        if model:
+            info["model_requested"] = model
+            # Exact tag, or bare name plus any tag; a raw prefix test matched "llama3" to "llama3.1:8b".
+            info["model_available"] = any(m == model or m.startswith(model + ":") for m in models)
+    else:
+        info["ollama_list_error"] = err or out or "failed to query ollama"
+
+    return info
diff --git a/bitnet_tools/multi_csv.py b/bitnet_tools/multi_csv.py
new file mode 100644
index 0000000..69c9467
--- /dev/null
+++ b/bitnet_tools/multi_csv.py
@@ -0,0 +1,334 @@
+from __future__ import annotations
+
+import csv
+import json
+import math
+from collections import Counter, defaultdict
+from pathlib import Path
+from typing import Any
+
+from .analysis import _to_float, summarize_reader
+
+
+def _quantile(sorted_values: list[float], q: float) -> float:
+    """Linearly interpolated q-quantile of pre-sorted values; 0.0 when empty."""
+    size = len(sorted_values)
+    if size == 0:
+        return 0.0
+    if size == 1:
+        return sorted_values[0]
+    rank = (size - 1) * q
+    lo, hi = math.floor(rank), math.ceil(rank)
+    if lo == hi:
+        return sorted_values[lo]
+    return sorted_values[lo] * (1 - (rank - lo)) + sorted_values[hi] * (rank - lo)
+
+
+def _outlier_ratio(values: list[float]) -> float:
+    """Share of values outside the 1.5*IQR fences; 0.0 for <4 values or zero IQR."""
+    total = len(values)
+    if total < 4:
+        return 0.0
+    ordered = sorted(values)
+    q1, q3 = _quantile(ordered, 0.25), _quantile(ordered, 0.75)
+    spread = q3 - q1
+    if spread == 0:
+        return 0.0
+    fence_lo, fence_hi = q1 - 1.5 * spread, q3 + 1.5 * spread
+    flagged = sum(v < fence_lo or v > fence_hi for v in ordered)
+    return round(flagged / total, 6)
+
+
+def _group_ratio_table(rows: list[dict[str, str]], group_col: str, target_col: str) -> dict[str, Any]:
+    """Count target values per group and express each count as a within-group ratio.
+
+    Rows where either cell is blank after stripping are left out of the counts.
+    Ratios are rounded to 6 decimal places.
+    """
+    tallies: dict[str, Counter[str]] = defaultdict(Counter)
+    for record in rows:
+        group_key = (record.get(group_col) or "").strip()
+        target_key = (record.get(target_col) or "").strip()
+        if not (group_key and target_key):
+            continue
+        tallies[group_key][target_key] += 1
+
+    groups: dict[str, Any] = {}
+    for group_key, tally in tallies.items():
+        group_size = sum(tally.values())
+        per_target = {}
+        for target_key, hits in tally.items():
+            share = round(hits / group_size, 6) if group_size else 0.0
+            per_target[target_key] = {"count": hits, "ratio": share}
+        groups[group_key] = per_target
+
+    return {"group_column": group_col, "target_column": target_col, "groups": groups}
+
+
+def _profile_rows(
+    rows: list[dict[str, str]],
+    columns: list[str],
+    group_column: str | None = None,
+    target_column: str | None = None,
+) -> dict[str, Any]:
+    row_count = len(rows)
+    missing = {c: 0 for c in columns}
+    non_missing = {c: 0 for c in columns}
+    uniques: dict[str, set[str]] = {c: set() for c in columns}
+    value_counts: dict[str, Counter[str]] = {c: Counter() for c in columns}
+
+    numeric_positive = {c: 0 for c in columns}
+    numeric_zero = {c: 0 for c in columns}
+    numeric_negative = {c: 0 for c in columns}
+    numeric_values: dict[str, list[float]] = {c: [] for c in columns}
+
+    for row in rows:
+        for col in columns:
+            raw = (row.get(col) or "").strip()
+            if not raw:
+                missing[col] += 1
+                continue
+            non_missing[col] += 1
+            uniques[col].add(raw)
+            value_counts[col][raw] += 1
+
+            num = _to_float(raw)
+            if num is not None:
+                numeric_values[col].append(num)
+                if num > 0:
+                    numeric_positive[col] += 1
+                elif num < 0:
+                    numeric_negative[col] += 1
+                else:
+                    numeric_zero[col] += 1
+
+    summary = summarize_reader(rows, columns)
+    profiles: dict[str, Any] = {}
+    for col in columns:
+        nn = non_missing[col]
+        top = value_counts[col].most_common(5)
+        top_values = [
+            {
+                "value": v,
+                "count": cnt,
+                "ratio": round(cnt / row_count, 6) if row_count else 0.0,
+            }
+            for v, cnt in top
+        ]
+
+        numeric_total = numeric_positive[col] + numeric_zero[col] + numeric_negative[col]
+        numeric_distribution: dict[str, float] = {}
+        if numeric_total:
+            numeric_distribution = {
+                "positive_ratio": round(numeric_positive[col] / numeric_total, 6),
+                "zero_ratio": round(numeric_zero[col] / numeric_total, 6),
+                "negative_ratio": round(numeric_negative[col] / numeric_total, 6),
+                "outlier_ratio": _outlier_ratio(numeric_values[col]),
+            }
+
+        dominant_value_ratio = top_values[0]["ratio"] if top_values else 0.0
+        profiles[col] = {
+            "missing_count": missing[col],
+            "missing_ratio": round(missing[col] / row_count, 6) if row_count else 0.0,
+            "non_missing_count": nn,
+            "unique_count": len(uniques[col]),
+            "unique_ratio": round(len(uniques[col]) / nn, 6) if nn else 0.0,
+            "dominant_value_ratio": dominant_value_ratio,
+            "top_values": top_values,
+            "numeric_distribution": numeric_distribution,
+            "dtype": summary.dtypes[col],
+        }
+
+    group_target_ratio: dict[str, Any] | None = None
+    if group_column and target_column and group_column in columns and target_column in columns:
+        group_target_ratio = _group_ratio_table(rows, group_column, target_column)
+
+    return {
+        "summary": summary.to_dict(),
+        "column_profiles": profiles,
+        "group_target_ratio": group_target_ratio,
+    }
+
+
+def _schema_drift(files: list[dict[str, Any]], shared_columns: list[str]) -> dict[str, Any]:
+    """Compare each shared column across files: whether the dtype differs, and the
+    max-min spread of missing ratio, dominant-value ratio, and numeric mean."""
+    def spread(values: list[float]) -> float:
+        return round(max(values) - min(values), 6) if values else 0.0
+
+    report: dict[str, Any] = {}
+    for name in shared_columns:
+        profiles = [f["column_profiles"][name] for f in files if name in f["column_profiles"]]
+        # Files without numeric stats for this column contribute no mean.
+        per_file_stats = (f["summary"]["numeric_stats"].get(name) for f in files)
+        means = [stats["mean"] for stats in per_file_stats if stats]
+
+        report[name] = {
+            "dtype_changed": len({p["dtype"] for p in profiles}) > 1,
+            "missing_ratio_range": spread([p["missing_ratio"] for p in profiles]),
+            "dominant_value_ratio_range": spread([p["dominant_value_ratio"] for p in profiles]),
+            "mean_range": spread(means),
+        }
+    return report
+
+
+def analyze_multiple_csv(
+    csv_paths: list[Path],
+    question: str,
+    group_column: str | None = None,
+    target_column: str | None = None,
+) -> dict[str, Any]:
+    """Profile every CSV in ``csv_paths`` and combine the results into one report dict.
+
+    Raises ValueError for an empty path list or a file without a header row,
+    and FileNotFoundError when a path does not exist.
+    """
+    if not csv_paths:
+        raise ValueError("at least one CSV path is required")
+
+    file_entries: list[dict[str, Any]] = []
+    column_sets: list[set[str]] = []
+    row_total = 0
+    for csv_path in csv_paths:
+        if not csv_path.exists():
+            raise FileNotFoundError(f"CSV file not found: {csv_path}")
+        # The utf-8-sig codec drops a leading BOM when the file has one.
+        with csv_path.open("r", encoding="utf-8-sig", newline="") as handle:
+            reader = csv.DictReader(handle)
+            header = reader.fieldnames
+            if header is None:
+                raise ValueError(f"CSV header not found: {csv_path}")
+            columns = [str(name) for name in header]
+            rows = list(reader)
+
+        profile = _profile_rows(rows, columns, group_column=group_column, target_column=target_column)
+        row_total += profile["summary"]["row_count"]
+        column_sets.append(set(columns))
+        kept = ("summary", "column_profiles", "group_target_ratio")
+        file_entries.append({"path": str(csv_path), "question": question, **{k: profile[k] for k in kept}})
+
+    if column_sets:
+        shared_columns = sorted(set.intersection(*column_sets))
+        union_columns = sorted(set.union(*column_sets))
+    else:
+        shared_columns, union_columns = [], []
+
+    return {
+        "question": question,
+        "file_count": len(file_entries),
+        "total_row_count": row_total,
+        "shared_columns": shared_columns,
+        "union_columns": union_columns,
+        "files": file_entries,
+        "schema_drift": _schema_drift(file_entries, shared_columns),
+        "code_guidance": build_code_guidance(shared_columns, group_column, target_column),
+    }
+
+
+def build_code_guidance(
+    shared_columns: list[str],
+    group_column: str | None = None,
+    target_column: str | None = None,
+) -> dict[str, str]:
+    """Return ``recommended_steps`` text and a syntactically valid ``pandas_example`` script for the columns."""
+    join_key = shared_columns[0] if shared_columns else "공통키컬럼"
+    # "\\n" emits backslash-n so generated string literals stay on one line; !r quotes column names safely.
+    group_block = ""
+    if group_column and target_column:
+        group_block = (
+            f"ratio_tbl = (merged.groupby({group_column!r})[{target_column!r}].value_counts(normalize=True)"
+            ".rename('ratio').reset_index())\n"
+            "print('그룹-타깃 비율표:\\n', ratio_tbl.head(20))\n\n"
+        )
+
+    pandas_code = (
+        "import pandas as pd\n"
+        "import matplotlib.pyplot as plt\n\n"
+        "paths = ['file1.csv', 'file2.csv', 'file3.csv']\n"
+        "dfs = [pd.read_csv(p) for p in paths]\n\n"
+        f"key = {join_key!r}\n"
+        "merged = dfs[0]\n"
+        "for df in dfs[1:]:\n"
+        "    if key in merged.columns and key in df.columns:\n"
+        "        merged = merged.merge(df, on=key, how='outer', suffixes=('', '_r'))\n\n"
+        "missing_ratio = merged.isna().mean().sort_values(ascending=False)\n"
+        "print('결측 비율 상위:\\n', missing_ratio.head(10))\n\n"
+        "numeric_cols = merged.select_dtypes(include='number').columns\n"
+        "if len(numeric_cols) > 0:\n"
+        "    ratio = (merged[numeric_cols] > 0).mean().sort_values(ascending=False)\n"
+        "    print('양수 비율 상위:\\n', ratio.head(10))\n"
+        "    ratio.head(10).plot(kind='bar', title='양수 비율 상위 10개 컬럼')\n"
+        "    plt.tight_layout(); plt.show()\n\n"
+    ) + group_block
+
+    return {
+        "recommended_steps": (
+            "1) 공통 키 컬럼 확인 후 병합\n"
+            "2) 컬럼별 결측/고유값/상위값 비율 확인\n"
+            "3) 수치형 컬럼 비율(양수/0/음수), 이상치 비율, 분포 확인\n"
+            "4) 그룹 컬럼 기준 타깃 비율 분석(예: 시도명-세차유형)\n"
+            "5) 파일 간 스키마 변화/평균 변화 범위 확인"
+        ),
+        "pandas_example": pandas_code,
+    }
+
+
+def build_multi_csv_markdown(result: dict[str, Any]) -> str:
+    """Render a multi-CSV analysis result as a Markdown report.
+
+    Emits a header, one profile table per file, a cross-file drift table and
+    the code-guidance section, joined with newlines.
+    """
+    out = ["# 다중 CSV 분석 리포트", ""]
+    out.append(f"- 질문: {result['question']}")
+    out.append(f"- 파일 수: {result['file_count']}")
+    out.append(f"- 전체 행 수: {result['total_row_count']}")
+    shared = result["shared_columns"]
+    out.append(f"- 공통 컬럼: {', '.join(shared) if shared else '(없음)'}")
+    out.append("")
+
+    # Row template shared by the per-file and drift tables (both have five cells).
+    row_fmt = "| {} | {} | {:.4f} | {:.4f} | {:.4f} |"
+    for info in result["files"]:
+        out.append(f"## 파일: {info['path']}")
+        summary = info["summary"]
+        out.extend(
+            [
+                "",
+                f"- 행 수: {summary['row_count']}",
+                f"- 열 수: {summary['column_count']}",
+                "",
+                "| 컬럼 | 타입 | 결측비율 | 고유비율 | 대표값비율 |",
+                "|---|---|---:|---:|---:|",
+            ]
+        )
+        for col in summary["columns"]:
+            prof = info["column_profiles"][col]
+            out.append(
+                row_fmt.format(
+                    col, prof["dtype"], prof["missing_ratio"], prof["unique_ratio"], prof["dominant_value_ratio"]
+                )
+            )
+        gtr = info.get("group_target_ratio")
+        if gtr:
+            out += ["", f"- 그룹비율: {gtr['group_column']} x {gtr['target_column']}"]
+        out.append("")
+
+    # Cross-file drift table: one row per entry of ``schema_drift``.
+    out.append("## 파일 간 스키마/분포 변화")
+    out.append("")
+    out.append("| 컬럼 | 타입변화 | 결측비율범위 | 대표값비율범위 | 평균범위 |")
+    out.append("|---|---|---:|---:|---:|")
+    for col, drift in result["schema_drift"].items():
+        cells = (drift["dtype_changed"], drift["missing_ratio_range"], drift["dominant_value_ratio_range"], drift["mean_range"])
+        out.append(row_fmt.format(col, *cells))
+
+    guidance = result["code_guidance"]
+    out += ["", "## 코드 가이드", "", "```text", guidance["recommended_steps"], "```"]
+    out += ["", "```python", guidance["pandas_example"], "```"]
+
+    return "\n".join(out)
+
+
+def result_to_json(result: dict[str, Any]) -> str:  # 2-space indented JSON, non-ASCII kept verbatim
+    return json.dumps(result, indent=2, ensure_ascii=False)
diff --git a/pyproject.toml b/pyproject.toml
index 4f7a543..ae288de 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,6 +12,7 @@ dependencies = []
 
 [project.scripts]
 bitnet-analyze = "bitnet_tools.cli:main"
+bitnet-desktop = "bitnet_tools.desktop:launch_desktop"
 
 [tool.pytest.ini_options]
 testpaths = ["tests"]
diff --git a/tests/test_analysis.py b/tests/test_analysis.py
index d9d6454..41de94b 100644
--- a/tests/test_analysis.py
+++ b/tests/test_analysis.py
@@ -2,7 +2,10 @@
     build_analysis_payload,
     build_analysis_payload_from_csv_text,
     summarize_rows,
+    build_markdown_report,
 )
+from bitnet_tools.multi_csv import analyze_multiple_csv, build_multi_csv_markdown
+
 
 
 def test_summarize_rows_basic():
@@ -38,3 +41,51 @@ def test_build_analysis_payload_from_csv_text():
 
     assert payload["csv_path"] == "<inline_csv>"
     assert payload["summary"]["row_count"] == 2
+
+
+def test_streaming_summary_keeps_mixed_type_as_string(tmp_path):
+    """Column ``b`` mixes a number and text, so it must be reported as a string column."""
+    csv_file = tmp_path / "mixed.csv"
+    csv_file.write_text("a,b\n1,10\n2,hello\n", encoding="utf-8")
+
+    summary = build_analysis_payload(csv_file, "검증")["summary"]
+    assert summary["dtypes"]["b"] == "string"
+    assert "b" not in summary["numeric_stats"]
+
+
+def test_build_markdown_report():
+    """The single-file report contains its title, a row for column ``a`` and the question."""
+    records = [{"a": "1", "b": "10"}, {"a": "2", "b": "20"}]
+    summary = summarize_rows(records, ["a", "b"])
+    report = build_markdown_report(summary, "테스트 질문")
+
+    for expected in ("# BitNet CSV 분석 보고서", "| a |", "테스트 질문"):
+        assert expected in report
+
+
+def test_multi_csv_report_builder(tmp_path):
+    """Two files that share only ``city`` still produce one combined report."""
+    first, second = tmp_path / "a.csv", tmp_path / "b.csv"
+    first.write_text("city,v\nseoul,1\n", encoding="utf-8")
+    second.write_text("city,v2\nseoul,2\n", encoding="utf-8")
+
+    result = analyze_multiple_csv([first, second], "비교")
+    report = build_multi_csv_markdown(result)
+
+    assert result["file_count"] == 2
+    assert "city" in result["shared_columns"]
+    assert "다중 CSV 분석 리포트" in report
+
+
+def test_multi_csv_schema_drift_and_group_ratio(tmp_path):
+    """Same schema, different ``val`` magnitudes: mean drift is positive and group ratios exist."""
+    first, second = tmp_path / "a.csv", tmp_path / "b.csv"
+    first.write_text("city,type,val\nseoul,A,1\nseoul,B,2\n", encoding="utf-8")
+    second.write_text("city,type,val\nseoul,A,100\nbusan,A,200\n", encoding="utf-8")
+
+    result = analyze_multiple_csv([first, second], "드리프트", group_column="city", target_column="type")
+
+    assert "schema_drift" in result
+    assert "val" in result["schema_drift"]
+    assert result["schema_drift"]["val"]["mean_range"] > 0
+    assert result["files"][0]["group_target_ratio"] is not None
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 56d148c..ea1caeb 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -27,3 +27,87 @@ def fake_serve(host: str, port: int):
 
     assert code == 0
     assert called == {"host": "0.0.0.0", "port": 9999}
+
+
+def test_cli_doctor_mode(monkeypatch, capsys):
+    """``doctor`` exits 0 and prints the values returned by the stubbed ``collect_environment``."""
+
+    def fake_environment(model=None):
+        return {"ollama_installed": True, "model_requested": model}
+
+    monkeypatch.setattr(cli, "collect_environment", fake_environment)
+    exit_code = cli.main(["doctor", "--model", "bitnet:latest"])
+
+    assert exit_code == 0
+    printed = capsys.readouterr().out
+    assert '"ollama_installed": true' in printed
+    assert '"model_requested": "bitnet:latest"' in printed
+
+
+def test_cli_report_mode(tmp_path):
+    """``report`` exits 0 and writes the Markdown report to the ``--out`` path."""
+    source = tmp_path / "sample.csv"
+    source.write_text("a,b\n1,2\n", encoding="utf-8")
+    destination = tmp_path / "report.md"
+
+    argv = ["report", str(source), "--question", "요약", "--out", str(destination)]
+    assert cli.main(argv) == 0
+    assert destination.exists()
+    assert "BitNet CSV 분석 보고서" in destination.read_text(encoding="utf-8")
+
+
+def test_cli_multi_analyze_mode(tmp_path):
+    """``multi-analyze`` exits 0 and writes both requested outputs.
+
+    The two input files share only the ``city`` column.
+    """
+    first = tmp_path / "a.csv"
+    second = tmp_path / "b.csv"
+    first.write_text("city,val\nseoul,1\nbusan,2\n", encoding="utf-8")
+    second.write_text("city,val2\nseoul,10\ndaegu,20\n", encoding="utf-8")
+    out_json = tmp_path / "out.json"
+    out_md = tmp_path / "out.md"
+
+    argv = [
+        "multi-analyze",
+        str(first),
+        str(second),
+        "--question", "다중 비교",
+        "--out-json", str(out_json),
+        "--out-report", str(out_md),
+    ]
+
+    assert cli.main(argv) == 0
+    assert out_json.exists()
+    assert out_md.exists()
+    assert "다중 CSV 분석 리포트" in out_md.read_text(encoding="utf-8")
+
+
+def test_cli_multi_analyze_with_group_target(tmp_path):
+    """``multi-analyze`` accepts ``--group-column`` and ``--target-column``.
+
+    The JSON output must then mention ``group_target_ratio``.
+    """
+    first = tmp_path / "a.csv"
+    second = tmp_path / "b.csv"
+    first.write_text("city,type,val\nseoul,A,1\nseoul,B,2\n", encoding="utf-8")
+    second.write_text("city,type,val\nseoul,A,10\nbusan,A,20\n", encoding="utf-8")
+    out_json = tmp_path / "out2.json"
+    out_md = tmp_path / "out2.md"
+
+    argv = [
+        "multi-analyze",
+        str(first),
+        str(second),
+        "--question", "그룹비율",
+        "--group-column", "city",
+        "--target-column", "type",
+        "--out-json", str(out_json),
+        "--out-report", str(out_md),
+    ]
+
+    exit_code = cli.main(argv)
+
+    assert exit_code == 0
+    body = out_json.read_text(encoding="utf-8")
+    assert "group_target_ratio" in body