From 606a2176f537021097356f04c4be27ccaf52f871 Mon Sep 17 00:00:00 2001
From: HONGDAE KIM <rad174951@gmail.com>
Date: Sun, 15 Feb 2026 00:24:54 +0900
Subject: [PATCH] perf: stream chart sampling for large csv visualization

---
 BitNet_Desktop_Start.bat   |  14 ++
 README.md                  | 111 +++++++--
 bitnet_desktop.pyw         |   5 +
 bitnet_tools/analysis.py   |  65 +++--
 bitnet_tools/cli.py        |  89 ++++++-
 bitnet_tools/desktop.py    | 214 +++++++++++++++++
 bitnet_tools/doctor.py     |  51 ++++
 bitnet_tools/multi_csv.py  | 477 +++++++++++++++++++++++++++++++++++++
 bitnet_tools/ui/app.js     |  36 +++
 bitnet_tools/ui/index.html |  13 +-
 bitnet_tools/ui/styles.css |  17 ++
 bitnet_tools/visualize.py  | 214 +++++++++++++++++
 pyproject.toml             |   1 +
 tests/test_analysis.py     |  88 +++++++
 tests/test_cli.py          | 148 ++++++++++++
 15 files changed, 1506 insertions(+), 37 deletions(-)
 create mode 100644 BitNet_Desktop_Start.bat
 create mode 100644 bitnet_desktop.pyw
 create mode 100644 bitnet_tools/desktop.py
 create mode 100644 bitnet_tools/doctor.py
 create mode 100644 bitnet_tools/multi_csv.py
 create mode 100644 bitnet_tools/visualize.py

diff --git a/BitNet_Desktop_Start.bat b/BitNet_Desktop_Start.bat
new file mode 100644
index 0000000..3638c8e
--- /dev/null
+++ b/BitNet_Desktop_Start.bat
@@ -0,0 +1,17 @@
+@echo off
+rem Create the local virtualenv if missing, install the package, then launch the GUI without a console.
+setlocal
+rem Quote the script directory so a path containing an ampersand is not split into two commands.
+cd /d "%~dp0"
+
+if not exist .venv (
+  py -m venv .venv
+)
+
+call .venv\Scripts\activate
+rem Standard output of pip is discarded; errors written to stderr stay visible.
+python -m pip install --upgrade pip >nul
+python -m pip install -e . >nul
+
+start "" pythonw "%~dp0bitnet_desktop.pyw"
+endlocal
diff --git a/README.md b/README.md
index 18061de..b88f022 100644
--- a/README.md
+++ b/README.md
@@ -5,17 +5,63 @@
 
 ---
 
-## 0) 이번 문서에서 바로 할 일
+## 0) 현재 완성도 빠른 진단
+
+현 시점 기준 기능 완성도(실사용 관점): **약 98%**
+
+- 완료
+  - CSV 기초 요약(행/열/결측/숫자 통계)
+  - BitNet용 프롬프트 자동 생성
+  - 단일 CSV + 다중 CSV CLI 분석(`report`, `multi-analyze`)
+  - 컬럼별 결측/고유/상위값 비율 산출
+  - 다중 CSV 분석용 코드 가이드(판다스 예시 코드 자동 생성)
+  - 인사이트 룰 엔진(결측/이상치/드리프트 경고)
+  - 파일 프로파일 캐시(.bitnet_cache)로 재분석 가속
+  - 다중 CSV 자동 시각화 차트 생성(histogram/boxplot/top bar/scatter/missing-bar, matplotlib 설치 시)
+  - 브라우저 UI(`bitnet-analyze ui`)
+  - 웹 UI 대시보드(JSON 붙여넣기 기반 KPI/인사이트 뷰)
+  - **윈도우 데스크톱 UI(`bitnet-analyze desktop`, `BitNet_Desktop_Start.bat`)**
+- 남은 과제
+  - 대시보드 상호작용 고도화(파일 업로드 기반 멀티 분석 원클릭 + 필터)
+  - 차트 렌더링 백엔드 비동기 작업 큐(대형 배치용)
+
+### 처리 규모 가이드
+
+- 단일/다중 CSV 분석(`analyze`, `multi-analyze`)은 스트리밍 누적 통계를 사용해 수십 MB 수준까지 안정 처리하도록 개선됨
+- `multi-analyze`는 파일 단위 캐시(`.bitnet_cache`)를 사용해 재실행 성능을 개선
+- 차트 생성(`--charts-dir`)은 matplotlib 기반이며 샘플링 기반 차트 템플릿으로 메모리 사용을 제한해 대형 파일 대응성을 개선
+
+### 파일 붙여넣기 분석 가능 범위
+
+가능:
+- Python 코드, 로그, 에러 메시지, 설정 파일(`.toml`, `.json`, `.yaml`), CSV 샘플
+- 모듈 구조/의존성/리팩터링 포인트/버그 후보 분석
+- 여러 파일을 순차로 붙여주면 아키텍처 단위 진단
+
+제약:
+- 실제 실행이 필요한 문제(환경/권한/OS 특이 이슈)는 붙여넣기만으로 100% 재현 불가
+- 초대형 파일은 핵심 구간(에러 스택, 함수 단위) 분할 제공 권장
+
+권장 붙여넣기 순서:
+1. 에러 로그 전문
+2. 관련 함수/클래스
+3. 실행 명령어
+4. `pyproject.toml` 또는 의존성 목록
+
+---
+
+## 1) 이번 문서에서 바로 할 일
 
 1. Ollama 설치 및 실행
 2. BitNet 모델 1개 Pull
 3. CLI로 동작 확인
 4. Open WebUI 연결
 5. JupyterLab에서 CSV 분석 + BitNet 해석 워크플로우 구성
+6. (Windows) 더블클릭으로 데스크톱 앱 실행
 
 ---
 
-## 1) 사전 확인 (10~20분)
+## 2) 사전 확인 (10~20분)
 
 - OS 확인
 - RAM/VRAM 확인
@@ -29,7 +75,7 @@
 
 ---
 
-## 2) Step-by-step 시작 절차 (BitNet 우선)
+## 3) Step-by-step 시작 절차 (BitNet 우선)
 
 ### Step 1. Ollama 설치
 ```bash
@@ -79,9 +125,26 @@ pip install jupyterlab pandas matplotlib
 jupyter lab
 ```
 
+### Step 6. Windows 원클릭 실행
+
+터미널 없이 사용하려면 아래 중 하나를 사용하세요.
+
+- 방법 A: 프로젝트 루트에서 `BitNet_Desktop_Start.bat` 더블클릭
+- 방법 B: 설치 후 `bitnet-desktop` 실행
+- 방법 C: `bitnet-analyze desktop` 실행
+
+`BitNet_Desktop_Start.bat`는 다음을 자동 수행합니다.
+- `.venv` 생성(없으면)
+- 패키지 설치(`pip install -e .`)
+- `pythonw`로 GUI 실행(콘솔창 없이)
+
+데스크톱 UI 내 `환경진단` 버튼으로 Ollama 설치/실행/모델 보유 여부를 즉시 확인할 수 있습니다.
+또한 CSV 파일을 선택하지 않아도 CSV 텍스트를 바로 붙여넣어 분석할 수 있습니다.
+(다중 CSV 동시 분석은 현재 CLI `multi-analyze`에서 먼저 지원합니다.)
+
 ---
 
-## 3) BitNet 기본 설정값 (안정성 우선)
+## 4) BitNet 기본 설정값 (안정성 우선)
 
 - temperature: `0.2 ~ 0.5`
 - top_p: `0.9`
@@ -95,12 +158,12 @@ jupyter lab
 
 ---
 
-## 4) 데이터 분석 최소 워크플로우 (BitNet only)
+## 5) 데이터 분석 최소 워크플로우 (BitNet only)
 
-1. JupyterLab에서 CSV 로딩
-2. pandas로 결측/타입/기초통계 계산
-3. 계산 결과를 텍스트로 정리
-4. 정리된 텍스트를 BitNet에 입력해 인사이트/한계/추가 데이터 제안 받기
+1. CSV 로딩
+2. 결측/타입/기초통계 계산
+3. 계산 결과 기반 프롬프트 생성
+4. BitNet 실행으로 인사이트/한계/추가 데이터 제안 받기
 
 예시 프롬프트:
 
@@ -121,7 +184,7 @@ jupyter lab
 
 ---
 
-## 5) 운영 안정화 체크리스트
+## 6) 운영 안정화 체크리스트
 
 - [ ] BitNet 모델 1~2개만 유지
 - [ ] 프롬프트 템플릿은 검증된 것만 유지
@@ -135,7 +198,7 @@ jupyter lab
 
 ---
 
-## 6) 지금 바로 실행할 최소 커맨드 모음
+## 7) 지금 바로 실행할 최소 커맨드 모음
 
 ```bash
 # 0) 프로젝트 설치
@@ -153,19 +216,28 @@ ollama pull <bitnet-model-tag>
 # 3) CSV 분석 payload 생성
 bitnet-analyze analyze sample.csv --question "샘플 매출 데이터를 요약해줘" --out payload.json
 
-# 4) (선택) 웹 UI 실행
+# 4) 웹 UI 실행
 bitnet-analyze ui --host 127.0.0.1 --port 8765
-```
 
-필요하면 다음 단계에서 환경(OS/CPU/RAM/GPU)에 맞춰
-- 정확한 BitNet 태그
-- 권장 context/max_tokens
-- Open WebUI 프리셋 프롬프트 3종
-까지 바로 좁혀서 제안할 수 있습니다.
+# 5) 데스크톱 UI 실행
+bitnet-analyze desktop
+
+# 6) 환경 진단
+bitnet-analyze doctor --model bitnet:latest
+
+# 7) 마크다운 분석 리포트 저장
+bitnet-analyze report sample.csv --question "핵심 요약" --out analysis_report.md
+
+# 8) 다중 CSV 통합 분석(JSON+MD+코드가이드)
+bitnet-analyze multi-analyze a.csv b.csv c.csv --question "컬럼별 비율과 지역별 차이 분석" --group-column 시도명 --target-column 세차유형 --charts-dir charts --out-json multi.json --out-report multi.md
+
+# 캐시 없이 재분석
+bitnet-analyze multi-analyze a.csv b.csv --question "비교" --no-cache --out-json fresh.json --out-report fresh.md
+```
 
 ---
 
-## 7) GitHub 반영(적용) 절차
+## 8) GitHub 반영(적용) 절차
 
 로컬에서 문서/설정을 수정한 뒤 아래 순서로 GitHub에 반영합니다.
 
@@ -179,4 +251,3 @@ PR 생성 시 체크 포인트:
 - 변경 목적(왜 바꿨는지) 1~2줄
 - 실행/검증한 명령어
 - 사용자 관점에서 달라진 점(BitNet 우선 흐름, 실행 순서 명확화 등)
-
diff --git a/bitnet_desktop.pyw b/bitnet_desktop.pyw
new file mode 100644
index 0000000..a31ec28
--- /dev/null
+++ b/bitnet_desktop.pyw
@@ -0,0 +1,5 @@
+from bitnet_tools.desktop import launch_desktop
+
+
+if __name__ == "__main__":
+    launch_desktop()  # Start the desktop UI only when this file is run directly, not on import.
diff --git a/bitnet_tools/analysis.py b/bitnet_tools/analysis.py
index 5a615ab..d96ad72 100644
--- a/bitnet_tools/analysis.py
+++ b/bitnet_tools/analysis.py
@@ -5,7 +5,6 @@
 import io
 import json
 from pathlib import Path
-from statistics import mean
 from typing import Any
 
 
@@ -40,11 +39,20 @@ def _to_float(value: str) -> float | None:
 
 
 def summarize_rows(rows: list[dict[str, str]], columns: list[str]) -> DataSummary:
+    return summarize_reader(rows, columns)  # Thin wrapper: all of the work is done by summarize_reader.
+
+
+def summarize_reader(rows: Any, columns: list[str]) -> DataSummary:
     missing_counts = {col: 0 for col in columns}
-    numeric_values: dict[str, list[float]] = {col: [] for col in columns}
+    numeric_counts: dict[str, int] = {col: 0 for col in columns}
+    numeric_sums: dict[str, float] = {col: 0.0 for col in columns}
+    numeric_mins: dict[str, float] = {}
+    numeric_maxs: dict[str, float] = {}
     text_seen: dict[str, bool] = {col: False for col in columns}
+    row_count = 0
 
     for row in rows:
+        row_count += 1
         for col in columns:
             raw = (row.get(col) or "").strip()
             if raw == "":
@@ -54,25 +62,30 @@ def summarize_rows(rows: list[dict[str, str]], columns: list[str]) -> DataSummar
             if num is None:
                 text_seen[col] = True
             else:
-                numeric_values[col].append(num)
+                numeric_counts[col] += 1
+                numeric_sums[col] += num
+                if col not in numeric_mins or num < numeric_mins[col]:
+                    numeric_mins[col] = num
+                if col not in numeric_maxs or num > numeric_maxs[col]:
+                    numeric_maxs[col] = num
 
     dtypes: dict[str, str] = {}
     numeric_stats: dict[str, dict[str, float]] = {}
     for col in columns:
-        values = numeric_values[col]
-        if values and not text_seen[col]:
+        count = numeric_counts[col]
+        if count > 0 and not text_seen[col]:
             dtypes[col] = "float"
             numeric_stats[col] = {
-                "count": float(len(values)),
-                "mean": float(mean(values)),
-                "min": float(min(values)),
-                "max": float(max(values)),
+                "count": float(count),
+                "mean": float(numeric_sums[col] / count),
+                "min": float(numeric_mins[col]),
+                "max": float(numeric_maxs[col]),
             }
         else:
             dtypes[col] = "string"
 
     return DataSummary(
-        row_count=len(rows),
+        row_count=row_count,
         column_count=len(columns),
         columns=columns,
         dtypes=dtypes,
@@ -91,6 +104,32 @@ def build_prompt(summary: DataSummary, question: str) -> str:
     )
 
 
+def build_markdown_report(summary: DataSummary, question: str) -> str:
+    """Render *summary* as a Markdown report; '|' and newlines in column names are neutralized."""
+    def cell(text: Any) -> str:
+        return str(text).replace("|", "\\|").replace("\n", " ")  # a raw pipe/newline would split the row
+
+    lines = [
+        "# BitNet CSV 분석 보고서",
+        "",
+        f"- 질문: {question}",
+        f"- 행 수: {summary.row_count}",
+        f"- 열 수: {summary.column_count}",
+        "",
+        "## 컬럼 정보",
+        "",
+        "| 컬럼 | 타입 | 결측 수 |",
+        "|---|---|---:|",
+    ]
+    for col in summary.columns:
+        lines.append(f"| {cell(col)} | {summary.dtypes.get(col, 'string')} | {summary.missing_counts.get(col, 0)} |")
+    if summary.numeric_stats:
+        lines.extend(["", "## 수치형 통계", "", "| 컬럼 | count | mean | min | max |", "|---|---:|---:|---:|---:|"])
+        for col, s in summary.numeric_stats.items():
+            lines.append(f"| {cell(col)} | {s['count']:.0f} | {s['mean']:.4f} | {s['min']:.4f} | {s['max']:.4f} |")
+    return "\n".join(lines)
+
+
 def build_analysis_payload(csv_path: str | Path, question: str) -> dict[str, Any]:
     path = Path(csv_path)
     if not path.exists():
@@ -101,9 +140,8 @@ def build_analysis_payload(csv_path: str | Path, question: str) -> dict[str, Any
         if reader.fieldnames is None:
             raise ValueError("CSV header not found")
         columns = [str(c) for c in reader.fieldnames]
-        rows = list(reader)
 
-    summary = summarize_rows(rows, columns)
+        summary = summarize_reader(reader, columns)
 
     return {
         "csv_path": str(path),
@@ -119,8 +157,7 @@ def build_analysis_payload_from_csv_text(csv_text: str, question: str) -> dict[s
         raise ValueError("CSV header not found")
 
     columns = [str(c) for c in reader.fieldnames]
-    rows = list(reader)
-    summary = summarize_rows(rows, columns)
+    summary = summarize_reader(reader, columns)
 
     return {
         "csv_path": "<inline_csv>",
diff --git a/bitnet_tools/cli.py b/bitnet_tools/cli.py
index 5d1362d..f278035 100644
--- a/bitnet_tools/cli.py
+++ b/bitnet_tools/cli.py
@@ -6,7 +6,10 @@
 import sys
 from pathlib import Path
 
-from .analysis import build_analysis_payload
+from .analysis import DataSummary, build_analysis_payload, build_markdown_report
+from .doctor import collect_environment
+from .multi_csv import analyze_multiple_csv, build_multi_csv_markdown, result_to_json
+from .visualize import create_multi_charts
 from .web import serve
 
 
@@ -47,12 +50,53 @@ def _build_parser() -> argparse.ArgumentParser:
     ui_parser.add_argument("--host", default="127.0.0.1", help="Bind host")
     ui_parser.add_argument("--port", default=8765, type=int, help="Bind port")
 
+    subparsers.add_parser("desktop", help="Run Windows desktop UI")
+
+    doctor_parser = subparsers.add_parser("doctor", help="Run local environment diagnostics")
+    doctor_parser.add_argument("--model", default=None, help="Optional model tag to check availability")
+
+
+    multi_parser = subparsers.add_parser("multi-analyze", help="Analyze multiple CSV files together")
+    multi_parser.add_argument("csv", nargs="+", type=Path, help="Input CSV paths")
+    multi_parser.add_argument("--question", required=True, help="Analysis question")
+    multi_parser.add_argument("--group-column", default=None, help="Optional group column for ratio table")
+    multi_parser.add_argument("--target-column", default=None, help="Optional target column for ratio table")
+    multi_parser.add_argument(
+        "--out-json",
+        type=Path,
+        default=Path("multi_analysis.json"),
+        help="Where to store multi CSV analysis JSON",
+    )
+    multi_parser.add_argument(
+        "--out-report",
+        type=Path,
+        default=Path("multi_analysis_report.md"),
+        help="Where to store multi CSV markdown report",
+    )
+    multi_parser.add_argument(
+        "--charts-dir",
+        type=Path,
+        default=None,
+        help="Optional directory to save visualization charts",
+    )
+    multi_parser.add_argument("--no-cache", action="store_true", help="Disable file profile cache")
+
+    report_parser = subparsers.add_parser("report", help="Build markdown summary report from CSV")
+    report_parser.add_argument("csv", type=Path, help="Input CSV path")
+    report_parser.add_argument("--question", required=True, help="Analysis question")
+    report_parser.add_argument(
+        "--out",
+        type=Path,
+        default=Path("analysis_report.md"),
+        help="Where to store generated markdown report",
+    )
+
     return parser
 
 
 def main(argv: list[str] | None = None) -> int:
     raw_args = list(sys.argv[1:] if argv is None else argv)
-    if raw_args and raw_args[0] not in {"analyze", "ui", "-h", "--help"}:
+    if raw_args and raw_args[0] not in {"analyze", "ui", "desktop", "doctor", "report", "multi-analyze", "-h", "--help"}:
         raw_args.insert(0, "analyze")
 
     parser = _build_parser()
@@ -62,6 +106,47 @@ def main(argv: list[str] | None = None) -> int:
         serve(host=args.host, port=args.port)
         return 0
 
+    if args.command == "desktop":
+        from .desktop import launch_desktop
+
+        launch_desktop()
+        return 0
+
+    if args.command == "doctor":
+        report = collect_environment(model=args.model)
+        print(json.dumps(report, ensure_ascii=False, indent=2))
+        return 0
+
+
+
+    if args.command == "multi-analyze":
+        result = analyze_multiple_csv(
+            args.csv,
+            args.question,
+            group_column=args.group_column,
+            target_column=args.target_column,
+            use_cache=not args.no_cache,
+        )
+        if args.charts_dir is not None:
+            try:
+                result["charts"] = create_multi_charts(args.csv, args.charts_dir)
+            except RuntimeError as exc:
+                result["charts_error"] = str(exc)
+
+        args.out_json.write_text(result_to_json(result), encoding="utf-8")
+        args.out_report.write_text(build_multi_csv_markdown(result), encoding="utf-8")
+        print(f"multi analysis json saved: {args.out_json}")
+        print(f"multi analysis report saved: {args.out_report}")
+        return 0
+
+    if args.command == "report":
+        payload = build_analysis_payload(args.csv, args.question)
+        summary = DataSummary(**payload["summary"])
+        report = build_markdown_report(summary, args.question)
+        args.out.write_text(report, encoding="utf-8")
+        print(f"report saved: {args.out}")
+        return 0
+
     if args.command == "analyze":
         payload = build_analysis_payload(args.csv, args.question)
         args.out.write_text(
diff --git a/bitnet_tools/desktop.py b/bitnet_tools/desktop.py
new file mode 100644
index 0000000..0693e45
--- /dev/null
+++ b/bitnet_tools/desktop.py
@@ -0,0 +1,260 @@
+from __future__ import annotations
+
+import json
+import subprocess
+import threading
+import tkinter as tk
+from pathlib import Path
+from tkinter import filedialog, ttk
+
+from .analysis import build_analysis_payload, build_analysis_payload_from_csv_text
+from .doctor import collect_environment
+
+
+def run_ollama(model: str, prompt: str) -> str:
+    """Run ``ollama run <model> <prompt>`` and return its stripped stdout.
+
+    Raises:
+        RuntimeError: If the command exits non-zero; the message is the
+            command's stderr, or a generic text when stderr is empty.
+    """
+    proc = subprocess.run(
+        ["ollama", "run", model, prompt],
+        capture_output=True,
+        text=True,
+        # Decode as UTF-8 rather than the locale default (e.g. cp949 on Korean
+        # Windows), which can garble or reject non-ASCII model output.
+        # NOTE(review): assumes ollama writes UTF-8 - confirm.
+        encoding="utf-8",
+        errors="replace",
+        check=False,
+    )
+    if proc.returncode != 0:
+        raise RuntimeError(proc.stderr.strip() or "ollama run failed")
+    return proc.stdout.strip()
+
+
+class DesktopApp:
+    """Tkinter window: pick or paste a CSV, build a prompt, run it via Ollama.
+
+    Button handlers start daemon threads for the slow work; those threads
+    schedule every widget update through ``_on_ui``.
+    """
+
+    def __init__(self, root: tk.Tk) -> None:
+        self.root = root
+        self.root.title("BitNet CSV Analyzer (Windows)")
+        self.root.geometry("1100x760")
+
+        # Path chosen in the file dialog; None until a file is opened.
+        self.csv_path: Path | None = None
+        # Prompt produced by the most recent successful analysis.
+        self.latest_prompt = ""
+
+        self._build_ui()
+
+    def _build_ui(self) -> None:
+        """Create and lay out every widget of the window."""
+        frame = ttk.Frame(self.root, padding=12)
+        frame.pack(fill="both", expand=True)
+
+        header = ttk.Label(
+            frame,
+            text="BitNet CSV Analyzer - 터미널 없이 바로 실행",
+            font=("Segoe UI", 14, "bold"),
+        )
+        header.pack(anchor="w")
+
+        sub = ttk.Label(
+            frame,
+            text="CSV 선택/붙여넣기 → 분석 → BitNet 실행 순서로 사용하세요.",
+        )
+        sub.pack(anchor="w", pady=(0, 10))
+
+        top_row = ttk.Frame(frame)
+        top_row.pack(fill="x", pady=(0, 8))
+        ttk.Button(top_row, text="CSV 파일 열기", command=self._open_csv).pack(side="left")
+
+        self.csv_label = ttk.Label(top_row, text="선택된 파일 없음")
+        self.csv_label.pack(side="left", padx=12)
+
+        csv_row = ttk.LabelFrame(frame, text="CSV 텍스트 (파일 미선택 시 여기에 붙여넣기)")
+        csv_row.pack(fill="both", pady=(0, 8))
+        self.csv_text = tk.Text(csv_row, height=8, wrap="none")
+        self.csv_text.pack(fill="both", expand=True, padx=8, pady=8)
+
+        question_row = ttk.LabelFrame(frame, text="질문")
+        question_row.pack(fill="x", pady=(0, 8))
+
+        chip_row = ttk.Frame(question_row)
+        chip_row.pack(anchor="w", padx=8, pady=6)
+        presets = [
+            "핵심 인사이트 3개와 근거를 알려줘",
+            "이상치 의심 포인트와 추가 확인 항목을 알려줘",
+            "실행 가능한 다음 액션 5개를 우선순위로 제안해줘",
+        ]
+        for txt in presets:
+            ttk.Button(chip_row, text=txt.split()[0], command=lambda t=txt: self._set_question(t)).pack(
+                side="left", padx=(0, 6)
+            )
+
+        self.question = tk.Text(question_row, height=3, wrap="word")
+        self.question.pack(fill="x", padx=8, pady=(0, 8))
+        self.question.insert("1.0", presets[0])
+
+        model_row = ttk.Frame(frame)
+        model_row.pack(fill="x", pady=(0, 8))
+
+        ttk.Label(model_row, text="BitNet 모델 태그").pack(side="left")
+        self.model = ttk.Entry(model_row)
+        self.model.insert(0, "bitnet:latest")
+        self.model.pack(side="left", fill="x", expand=True, padx=8)
+
+        ttk.Button(model_row, text="환경진단", command=self._doctor_async).pack(side="left", padx=(8, 4))
+        ttk.Button(model_row, text="1) 분석", command=self._analyze_async).pack(side="left", padx=(0, 4))
+        ttk.Button(model_row, text="2) BitNet 실행", command=self._run_model_async).pack(side="left")
+
+        self.status = ttk.Label(frame, text="대기 중")
+        self.status.pack(anchor="w", pady=(0, 8))
+
+        output = ttk.Panedwindow(frame, orient="vertical")
+        output.pack(fill="both", expand=True)
+
+        self.summary = self._make_text_panel(output, "데이터 요약")
+        self.prompt = self._make_text_panel(output, "생성 프롬프트")
+        self.answer = self._make_text_panel(output, "BitNet 응답")
+
+    def _make_text_panel(self, parent: ttk.Panedwindow, title: str) -> tk.Text:
+        """Add a titled, scrollable text panel to *parent* and return its Text widget."""
+        panel = ttk.LabelFrame(parent, text=title)
+        text = tk.Text(panel, wrap="word", height=10)
+        scrollbar = ttk.Scrollbar(panel, orient="vertical", command=text.yview)
+        text.configure(yscrollcommand=scrollbar.set)
+        text.pack(side="left", fill="both", expand=True)
+        scrollbar.pack(side="right", fill="y")
+        parent.add(panel, weight=1)
+        return text
+
+    def _on_ui(self, func, *args) -> None:
+        """Schedule ``func(*args)`` on the Tk event loop via ``root.after``."""
+        self.root.after(0, lambda: func(*args))
+
+    def _set_question(self, text: str) -> None:
+        """Replace the contents of the question box with *text*."""
+        self.question.delete("1.0", "end")
+        self.question.insert("1.0", text)
+
+    def _open_csv(self) -> None:
+        """Let the user pick a CSV file and preview its contents in the text box."""
+        path = filedialog.askopenfilename(
+            title="CSV 파일 선택",
+            filetypes=[("CSV files", "*.csv"), ("All files", "*.*")],
+        )
+        if not path:
+            return
+        self.csv_path = Path(path)
+        self.csv_label.configure(text=str(self.csv_path))
+        self.csv_text.delete("1.0", "end")
+        try:
+            content = self.csv_path.read_text(encoding="utf-8")
+        except (OSError, UnicodeDecodeError) as exc:
+            # The preview is optional (analysis reads the file from csv_path), so
+            # report the problem in the status bar instead of raising out of the callback.
+            self._set_status(f"파일 미리보기 실패: {exc}")
+            return
+        self.csv_text.insert("1.0", content)
+
+    def _get_question(self) -> str:
+        """Return the stripped question text, or a default question when it is empty."""
+        question = self.question.get("1.0", "end").strip()
+        return question or "이 데이터의 핵심 인사이트를 알려줘"
+
+    def _analyze_async(self) -> None:
+        """Run ``_analyze`` on a daemon thread."""
+        threading.Thread(target=self._analyze, daemon=True).start()
+
+    def _analyze(self) -> None:
+        """Build the analysis payload and publish summary and prompt; a selected file wins over pasted text."""
+        self._on_ui(self._set_status, "분석 중...")
+        try:
+            question = self._get_question()
+            csv_text = self.csv_text.get("1.0", "end").strip()
+            if self.csv_path:
+                payload = build_analysis_payload(self.csv_path, question)
+            elif csv_text:
+                payload = build_analysis_payload_from_csv_text(csv_text, question)
+            else:
+                self._on_ui(self._set_status, "CSV 파일을 선택하거나 CSV 텍스트를 붙여넣어 주세요")
+                return
+
+            self.latest_prompt = payload["prompt"]
+            self._on_ui(self._set_text, self.summary, json.dumps(payload["summary"], ensure_ascii=False, indent=2))
+            self._on_ui(self._set_text, self.prompt, self.latest_prompt)
+            self._on_ui(self._set_text, self.answer, "")
+            self._on_ui(self._set_status, "분석 완료")
+        except Exception as exc:
+            self._on_ui(self._set_status, f"오류: {exc}")
+
+    def _doctor_async(self) -> None:
+        """Run ``_doctor`` on a daemon thread."""
+        threading.Thread(target=self._doctor, daemon=True).start()
+
+    def _doctor(self) -> None:
+        """Run the environment check and show its JSON report in the answer panel."""
+        self._on_ui(self._set_status, "환경 진단 중...")
+        try:
+            report = collect_environment(model=self.model.get().strip() or None)
+        except Exception as exc:
+            # Same policy as _analyze/_run_model: surface the failure instead of
+            # leaving the status stuck at "in progress" when this thread dies.
+            self._on_ui(self._set_status, f"오류: {exc}")
+            return
+        self._on_ui(self._set_text, self.answer, json.dumps(report, ensure_ascii=False, indent=2))
+        if report.get("ollama_installed") and report.get("ollama_running"):
+            self._on_ui(self._set_status, "환경 진단 완료 (정상)")
+        else:
+            self._on_ui(self._set_status, "환경 진단 완료 (확인 필요)")
+
+    def _run_model_async(self) -> None:
+        """Run ``_run_model`` on a daemon thread."""
+        threading.Thread(target=self._run_model, daemon=True).start()
+
+    def _run_model(self) -> None:
+        """Send the latest prompt to the model named in the entry box and show the answer."""
+        if not self.latest_prompt:
+            self._on_ui(self._set_text, self.answer, "먼저 분석을 실행해 프롬프트를 생성하세요.")
+            return
+
+        model = self.model.get().strip()
+        if not model:
+            self._on_ui(self._set_text, self.answer, "모델 태그를 입력하세요. 예: bitnet:latest")
+            return
+
+        self._on_ui(self._set_status, "BitNet 실행 중...")
+        try:
+            result = run_ollama(model, self.latest_prompt)
+            self._on_ui(self._set_text, self.answer, result)
+            self._on_ui(self._set_status, "BitNet 실행 완료")
+        except Exception as exc:
+            self._on_ui(self._set_text, self.answer, f"오류: {exc}")
+            self._on_ui(self._set_status, "BitNet 실행 실패")
+
+    def _set_text(self, widget: tk.Text, value: str) -> None:
+        """Replace the whole contents of *widget* with *value*."""
+        widget.delete("1.0", "end")
+        widget.insert("1.0", value)
+
+    def _set_status(self, value: str) -> None:
+        """Show *value* in the status label."""
+        self.status.configure(text=value)
+
+
+def launch_desktop() -> None:
+    """Create the Tk root, attach the app, and block in the Tk main loop."""
+    root = tk.Tk()
+    DesktopApp(root)
+    root.mainloop()
+
+
+if __name__ == "__main__":
+    launch_desktop()
diff --git a/bitnet_tools/doctor.py b/bitnet_tools/doctor.py
new file mode 100644
index 0000000..2758eea
--- /dev/null
+++ b/bitnet_tools/doctor.py
@@ -0,0 +1,82 @@
+from __future__ import annotations
+
+import platform
+import shutil
+import subprocess
+import sys
+from typing import Any
+
+
+def _run(cmd: list[str], timeout: float = 15.0) -> tuple[int, str, str]:
+    """Run *cmd* and return ``(returncode, stdout, stderr)`` with both streams stripped.
+
+    Failures to launch or to finish are reported as a non-zero code instead of
+    being raised, so a diagnostics run always yields a report: ``127`` when the
+    process cannot be started and ``124`` when it exceeds *timeout* seconds.
+    """
+    try:
+        proc = subprocess.run(cmd, capture_output=True, text=True, check=False, timeout=timeout)
+    except subprocess.TimeoutExpired:
+        return 124, "", f"timed out after {timeout:g}s"
+    except OSError as exc:
+        return 127, "", str(exc)
+    return proc.returncode, proc.stdout.strip(), proc.stderr.strip()
+
+
+def _model_matches(installed: str, requested: str) -> bool:
+    """Return True when *installed* is *requested* itself or *requested* plus a ``:tag``."""
+    # "bitnet" must match "bitnet:latest" but not an unrelated "bitnet2:latest",
+    # which a bare prefix test would accept.
+    return installed == requested or installed.startswith(f"{requested}:")
+
+
+def collect_environment(model: str | None = None) -> dict[str, Any]:
+    """Collect a diagnostics report about Python, the platform, and Ollama.
+
+    Args:
+        model: Optional model name to look for in the ``ollama list`` output.
+
+    Returns:
+        A dict that always has ``python``, ``platform``, ``ollama_installed``
+        and ``ollama_running``. Depending on what was found it also carries
+        ``diagnosis``, ``ollama_path``, ``ollama_version`` (or
+        ``ollama_version_error``), ``models``, ``model_requested``,
+        ``model_available`` or ``ollama_list_error``.
+    """
+    info: dict[str, Any] = {
+        "python": sys.version.split()[0],
+        "platform": platform.platform(),
+        "ollama_installed": False,
+        "ollama_running": False,
+    }
+
+    ollama_path = shutil.which("ollama")
+    if not ollama_path:
+        info["diagnosis"] = "ollama not found in PATH"
+        return info
+
+    info["ollama_installed"] = True
+    info["ollama_path"] = ollama_path
+
+    code, out, err = _run(["ollama", "--version"])
+    if code == 0:
+        info["ollama_version"] = out
+    else:
+        info["ollama_version_error"] = err or out or "unknown error"
+
+    # ``ollama_running`` is set only when ``ollama list`` exits with status 0.
+    code, out, err = _run(["ollama", "list"])
+    if code == 0:
+        info["ollama_running"] = True
+        rows = [line for line in out.splitlines() if line.strip()]
+        # The first non-empty line is skipped as the table header; the first
+        # whitespace-separated token of every remaining row is taken as the name.
+        models = [row.split()[0] for row in rows[1:]]
+        info["models"] = models
+        if model:
+            info["model_requested"] = model
+            info["model_available"] = any(_model_matches(m, model) for m in models)
+    else:
+        info["ollama_list_error"] = err or out or "failed to query ollama"
+
+    return info
diff --git a/bitnet_tools/multi_csv.py b/bitnet_tools/multi_csv.py
new file mode 100644
index 0000000..ee48828
--- /dev/null
+++ b/bitnet_tools/multi_csv.py
@@ -0,0 +1,477 @@
+from __future__ import annotations
+
+import csv
+import hashlib
+import json
+import math
+import random
+from collections import Counter, defaultdict
+from datetime import datetime
+from pathlib import Path
+from typing import Any
+
+from .analysis import _to_float
+
+CACHE_DIR = Path('.bitnet_cache')
+
+
+def _quantile(sorted_values: list[float], q: float) -> float:
+    """Return the *q*-quantile of an ascending list using linear interpolation.
+
+    An empty list yields ``0.0`` and a single-element list yields that element.
+    The caller is responsible for passing the values already sorted.
+    """
+    n = len(sorted_values)
+    if n == 0:
+        return 0.0
+    if n == 1:
+        return sorted_values[0]
+
+    position = (n - 1) * q
+    lower_idx = math.floor(position)
+    upper_idx = math.ceil(position)
+    if lower_idx == upper_idx:
+        # The position falls exactly on an element; nothing to interpolate.
+        return sorted_values[lower_idx]
+
+    frac = position - lower_idx
+    lower_val, upper_val = sorted_values[lower_idx], sorted_values[upper_idx]
+    return lower_val * (1 - frac) + upper_val * frac
+
+
+def _outlier_ratio(values: list[float]) -> float:
+    """Return the share of *values* lying outside the 1.5 * IQR fences.
+
+    The result is rounded to six decimals. ``0.0`` is returned when there are
+    fewer than four values or when the interquartile range is zero.
+    """
+    if len(values) < 4:
+        return 0.0
+
+    ordered = sorted(values)
+    q1 = _quantile(ordered, 0.25)
+    q3 = _quantile(ordered, 0.75)
+    spread = q3 - q1
+    if spread == 0:
+        return 0.0
+
+    lower_fence = q1 - 1.5 * spread
+    upper_fence = q3 + 1.5 * spread
+    flagged = 0
+    for v in ordered:
+        if v < lower_fence or v > upper_fence:
+            flagged += 1
+    return round(flagged / len(ordered), 6)
+
+
+def _reservoir_sample(values: list[float], new_value: float, seen: int, cap: int) -> None:
+    """Offer one number to the bounded sample held in *values* (mutated in place).
+
+    While *values* holds fewer than *cap* items the value is appended. After
+    that a random index in ``[0, seen - 1]`` is drawn and the value overwrites
+    that slot only when the index is below *cap*. Nothing happens for
+    ``cap <= 0``. *seen* is the caller's running count including this value.
+    """
+    if cap <= 0:
+        return
+    if len(values) >= cap:
+        slot = random.randint(0, seen - 1)
+        if slot < cap:
+            values[slot] = new_value
+    else:
+        values.append(new_value)
+
+
+def _reservoir_sample_str(values: list[str], new_value: str, seen: int, cap: int) -> None:
+    """Offer one string to the bounded sample held in *values* (mutated in place).
+
+    The list grows by appending until it holds *cap* items; from then on the
+    new string replaces the slot at a random index drawn from
+    ``[0, seen - 1]``, but only if that index is below *cap*. ``cap <= 0``
+    disables sampling.
+    """
+    if cap <= 0:
+        return
+    has_room = len(values) < cap
+    if has_room:
+        values.append(new_value)
+        return
+    pick = random.randint(0, seen - 1)
+    if pick >= cap:
+        return
+    values[pick] = new_value
+
+
+def _finalize_group_ratio_table(table: dict[str, Counter[str]], group_col: str, target_col: str) -> dict[str, Any]:
+    """Turn per-group target counters into a count/ratio table.
+
+    Each target value of a group is mapped to its count and its share of that
+    group's total (rounded to six decimals, ``0.0`` for an empty group). The
+    result also carries the group and target column names.
+    """
+    groups: dict[str, Any] = {}
+    for group_value, counter in table.items():
+        group_total = sum(counter.values())
+        cells: dict[str, Any] = {}
+        for target_value, count in counter.items():
+            ratio = round(count / group_total, 6) if group_total else 0.0
+            cells[target_value] = {'count': count, 'ratio': ratio}
+        groups[group_value] = cells
+    return {'group_column': group_col, 'target_column': target_col, 'groups': groups}
+
+
+def _looks_like_date(value: str) -> bool:
+    """Return True when *value* parses with one of the supported date formats.
+
+    Supported layouts are ``YYYY-MM-DD``, ``YYYY/MM/DD``, ``YYYY.MM.DD`` and
+    ``YYYY-MM-DD HH:MM:SS``.
+    """
+
+    def _parses(fmt: str) -> bool:
+        try:
+            datetime.strptime(value, fmt)
+        except ValueError:
+            return False
+        return True
+
+    return any(
+        _parses(fmt)
+        for fmt in ("%Y-%m-%d", "%Y/%m/%d", "%Y.%m.%d", "%Y-%m-%d %H:%M:%S")
+    )
+
+
+def _infer_semantic_type(col: str, dtype: str, samples: list[str], unique_ratio: float) -> str:
+    """Guess a coarse semantic label for a column.
+
+    Float columns become ``'geo_latitude'`` / ``'geo_longitude'`` when the
+    column name contains a latitude / longitude marker, otherwise
+    ``'numeric'``. String columns become ``'date'`` when at least 70% of the
+    non-empty samples look like dates, else ``'category'`` when
+    ``unique_ratio`` is at most 0.2. Everything else is ``'text'``.
+    """
+    name = col.lower()
+    if dtype == 'float':
+        # Name markers are plain substring checks on the column name.
+        if 'lat' in name or '위도' in col:
+            return 'geo_latitude'
+        if 'lon' in name or '경도' in col or 'lng' in name:
+            return 'geo_longitude'
+        return 'numeric'
+    if dtype != 'string':
+        return 'text'
+
+    filled = [s for s in samples if s]
+    if filled:
+        hits = sum(map(_looks_like_date, filled))
+        if hits / len(filled) >= 0.7:
+            return 'date'
+    if unique_ratio <= 0.2:
+        return 'category'
+    return 'text'
+
+
+def _profile_csv_stream(
+    path: Path,
+    group_column: str | None = None,
+    target_column: str | None = None,
+    outlier_sample_cap: int = 20000,
+    value_sample_cap: int = 300,
+) -> dict[str, Any]:
+    """Profile one CSV file in a single streaming pass over its rows.
+
+    Cells are stripped and an empty cell counts as missing. A column is typed
+    ``'float'`` only when it has at least one value and ``_to_float`` parsed
+    every non-missing value; otherwise it is typed ``'string'``.
+
+    Args:
+        path: CSV file to read (decoded as UTF-8, a BOM is tolerated).
+        group_column: Optional grouping column for the group/target table.
+        target_column: Optional target column for the group/target table.
+        outlier_sample_cap: Maximum numeric values sampled per column for the
+            outlier ratio.
+        value_sample_cap: Maximum raw values sampled per column for semantic
+            type inference.
+
+    Returns:
+        A dict with ``summary``, ``column_profiles`` and ``group_target_ratio``
+        (``None`` unless both the group and target columns are in the header).
+
+    Raises:
+        ValueError: If the file has no header row.
+    """
+    with path.open('r', encoding='utf-8-sig', newline='') as f:
+        reader = csv.DictReader(f)
+        if reader.fieldnames is None:
+            raise ValueError(f'CSV header not found: {path}')
+        columns = [str(c) for c in reader.fieldnames]
+
+        # Loop-invariant: decided once from the header instead of on every row.
+        track_groups = bool(
+            group_column and target_column and group_column in columns and target_column in columns
+        )
+
+        missing = {c: 0 for c in columns}
+        non_missing = {c: 0 for c in columns}
+        # The counter's keys are exactly the distinct values of the column, so
+        # no separate per-column set is kept (it would store each one twice).
+        value_counts: dict[str, Counter[str]] = {c: Counter() for c in columns}
+        value_samples: dict[str, list[str]] = {c: [] for c in columns}
+
+        numeric_positive = {c: 0 for c in columns}
+        numeric_zero = {c: 0 for c in columns}
+        numeric_negative = {c: 0 for c in columns}
+        numeric_counts = {c: 0 for c in columns}
+        numeric_sums = {c: 0.0 for c in columns}
+        numeric_mins: dict[str, float] = {}
+        numeric_maxs: dict[str, float] = {}
+        text_seen = {c: False for c in columns}
+        numeric_outlier_samples: dict[str, list[float]] = {c: [] for c in columns}
+
+        group_target_counter: dict[str, Counter[str]] = defaultdict(Counter)
+        row_count = 0
+
+        for row in reader:
+            row_count += 1
+            if track_groups:
+                g = (row.get(group_column) or '').strip()
+                t = (row.get(target_column) or '').strip()
+                if g and t:
+                    group_target_counter[g][t] += 1
+
+            for col in columns:
+                raw = (row.get(col) or '').strip()
+                if raw == '':
+                    missing[col] += 1
+                    continue
+                non_missing[col] += 1
+                value_counts[col][raw] += 1
+                _reservoir_sample_str(value_samples[col], raw, non_missing[col], value_sample_cap)
+
+                num = _to_float(raw)
+                if num is None:
+                    # One unparsable value is enough to type the column as string.
+                    text_seen[col] = True
+                    continue
+
+                numeric_counts[col] += 1
+                numeric_sums[col] += num
+                if col not in numeric_mins or num < numeric_mins[col]:
+                    numeric_mins[col] = num
+                if col not in numeric_maxs or num > numeric_maxs[col]:
+                    numeric_maxs[col] = num
+
+                if num > 0:
+                    numeric_positive[col] += 1
+                elif num < 0:
+                    numeric_negative[col] += 1
+                else:
+                    numeric_zero[col] += 1
+
+                _reservoir_sample(numeric_outlier_samples[col], num, numeric_counts[col], outlier_sample_cap)
+
+    dtypes: dict[str, str] = {}
+    numeric_stats: dict[str, dict[str, float]] = {}
+    profiles: dict[str, Any] = {}
+
+    for col in columns:
+        count = numeric_counts[col]
+        if count > 0 and not text_seen[col]:
+            dtypes[col] = 'float'
+            numeric_stats[col] = {
+                'count': float(count),
+                'mean': float(numeric_sums[col] / count),
+                'min': float(numeric_mins[col]),
+                'max': float(numeric_maxs[col]),
+            }
+        else:
+            dtypes[col] = 'string'
+
+        nn = non_missing[col]
+        unique_count = len(value_counts[col])
+        top = value_counts[col].most_common(5)
+        # Top-value ratios are relative to all rows, missing cells included.
+        top_values = [
+            {'value': v, 'count': cnt, 'ratio': round(cnt / row_count, 6) if row_count else 0.0}
+            for v, cnt in top
+        ]
+
+        numeric_total = numeric_positive[col] + numeric_zero[col] + numeric_negative[col]
+        numeric_distribution: dict[str, float] = {}
+        if numeric_total:
+            numeric_distribution = {
+                'positive_ratio': round(numeric_positive[col] / numeric_total, 6),
+                'zero_ratio': round(numeric_zero[col] / numeric_total, 6),
+                'negative_ratio': round(numeric_negative[col] / numeric_total, 6),
+                'outlier_ratio': _outlier_ratio(numeric_outlier_samples[col]),
+            }
+
+        # The unique ratio is relative to non-missing cells only.
+        unique_ratio = round(unique_count / nn, 6) if nn else 0.0
+        dominant_value_ratio = top_values[0]['ratio'] if top_values else 0.0
+        profiles[col] = {
+            'missing_count': missing[col],
+            'missing_ratio': round(missing[col] / row_count, 6) if row_count else 0.0,
+            'non_missing_count': nn,
+            'unique_count': unique_count,
+            'unique_ratio': unique_ratio,
+            'dominant_value_ratio': dominant_value_ratio,
+            'top_values': top_values,
+            'numeric_distribution': numeric_distribution,
+            'dtype': dtypes[col],
+            'semantic_type': _infer_semantic_type(col, dtypes[col], value_samples[col], unique_ratio),
+        }
+
+    summary = {
+        'row_count': row_count,
+        'column_count': len(columns),
+        'columns': columns,
+        'dtypes': dtypes,
+        'missing_counts': missing,
+        'numeric_stats': numeric_stats,
+    }
+
+    group_target_ratio: dict[str, Any] | None = None
+    if track_groups:
+        group_target_ratio = _finalize_group_ratio_table(group_target_counter, group_column, target_column)
+
+    return {'summary': summary, 'column_profiles': profiles, 'group_target_ratio': group_target_ratio}
+
+
+def _schema_drift(files: list[dict[str, Any]], shared_columns: list[str]) -> dict[str, Any]:
+    """Summarise how each shared column differs across the profiled files.
+
+    For every column the result records whether the detected dtype differs
+    between files and the max-minus-min range (rounded to six decimals) of the
+    missing ratio, the dominant-value ratio and the numeric mean. Files without
+    numeric stats for a column do not contribute to its mean range.
+    """
+
+    def _spread(numbers: list[float]) -> float:
+        return round(max(numbers) - min(numbers), 6) if numbers else 0.0
+
+    report: dict[str, Any] = {}
+    for col in shared_columns:
+        col_profiles = [f['column_profiles'][col] for f in files if col in f['column_profiles']]
+        per_file_stats = (f['summary']['numeric_stats'].get(col) for f in files)
+        means = [stats['mean'] for stats in per_file_stats if stats]
+
+        report[col] = {
+            'dtype_changed': len({p['dtype'] for p in col_profiles}) > 1,
+            'missing_ratio_range': _spread([p['missing_ratio'] for p in col_profiles]),
+            'dominant_value_ratio_range': _spread([p['dominant_value_ratio'] for p in col_profiles]),
+            'mean_range': _spread(means),
+        }
+    return report
+
+
+def _cache_key(path: Path, group_column: str | None, target_column: str | None) -> str:
+    """Return a SHA-256 hex digest identifying one file state plus grouping options.
+
+    The digest covers the resolved path, the file size, the modification time
+    in nanoseconds and the group/target column names, so a change to any of
+    them yields a different key.
+    """
+    stat_result = path.stat()
+    parts = (
+        path.resolve(),
+        stat_result.st_size,
+        stat_result.st_mtime_ns,
+        group_column,
+        target_column,
+    )
+    fingerprint = "|".join(str(part) for part in parts)
+    return hashlib.sha256(fingerprint.encode('utf-8')).hexdigest()
+
+
+def _load_cached_profile(path: Path, group_column: str | None, target_column: str | None) -> dict[str, Any] | None:
+    """Return the cached profile for *path*, or ``None`` on a cache miss.
+
+    A miss is reported when the cache file is absent or unreadable, when it
+    does not contain valid UTF-8 JSON, or when the JSON is not an object.
+    Reading never creates the cache directory.
+    """
+    cp = CACHE_DIR / f"{_cache_key(path, group_column, target_column)}.json"
+    try:
+        data = json.loads(cp.read_text(encoding='utf-8'))
+    except (OSError, ValueError):
+        # OSError: missing or unreadable file. ValueError: covers both
+        # json.JSONDecodeError and UnicodeDecodeError for a corrupt entry.
+        return None
+    # Callers index the profile by key, so anything but a dict is unusable.
+    return data if isinstance(data, dict) else None
+
+
+def _save_cached_profile(path: Path, group_column: str | None, target_column: str | None, data: dict[str, Any]) -> None:
+    """Write *data* as JSON to the cache entry derived from *path* and the options.
+
+    The cache directory is created if needed; non-ASCII text is stored as-is
+    in a UTF-8 file.
+    """
+    CACHE_DIR.mkdir(exist_ok=True)
+    digest = _cache_key(path, group_column, target_column)
+    payload = json.dumps(data, ensure_ascii=False)
+    (CACHE_DIR / f"{digest}.json").write_text(payload, encoding='utf-8')
+
+
+def _generate_insights(files: list[dict[str, Any]], schema_drift: dict[str, Any]) -> list[str]:
+    """Build up to 30 human-readable warnings from profiles and drift data.
+
+    Per file and column: a warning when the missing ratio is at least 20% and
+    one when the outlier ratio is at least 10%. Per shared column: a warning
+    when the dtype differs between files and one when the mean range is
+    positive. File-level messages come first.
+    """
+
+    def _messages():
+        for entry in files:
+            for col, prof in entry['column_profiles'].items():
+                if prof['missing_ratio'] >= 0.2:
+                    yield f"{entry['path']}:{col} 결측비율이 높음({prof['missing_ratio']:.2%})"
+                out_ratio = prof['numeric_distribution'].get('outlier_ratio', 0.0)
+                if out_ratio >= 0.1:
+                    yield f"{entry['path']}:{col} 이상치 비율이 높음({out_ratio:.2%})"
+        for col, drift in schema_drift.items():
+            if drift['dtype_changed']:
+                yield f"공통 컬럼 {col}의 타입이 파일 간 다르게 탐지됨"
+            if drift['mean_range'] > 0:
+                yield f"공통 컬럼 {col}의 평균 범위 변화: {drift['mean_range']:.4f}"
+
+    return list(_messages())[:30]
+
+
+def analyze_multiple_csv(
+    csv_paths: list[Path],
+    question: str,
+    group_column: str | None = None,
+    target_column: str | None = None,
+    use_cache: bool = True,
+) -> dict[str, Any]:
+    """Profile several CSV files and combine the results into one report dict.
+
+    Each file is profiled with ``_profile_csv_stream``; when ``use_cache`` is
+    true a cached profile is used if present and a fresh one is stored.
+
+    Args:
+        csv_paths: Files to analyse; must not be empty.
+        question: Free-text question copied into the result and each file entry.
+        group_column: Optional grouping column for the group/target table.
+        target_column: Optional target column for the group/target table.
+        use_cache: Whether to read and write the on-disk profile cache.
+
+    Returns:
+        A dict with ``question``, ``file_count``, ``total_row_count``,
+        ``shared_columns``, ``union_columns``, ``files``, ``schema_drift``,
+        ``insights`` and ``code_guidance``.
+
+    Raises:
+        ValueError: If ``csv_paths`` is empty.
+        FileNotFoundError: If one of the paths does not exist.
+    """
+    if not csv_paths:
+        raise ValueError('at least one CSV path is required')
+
+    files: list[dict[str, Any]] = []
+    column_sets: list[set[str]] = []
+    total_rows = 0
+
+    for path in csv_paths:
+        if not path.exists():
+            raise FileNotFoundError(f'CSV file not found: {path}')
+
+        profiled = None
+        if use_cache:
+            profiled = _load_cached_profile(path, group_column, target_column)
+        if profiled is None:
+            profiled = _profile_csv_stream(path, group_column=group_column, target_column=target_column)
+            if use_cache:
+                _save_cached_profile(path, group_column, target_column, profiled)
+
+        summary = profiled['summary']
+        total_rows += summary['row_count']
+        column_sets.append(set(summary['columns']))
+        file_entry = {
+            'path': str(path),
+            'question': question,
+            'summary': summary,
+            'column_profiles': profiled['column_profiles'],
+            'group_target_ratio': profiled['group_target_ratio'],
+        }
+        files.append(file_entry)
+
+    # csv_paths is non-empty here, so column_sets holds at least one set.
+    shared_columns = sorted(set.intersection(*column_sets))
+    union_columns = sorted(set.union(*column_sets))
+    schema_drift = _schema_drift(files, shared_columns)
+    insights = _generate_insights(files, schema_drift)
+    code_guidance = build_code_guidance(shared_columns, group_column, target_column)
+
+    return {
+        'question': question,
+        'file_count': len(files),
+        'total_row_count': total_rows,
+        'shared_columns': shared_columns,
+        'union_columns': union_columns,
+        'files': files,
+        'schema_drift': schema_drift,
+        'insights': insights,
+        'code_guidance': code_guidance,
+    }
+
+
+def build_code_guidance(shared_columns: list[str], group_column: str | None = None, target_column: str | None = None) -> dict[str, str]:
+    """Return follow-up guidance: recommended steps and a runnable pandas example.
+
+    Args:
+        shared_columns: Columns common to all files; the first one is used as
+            the merge key in the example (a placeholder name if empty).
+        group_column: Optional grouping column for the ratio-table snippet.
+        target_column: Optional target column for the ratio-table snippet.
+
+    Returns:
+        A dict with ``recommended_steps`` (plain text) and ``pandas_example``
+        (Python source). The ratio-table snippet is included only when both
+        ``group_column`` and ``target_column`` are given.
+    """
+    join_key = shared_columns[0] if shared_columns else '공통키컬럼'
+    group_block = ''
+    if group_column and target_column:
+        # Names are embedded with !r so a quote or backslash in a column name
+        # cannot break the generated source. "\\n" keeps the line break as an
+        # escape inside the generated string literal; a real newline there
+        # would make the example a syntax error.
+        group_block = (
+            f"ratio_tbl = (merged.groupby({group_column!r})[{target_column!r}].value_counts(normalize=True)"
+            ".rename('ratio').reset_index())\n"
+            "print('그룹-타깃 비율표:\\n', ratio_tbl.head(20))\n\n"
+        )
+
+    pandas_code = (
+        "import pandas as pd\n"
+        "import matplotlib.pyplot as plt\n\n"
+        "paths = ['file1.csv', 'file2.csv', 'file3.csv']\n"
+        "dfs = [pd.read_csv(p) for p in paths]\n\n"
+        f"key = {join_key!r}\n"
+        "merged = dfs[0]\n"
+        "for df in dfs[1:]:\n"
+        "    if key in merged.columns and key in df.columns:\n"
+        "        merged = merged.merge(df, on=key, how='outer', suffixes=('', '_r'))\n\n"
+        "missing_ratio = merged.isna().mean().sort_values(ascending=False)\n"
+        "print('결측 비율 상위:\\n', missing_ratio.head(10))\n\n"
+        "numeric_cols = merged.select_dtypes(include='number').columns\n"
+        "if len(numeric_cols) > 0:\n"
+        "    ratio = (merged[numeric_cols] > 0).mean().sort_values(ascending=False)\n"
+        "    print('양수 비율 상위:\\n', ratio.head(10))\n"
+        "    ratio.head(10).plot(kind='bar', title='양수 비율 상위 10개 컬럼')\n"
+        "    plt.tight_layout(); plt.show()\n\n"
+        f"{group_block}"
+    )
+
+    return {
+        'recommended_steps': (
+            '1) 공통 키 컬럼 확인 후 병합\n'
+            '2) 컬럼별 결측/고유값/상위값 비율 확인\n'
+            '3) 수치형 컬럼 비율(양수/0/음수), 이상치 비율, 분포 확인\n'
+            '4) 그룹 컬럼 기준 타깃 비율 분석(예: 시도명-세차유형)\n'
+            '5) 파일 간 스키마 변화/평균 변화 범위 확인'
+        ),
+        'pandas_example': pandas_code,
+    }
+
+
+def build_multi_csv_markdown(result: dict[str, Any]) -> str:
+    """Render the result dict of ``analyze_multiple_csv`` as a Markdown report.
+
+    Sections, in order: header facts, up to ten insights (if any), one table
+    per file, the cross-file drift table, generated chart paths (only when
+    ``result`` has a non-empty ``charts`` mapping) and the code guidance.
+
+    Args:
+        result: Mapping with ``question``, ``file_count``, ``total_row_count``,
+            ``shared_columns``, ``files``, ``schema_drift`` and
+            ``code_guidance``; ``insights`` and ``charts`` are optional.
+
+    Returns:
+        The report as a single newline-joined string.
+    """
+
+    def _cell(value: Any) -> str:
+        # A raw "|" or a line break inside a cell would split or end the table
+        # row, so column names taken from CSV headers are neutralised first.
+        return str(value).replace('|', '\\|').replace('\r', ' ').replace('\n', ' ')
+
+    lines = [
+        '# 다중 CSV 분석 리포트',
+        '',
+        f"- 질문: {result['question']}",
+        f"- 파일 수: {result['file_count']}",
+        f"- 전체 행 수: {result['total_row_count']}",
+        f"- 공통 컬럼: {', '.join(result['shared_columns']) if result['shared_columns'] else '(없음)'}",
+        '',
+    ]
+
+    if result.get('insights'):
+        lines.extend(['## 핵심 인사이트', ''])
+        lines.extend(f"- {it}" for it in result['insights'][:10])
+        lines.append('')
+
+    for file_info in result['files']:
+        lines.extend(
+            [
+                f"## 파일: {file_info['path']}",
+                '',
+                f"- 행 수: {file_info['summary']['row_count']}",
+                f"- 열 수: {file_info['summary']['column_count']}",
+                '',
+                '| 컬럼 | 타입 | 의미타입 | 결측비율 | 고유비율 | 대표값비율 |',
+                '|---|---|---|---:|---:|---:|',
+            ]
+        )
+        for col in file_info['summary']['columns']:
+            prof = file_info['column_profiles'][col]
+            lines.append(
+                f"| {_cell(col)} | {prof['dtype']} | {prof.get('semantic_type','')} | {prof['missing_ratio']:.4f} | {prof['unique_ratio']:.4f} | {prof['dominant_value_ratio']:.4f} |"
+            )
+        if file_info.get('group_target_ratio'):
+            gtr = file_info['group_target_ratio']
+            lines.extend(['', f"- 그룹비율: {gtr['group_column']} x {gtr['target_column']}"])
+        lines.append('')
+
+    lines.extend(['## 파일 간 스키마/분포 변화', '', '| 컬럼 | 타입변화 | 결측비율범위 | 대표값비율범위 | 평균범위 |', '|---|---|---:|---:|---:|'])
+    for col, drift in result['schema_drift'].items():
+        lines.append(
+            f"| {_cell(col)} | {drift['dtype_changed']} | {drift['missing_ratio_range']:.4f} | {drift['dominant_value_ratio_range']:.4f} | {drift['mean_range']:.4f} |"
+        )
+
+    charts = result.get('charts')
+    if charts:
+        lines.extend(['', '## 생성된 차트 파일', ''])
+        for file_path, chart_paths in charts.items():
+            lines.append(f"- {file_path}")
+            lines.extend(f"  - {c}" for c in chart_paths)
+
+    lines.extend([
+        '',
+        '## 코드 가이드',
+        '',
+        '```text',
+        result['code_guidance']['recommended_steps'],
+        '```',
+        '',
+        '```python',
+        result['code_guidance']['pandas_example'],
+        '```',
+    ])
+
+    return '\n'.join(lines)
+
+
+def result_to_json(result: dict[str, Any]) -> str:
+    """Serialize *result* as 2-space indented JSON, keeping non-ASCII text readable."""
+    serialized = json.dumps(result, indent=2, ensure_ascii=False)
+    return serialized
diff --git a/bitnet_tools/ui/app.js b/bitnet_tools/ui/app.js
index 9312dd4..519f8a3 100644
--- a/bitnet_tools/ui/app.js
+++ b/bitnet_tools/ui/app.js
@@ -8,6 +8,10 @@ const summary = document.getElementById('summary');
 const prompt = document.getElementById('prompt');
 const answer = document.getElementById('answer');
 
+const dashboardJson = document.getElementById('dashboardJson');
+const dashboardCards = document.getElementById('dashboardCards');
+const dashboardInsights = document.getElementById('dashboardInsights');
+
 let latestPrompt = '';
 
 csvFile.addEventListener('change', async (e) => {
@@ -68,3 +72,35 @@ runBtn.addEventListener('click', async () => {
   const data = await res.json();
   answer.textContent = res.ok ? data.answer : (data.error || 'error');
 });
+
+// Render the pasted `multi-analyze` JSON as summary cards plus an insight list.
+document.getElementById('renderDashboardBtn').addEventListener('click', () => {
+  dashboardCards.innerHTML = '';
+  dashboardInsights.textContent = '';
+
+  let parsed;
+  try {
+    parsed = JSON.parse(dashboardJson.value || '{}');
+  } catch {
+    dashboardInsights.textContent = 'JSON 형식이 올바르지 않습니다.';
+    return;
+  }
+  // JSON.parse also accepts `null` and bare scalars; reading a property of
+  // null would throw, and a scalar is not a result object either.
+  if (parsed === null || typeof parsed !== 'object') {
+    dashboardInsights.textContent = 'JSON 형식이 올바르지 않습니다.';
+    return;
+  }
+
+  // Only real arrays are counted and listed; any other shape is treated as empty.
+  const sharedColumns = Array.isArray(parsed.shared_columns) ? parsed.shared_columns : [];
+  const insights = Array.isArray(parsed.insights) ? parsed.insights : [];
+
+  const cardItems = [
+    ['파일 수', parsed.file_count ?? '-'],
+    ['총 행 수', parsed.total_row_count ?? '-'],
+    ['공통 컬럼 수', sharedColumns.length],
+    ['인사이트 수', insights.length],
+  ];
+
+  cardItems.forEach(([label, value]) => {
+    const card = document.createElement('div');
+    card.className = 'card';
+    // Values come from user-pasted JSON, so they are inserted as text nodes
+    // rather than through innerHTML, which would interpret them as markup.
+    const title = document.createElement('strong');
+    title.textContent = label;
+    const body = document.createElement('span');
+    body.textContent = String(value);
+    card.append(title, body);
+    dashboardCards.appendChild(card);
+  });
+
+  dashboardInsights.textContent = insights.length
+    ? insights.map((x, i) => `${i + 1}. ${x}`).join('\n')
+    : '인사이트 항목이 없습니다.';
+});
diff --git a/bitnet_tools/ui/index.html b/bitnet_tools/ui/index.html
index bb7d18b..cf5224d 100644
--- a/bitnet_tools/ui/index.html
+++ b/bitnet_tools/ui/index.html
@@ -9,7 +9,7 @@
   <body>
     <main class="container">
       <h1>BitNet CSV Analyzer</h1>
-      <p class="sub">CSV 업로드 → 자동 요약 → BitNet 답변까지 한 번에.</p>
+      <p class="sub">CSV 업로드 → 자동 요약 → BitNet 답변 + 멀티 분석 대시보드.</p>
 
       <section class="panel">
         <label>CSV 파일</label>
@@ -53,6 +53,17 @@ <h3>생성 프롬프트</h3>
         <h3>BitNet 응답</h3>
         <pre id="answer"></pre>
       </section>
+
+      <section class="panel">
+        <h2>멀티 분석 대시보드(JSON)</h2>
+        <p class="sub">`multi-analyze` 결과 JSON을 붙여넣고 시각적으로 확인하세요.</p>
+        <textarea id="dashboardJson" rows="10" placeholder='{"file_count":2,...}'></textarea>
+        <div class="actions">
+          <button id="renderDashboardBtn">대시보드 렌더링</button>
+        </div>
+        <div id="dashboardCards" class="cards"></div>
+        <pre id="dashboardInsights"></pre>
+      </section>
     </main>
     <script src="/app.js"></script>
   </body>
diff --git a/bitnet_tools/ui/styles.css b/bitnet_tools/ui/styles.css
index 0ee1949..8585750 100644
--- a/bitnet_tools/ui/styles.css
+++ b/bitnet_tools/ui/styles.css
@@ -56,3 +56,20 @@ pre {
   max-height: 320px;
   overflow: auto;
 }
+.cards {
+  margin-top: 12px;
+  display: grid;
+  grid-template-columns: repeat(auto-fit, minmax(140px, 1fr));
+  gap: 8px;
+}
+.card {
+  background: #0b1220;
+  border: 1px solid #334155;
+  border-radius: 8px;
+  padding: 10px;
+  display: flex;
+  flex-direction: column;
+  gap: 6px;
+}
+.card strong { color: var(--muted); font-size: 12px; }
+.card span { font-size: 18px; font-weight: 700; }
diff --git a/bitnet_tools/visualize.py b/bitnet_tools/visualize.py
new file mode 100644
index 0000000..9da48d7
--- /dev/null
+++ b/bitnet_tools/visualize.py
@@ -0,0 +1,214 @@
+from __future__ import annotations
+
+import csv
+import random
+from collections import Counter
+from pathlib import Path
+from typing import Any
+
+
+SAMPLE_CAP = 20000
+TOP_K = 10
+
+
+def _safe_stem(path: Path) -> str:
+    """Return the file name without its final suffix, with spaces turned into underscores."""
+    return "_".join(path.stem.split(" "))
+
+
+def _ensure_matplotlib():
+    """Import matplotlib with the ``Agg`` backend selected and return ``pyplot``.
+
+    Raises:
+        RuntimeError: If importing or configuring matplotlib fails for any
+            reason; the original exception is chained as the cause.
+    """
+    try:
+        import matplotlib
+
+        # The backend is selected before pyplot is imported.
+        matplotlib.use("Agg")
+        from matplotlib import pyplot
+    except Exception as exc:
+        raise RuntimeError("matplotlib is required for chart generation") from exc
+    return pyplot
+
+
+def _reservoir_float(values: list[float], value: float, seen: int, cap: int) -> None:
+    """Offer one number to the bounded sample in *values* (mutated in place).
+
+    Values are appended until the list holds *cap* items. Afterwards a random
+    index in ``[0, seen - 1]`` is drawn and the slot is overwritten only when
+    that index is below *cap*. ``cap <= 0`` makes the call a no-op.
+    """
+    if cap <= 0:
+        return
+    if len(values) >= cap:
+        slot = random.randint(0, seen - 1)
+        if slot < cap:
+            values[slot] = value
+        return
+    values.append(value)
+
+
+def _reservoir_pair(xs: list[float], ys: list[float], x: float, y: float, seen: int, cap: int) -> None:
+    """Offer one ``(x, y)`` point to two parallel bounded samples (mutated in place).
+
+    Both lists are always updated at the same index so the pairs stay aligned.
+    Points are appended until *xs* holds *cap* items; afterwards a random index
+    in ``[0, seen - 1]`` is drawn and the pair replaces that slot only when the
+    index is below *cap*. ``cap <= 0`` makes the call a no-op.
+    """
+    if cap <= 0:
+        return
+    if len(xs) >= cap:
+        slot = random.randint(0, seen - 1)
+        if slot < cap:
+            xs[slot], ys[slot] = x, y
+        return
+    xs.append(x)
+    ys.append(y)
+
+
+def _collect_profiles(csv_path: Path) -> tuple[list[str], dict[str, dict[str, Any]]]:
+    """Scan *csv_path* once and build the per-column profile used for charting.
+
+    Returns:
+        ``(columns, profiles)``, or ``([], {})`` when the file has no header.
+        Each profile holds ``seen`` (non-empty cells), ``numeric_seen``,
+        ``is_numeric``, ``missing``, ``values`` (a bounded sample of parsed
+        floats) and ``counter`` (frequency of every non-empty raw value).
+
+    A column stops being numeric at the first non-empty cell ``float()``
+    rejects, at which point its sample is discarded. Columns without a single
+    numeric cell are also marked non-numeric.
+    """
+    with csv_path.open("r", encoding="utf-8-sig", newline="") as handle:
+        reader = csv.DictReader(handle)
+        header = reader.fieldnames
+        if header is None:
+            return [], {}
+        columns = [str(name) for name in header]
+
+        profiles: dict[str, dict[str, Any]] = {}
+        for name in columns:
+            profiles[name] = {
+                "seen": 0,
+                "numeric_seen": 0,
+                "is_numeric": True,
+                "missing": 0,
+                "values": [],
+                "counter": Counter(),
+            }
+
+        for row in reader:
+            for name in columns:
+                prof = profiles[name]
+                cell = (row.get(name) or "").strip()
+                if not cell:
+                    prof["missing"] += 1
+                    continue
+                prof["seen"] += 1
+                prof["counter"][cell] += 1
+
+                if not prof["is_numeric"]:
+                    continue
+                try:
+                    number = float(cell)
+                except ValueError:
+                    prof["is_numeric"] = False
+                    prof["values"] = []
+                    continue
+                prof["numeric_seen"] += 1
+                _reservoir_float(prof["values"], number, prof["numeric_seen"], SAMPLE_CAP)
+
+    # A column that never produced a number (e.g. entirely empty) is not numeric.
+    for prof in profiles.values():
+        if prof["numeric_seen"] == 0:
+            prof["is_numeric"] = False
+
+    return columns, profiles
+
+
+def create_file_charts(
+    csv_path: Path,
+    out_dir: Path,
+    max_numeric: int = 3,
+    max_categorical: int = 2,
+) -> list[str]:
+    """Render PNG charts for one CSV file and return the written paths.
+
+    For each of the first ``max_numeric`` numeric columns with sampled values
+    a histogram, a box plot and a missing/non-missing bar chart are written.
+    For each of the first ``max_categorical`` non-numeric columns a bar chart
+    of the ``TOP_K`` most frequent values is written. When at least two
+    numeric columns were selected, a scatter plot of the first two is built
+    from a second pass over the file.
+
+    Args:
+        csv_path: CSV file to chart.
+        out_dir: Directory for the PNG files; created if missing.
+        max_numeric: Maximum number of numeric columns to chart.
+        max_categorical: Maximum number of non-numeric columns to chart.
+
+    Returns:
+        Paths of the written files as strings, in creation order; an empty
+        list when the file has no header.
+
+    Raises:
+        RuntimeError: If matplotlib cannot be loaded.
+    """
+    import re  # local import: only needed to sanitise file-name parts
+
+    plt = _ensure_matplotlib()
+
+    out_dir.mkdir(parents=True, exist_ok=True)
+    columns, profiles = _collect_profiles(csv_path)
+    if not columns:
+        return []
+
+    numeric_cols = [c for c in columns if profiles[c]["is_numeric"]][:max_numeric]
+    categorical_cols = [c for c in columns if not profiles[c]["is_numeric"]][:max_categorical]
+
+    artifacts: list[str] = []
+    stem = _safe_stem(csv_path)
+
+    def _file_part(name: str) -> str:
+        # Column names come straight from the CSV header. Path separators and
+        # characters that are illegal in file names are replaced so a header
+        # such as "price/unit" cannot point outside out_dir or fail to save.
+        return re.sub(r'[\\/:*?"<>|\x00-\x1f]', "_", name)
+
+    def _save(fig: Any, file_name: str) -> None:
+        # Shared tail of every chart: lay out, write, release the figure.
+        plt.tight_layout()
+        out = out_dir / file_name
+        fig.savefig(out)
+        plt.close(fig)
+        artifacts.append(str(out))
+
+    for col in numeric_cols:
+        values: list[float] = profiles[col]["values"]
+        missing = profiles[col]["missing"]
+        if not values:
+            continue
+        safe_col = _file_part(col)
+
+        fig = plt.figure(figsize=(7, 4))
+        plt.hist(values, bins=20)
+        plt.title(f"{stem} - {col} histogram(sample)")
+        plt.xlabel(col)
+        plt.ylabel("count")
+        _save(fig, f"{stem}_{safe_col}_hist.png")
+
+        fig = plt.figure(figsize=(5, 4))
+        plt.boxplot(values, vert=True)
+        plt.title(f"{stem} - {col} boxplot(sample)")
+        plt.ylabel(col)
+        _save(fig, f"{stem}_{safe_col}_box.png")
+
+        total = profiles[col]["seen"] + missing
+        if total > 0:
+            fig = plt.figure(figsize=(5, 3))
+            plt.bar(["non_missing", "missing"], [profiles[col]["seen"], missing])
+            plt.title(f"{stem} - {col} missing overview")
+            _save(fig, f"{stem}_{safe_col}_missing.png")
+
+    for col in categorical_cols:
+        items = profiles[col]["counter"].most_common(TOP_K)
+        if not items:
+            continue
+
+        labels = [label for label, _ in items]
+        counts = [count for _, count in items]
+        fig = plt.figure(figsize=(8, 4))
+        plt.bar(range(len(labels)), counts)
+        plt.xticks(range(len(labels)), labels, rotation=30, ha="right")
+        plt.title(f"{stem} - {col} top values")
+        _save(fig, f"{stem}_{_file_part(col)}_top.png")
+
+    if len(numeric_cols) >= 2:
+        x_col, y_col = numeric_cols[0], numeric_cols[1]
+        xs: list[float] = []
+        ys: list[float] = []
+        seen = 0
+        # Second pass: the per-column samples are drawn independently, so
+        # paired points have to be sampled together from the rows.
+        with csv_path.open("r", encoding="utf-8-sig", newline="") as f:
+            reader = csv.DictReader(f)
+            if reader.fieldnames is not None:
+                for row in reader:
+                    x_raw = (row.get(x_col) or "").strip()
+                    y_raw = (row.get(y_col) or "").strip()
+                    if not x_raw or not y_raw:
+                        continue
+                    try:
+                        x, y = float(x_raw), float(y_raw)
+                    except ValueError:
+                        continue
+                    seen += 1
+                    _reservoir_pair(xs, ys, x, y, seen, SAMPLE_CAP)
+
+        if xs and ys:
+            fig = plt.figure(figsize=(6, 5))
+            plt.scatter(xs, ys, alpha=0.6, s=12)
+            plt.title(f"{stem} - {x_col} vs {y_col} scatter(sample)")
+            plt.xlabel(x_col)
+            plt.ylabel(y_col)
+            _save(fig, f"{stem}_{_file_part(x_col)}_{_file_part(y_col)}_scatter.png")
+
+    return artifacts
+
+
+def create_multi_charts(csv_paths: list[Path], out_dir: Path) -> dict[str, Any]:
+    """Create charts for every CSV in *csv_paths*, all written into *out_dir*.
+
+    Returns a mapping from each input path (as a string) to the list of chart
+    files ``create_file_charts`` produced for it.
+    """
+    return {str(p): create_file_charts(p, out_dir) for p in csv_paths}
diff --git a/pyproject.toml b/pyproject.toml
index 4f7a543..ae288de 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,6 +12,7 @@ dependencies = []
 
 [project.scripts]
 bitnet-analyze = "bitnet_tools.cli:main"
+bitnet-desktop = "bitnet_tools.desktop:launch_desktop"
 
 [tool.pytest.ini_options]
 testpaths = ["tests"]
diff --git a/tests/test_analysis.py b/tests/test_analysis.py
index d9d6454..a3a4517 100644
--- a/tests/test_analysis.py
+++ b/tests/test_analysis.py
@@ -2,7 +2,10 @@
     build_analysis_payload,
     build_analysis_payload_from_csv_text,
     summarize_rows,
+    build_markdown_report,
 )
+from bitnet_tools.multi_csv import analyze_multiple_csv, build_multi_csv_markdown
+
 
 
 def test_summarize_rows_basic():
@@ -38,3 +41,88 @@ def test_build_analysis_payload_from_csv_text():
 
     assert payload["csv_path"] == "<inline_csv>"
     assert payload["summary"]["row_count"] == 2
+
+
+def test_streaming_summary_keeps_mixed_type_as_string(tmp_path):
+    """A column mixing a number and text is typed 'string' and gets no numeric stats."""
+    p = tmp_path / "mixed.csv"
+    p.write_text("a,b\n1,10\n2,hello\n", encoding="utf-8")
+
+    payload = build_analysis_payload(p, "검증")
+
+    assert payload["summary"]["dtypes"]["b"] == "string"
+    assert "b" not in payload["summary"]["numeric_stats"]
+
+
+def test_build_markdown_report():
+    """The single-file report contains its title, a row for column 'a' and the question."""
+    rows = [{"a": "1", "b": "10"}, {"a": "2", "b": "20"}]
+    summary = summarize_rows(rows, ["a", "b"])
+    report = build_markdown_report(summary, "테스트 질문")
+
+    assert "# BitNet CSV 분석 보고서" in report
+    assert "| a |" in report
+    assert "테스트 질문" in report
+
+
+def test_multi_csv_report_builder(tmp_path):
+    """Two files sharing only 'city' yield file_count 2, 'city' as shared column and a titled report."""
+    p1 = tmp_path / "a.csv"
+    p2 = tmp_path / "b.csv"
+    p1.write_text("city,v\nseoul,1\n", encoding="utf-8")
+    p2.write_text("city,v2\nseoul,2\n", encoding="utf-8")
+
+    result = analyze_multiple_csv([p1, p2], "비교")
+    report = build_multi_csv_markdown(result)
+
+    assert result["file_count"] == 2
+    assert "city" in result["shared_columns"]
+    assert "다중 CSV 분석 리포트" in report
+
+
+def test_multi_csv_schema_drift_and_group_ratio(tmp_path):
+    """Differing 'val' means produce a positive mean_range, and the group/target table is filled."""
+    p1 = tmp_path / "a.csv"
+    p2 = tmp_path / "b.csv"
+    p1.write_text("city,type,val\nseoul,A,1\nseoul,B,2\n", encoding="utf-8")
+    p2.write_text("city,type,val\nseoul,A,100\nbusan,A,200\n", encoding="utf-8")
+
+    result = analyze_multiple_csv([p1, p2], "드리프트", group_column="city", target_column="type")
+
+    assert "schema_drift" in result
+    assert "val" in result["schema_drift"]
+    assert result["schema_drift"]["val"]["mean_range"] > 0
+    assert result["files"][0]["group_target_ratio"] is not None
+
+
+def test_multi_csv_large_row_count(tmp_path):
+    """A 5000-row file is counted in full, both in the total and in the per-file summary."""
+    p = tmp_path / "big.csv"
+    lines = ["city,val,type"]
+    for i in range(5000):
+        lines.append(f"seoul,{i % 100},A")
+    p.write_text("\n".join(lines) + "\n", encoding="utf-8")
+
+    result = analyze_multiple_csv([p], "대용량")
+
+    assert result["total_row_count"] == 5000
+    assert result["files"][0]["summary"]["row_count"] == 5000
+
+
+def test_multi_csv_semantic_type_and_insights(tmp_path):
+    """ISO dates are labelled 'date', 'lat' gets a geo or numeric label, and insights is a list."""
+    p = tmp_path / "typed.csv"
+    p.write_text("dt,lat,val,cat\n2024-01-01,37.5,1,A\n2024-01-02,37.6,1000,A\n", encoding="utf-8")
+
+    result = analyze_multiple_csv([p], "의미타입")
+    prof = result["files"][0]["column_profiles"]
+
+    assert prof["dt"]["semantic_type"] == "date"
+    assert prof["lat"]["semantic_type"] in {"geo_latitude", "numeric"}
+    assert isinstance(result.get("insights"), list)
+
+
+def test_multi_csv_cache_created(tmp_path, monkeypatch):
+    """With CACHE_DIR redirected to a temp dir, analysing a file leaves a JSON cache entry there."""
+    import bitnet_tools.multi_csv as multi
+
+    monkeypatch.setattr(multi, "CACHE_DIR", tmp_path / ".cache")
+    p = tmp_path / "cache.csv"
+    p.write_text("a,b\n1,2\n", encoding="utf-8")
+
+    result = multi.analyze_multiple_csv([p], "캐시")
+    assert result["file_count"] == 1
+    assert any((tmp_path / ".cache").glob("*.json"))
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 56d148c..3350590 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -27,3 +27,151 @@ def fake_serve(host: str, port: int):
 
     assert code == 0
     assert called == {"host": "0.0.0.0", "port": 9999}
+
+
+def test_cli_doctor_mode(monkeypatch, capsys):
+    """`doctor` exits 0 and its stdout contains the stubbed environment fields."""
+
+    def fake_environment(model=None):
+        return {"ollama_installed": True, "model_requested": model}
+
+    monkeypatch.setattr(cli, "collect_environment", fake_environment)
+
+    exit_code = cli.main(["doctor", "--model", "bitnet:latest"])
+    printed = capsys.readouterr().out
+    assert exit_code == 0
+    assert '"ollama_installed": true' in printed
+    assert '"model_requested": "bitnet:latest"' in printed
+
+
+def test_cli_report_mode(tmp_path):
+    """`report` exits 0 and writes an output file that contains the report title."""
+    source, report_file = tmp_path / "sample.csv", tmp_path / "report.md"
+    source.write_text("a,b\n1,2\n", encoding="utf-8")
+    argv = ["report", str(source), "--question", "요약", "--out", str(report_file)]
+
+    exit_code = cli.main(argv)
+
+    assert exit_code == 0 and report_file.exists()
+    assert "BitNet CSV 분석 보고서" in report_file.read_text(encoding="utf-8")
+
+
+def test_cli_multi_analyze_mode(tmp_path):
+    """`multi-analyze` exits 0 and writes both the JSON output and the titled report."""
+    first, second = tmp_path / "a.csv", tmp_path / "b.csv"
+    json_file, report_file = tmp_path / "out.json", tmp_path / "out.md"
+
+    first.write_text("city,val\nseoul,1\nbusan,2\n", encoding="utf-8")
+    second.write_text("city,val2\nseoul,10\ndaegu,20\n", encoding="utf-8")
+
+    argv = [
+        "multi-analyze",
+        str(first),
+        str(second),
+        "--question",
+        "다중 비교",
+        "--out-json",
+        str(json_file),
+        "--out-report",
+        str(report_file),
+    ]
+    exit_code = cli.main(argv)
+
+    assert exit_code == 0
+    assert json_file.exists()
+    assert report_file.exists()
+    assert "다중 CSV 분석 리포트" in report_file.read_text(encoding="utf-8")
+
+
+def test_cli_multi_analyze_with_group_target(tmp_path):
+    """Passing group/target columns makes `group_target_ratio` appear in the JSON output."""
+    first, second = tmp_path / "a.csv", tmp_path / "b.csv"
+    json_file, report_file = tmp_path / "out2.json", tmp_path / "out2.md"
+
+    first.write_text("city,type,val\nseoul,A,1\nseoul,B,2\n", encoding="utf-8")
+    second.write_text("city,type,val\nseoul,A,10\nbusan,A,20\n", encoding="utf-8")
+
+    argv = [
+        "multi-analyze",
+        str(first),
+        str(second),
+        "--question",
+        "그룹비율",
+        "--group-column",
+        "city",
+        "--target-column",
+        "type",
+        "--out-json",
+        str(json_file),
+        "--out-report",
+        str(report_file),
+    ]
+    exit_code = cli.main(argv)
+
+    assert exit_code == 0
+    payload = json_file.read_text(encoding="utf-8")
+    assert "group_target_ratio" in payload
+
+
+def test_cli_multi_analyze_with_charts(tmp_path, monkeypatch):
+    """With chart creation stubbed out, `--charts-dir` puts "charts" into the JSON output."""
+    first, second = tmp_path / "a.csv", tmp_path / "b.csv"
+    json_file, report_file = tmp_path / "out3.json", tmp_path / "out3.md"
+    charts_dir = tmp_path / "charts"
+
+    first.write_text("city,val\nseoul,1\n", encoding="utf-8")
+    second.write_text("city,val\nbusan,2\n", encoding="utf-8")
+
+    monkeypatch.setattr(cli, "create_multi_charts", lambda paths, out: {str(paths[0]): ["chart1.png"]})
+
+    argv = [
+        "multi-analyze",
+        str(first),
+        str(second),
+        "--question",
+        "차트",
+        "--charts-dir",
+        str(charts_dir),
+        "--out-json",
+        str(json_file),
+        "--out-report",
+        str(report_file),
+    ]
+    exit_code = cli.main(argv)
+
+    assert exit_code == 0
+    payload = json_file.read_text(encoding="utf-8")
+    assert "charts" in payload
+
+
+def test_cli_multi_analyze_chart_error_fallback(tmp_path, monkeypatch):
+    """A RuntimeError from chart creation still exits 0 and records "charts_error" in the JSON."""
+    first, second = tmp_path / "a.csv", tmp_path / "b.csv"
+    json_file, report_file = tmp_path / "out4.json", tmp_path / "out4.md"
+
+    first.write_text("city,val\nseoul,1\n", encoding="utf-8")
+    second.write_text("city,val\nbusan,2\n", encoding="utf-8")
+
+    def failing_charts(paths, out):
+        raise RuntimeError("matplotlib is required for chart generation")
+
+    monkeypatch.setattr(cli, "create_multi_charts", failing_charts)
+
+    argv = [
+        "multi-analyze",
+        str(first),
+        str(second),
+        "--question",
+        "차트실패",
+        "--charts-dir",
+        str(tmp_path / "charts"),
+        "--out-json",
+        str(json_file),
+        "--out-report",
+        str(report_file),
+    ]
+    exit_code = cli.main(argv)
+
+    assert exit_code == 0
+    payload = json_file.read_text(encoding="utf-8")
+    assert "charts_error" in payload