Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,15 @@

## 0) 현재 완성도 빠른 진단

현 시점 기준 기능 완성도(실사용 관점): **약 94%**
현 시점 기준 기능 완성도(실사용 관점): **약 95%**

- 완료
- CSV 기초 요약(행/열/결측/숫자 통계)
- BitNet용 프롬프트 자동 생성
- 단일 CSV + 다중 CSV CLI 분석(`report`, `multi-analyze`)
- 컬럼별 결측/고유/상위값 비율 산출
- 다중 CSV 분석용 코드 가이드(판다스 예시 코드 자동 생성)
- 다중 CSV 자동 시각화 차트 생성(histogram/boxplot/top bar, matplotlib 설치 시)
- 브라우저 UI(`bitnet-analyze ui`)
- **윈도우 데스크톱 UI(`bitnet-analyze desktop`, `BitNet_Desktop_Start.bat`)**
- 남은 과제
Expand Down Expand Up @@ -219,7 +220,7 @@ bitnet-analyze doctor --model bitnet:latest
bitnet-analyze report sample.csv --question "핵심 요약" --out analysis_report.md

# 8) 다중 CSV 통합 분석(JSON+MD+코드가이드)
bitnet-analyze multi-analyze a.csv b.csv c.csv --question "컬럼별 비율과 지역별 차이 분석" --group-column 시도명 --target-column 세차유형 --out-json multi.json --out-report multi.md
bitnet-analyze multi-analyze a.csv b.csv c.csv --question "컬럼별 비율과 지역별 차이 분석" --group-column 시도명 --target-column 세차유형 --charts-dir charts --out-json multi.json --out-report multi.md
```

---
Expand Down
13 changes: 13 additions & 0 deletions bitnet_tools/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from .analysis import DataSummary, build_analysis_payload, build_markdown_report
from .doctor import collect_environment
from .multi_csv import analyze_multiple_csv, build_multi_csv_markdown, result_to_json
from .visualize import create_multi_charts
from .web import serve


Expand Down Expand Up @@ -72,6 +73,12 @@ def _build_parser() -> argparse.ArgumentParser:
default=Path("multi_analysis_report.md"),
help="Where to store multi CSV markdown report",
)
multi_parser.add_argument(
"--charts-dir",
type=Path,
default=None,
help="Optional directory to save visualization charts",
)

report_parser = subparsers.add_parser("report", help="Build markdown summary report from CSV")
report_parser.add_argument("csv", type=Path, help="Input CSV path")
Expand Down Expand Up @@ -118,6 +125,12 @@ def main(argv: list[str] | None = None) -> int:
group_column=args.group_column,
target_column=args.target_column,
)
if args.charts_dir is not None:
try:
result["charts"] = create_multi_charts(args.csv, args.charts_dir)
except RuntimeError as exc:
result["charts_error"] = str(exc)

args.out_json.write_text(result_to_json(result), encoding="utf-8")
args.out_report.write_text(build_multi_csv_markdown(result), encoding="utf-8")
print(f"multi analysis json saved: {args.out_json}")
Expand Down
8 changes: 8 additions & 0 deletions bitnet_tools/multi_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,14 @@ def build_multi_csv_markdown(result: dict[str, Any]) -> str:
f"| {col} | {drift['dtype_changed']} | {drift['missing_ratio_range']:.4f} | {drift['dominant_value_ratio_range']:.4f} | {drift['mean_range']:.4f} |"
)

charts = result.get("charts")
if charts:
lines.extend(["", "## 생성된 차트 파일", ""])
for file_path, chart_paths in charts.items():
lines.append(f"- {file_path}")
for c in chart_paths:
lines.append(f" - {c}")

lines.extend(
[
"",
Expand Down
121 changes: 121 additions & 0 deletions bitnet_tools/visualize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
from __future__ import annotations

import csv
import math
from collections import Counter
from contextlib import contextmanager
from pathlib import Path
from typing import Any


def _is_numeric_column(rows: list[dict[str, str]], col: str) -> bool:
seen = 0
for row in rows:
raw = (row.get(col) or "").strip()
if not raw:
continue
seen += 1
try:
float(raw)
except ValueError:
return False
return seen > 0


def _safe_stem(path: Path) -> str:
return path.stem.replace(" ", "_")


def _ensure_matplotlib():
try:
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
return plt
except Exception as exc:
raise RuntimeError("matplotlib is required for chart generation") from exc


@contextmanager
def _chart_axes(plt, out: Path, figsize: tuple[float, float]):
    """Yield fresh axes, then lay out and save the figure to *out*.

    The figure is closed whether or not drawing/saving succeeds, so a failure
    on one chart does not leave an open figure behind.
    """
    fig, ax = plt.subplots(figsize=figsize)
    try:
        yield ax
        fig.tight_layout()
        fig.savefig(out)
    finally:
        plt.close(fig)


def _filename_part(text: str) -> str:
    """Replace path separators and other file-name-hostile characters with ``_``."""
    # Column names come straight from the CSV header; characters such as "/"
    # or ":" would otherwise point outside out_dir or be rejected by the OS.
    return "".join("_" if ch in '<>:"/\\|?*' or ord(ch) < 32 else ch for ch in text)


def create_file_charts(
    csv_path: Path,
    out_dir: Path,
    max_numeric: int = 3,
    max_categorical: int = 2,
) -> list[str]:
    """Render summary charts for one CSV file and return the written paths.

    For each of the first ``max_numeric`` numeric columns a histogram and a
    boxplot are written; for each of the first ``max_categorical``
    non-numeric columns a bar chart of the 10 most frequent values is
    written.  Files are named ``<stem>_<column>_{hist,box,top}.png`` inside
    ``out_dir``.

    Args:
        csv_path: CSV file to read (decoded as ``utf-8-sig``).
        out_dir: Directory for the PNG files; created if missing.
        max_numeric: Maximum number of numeric columns to chart.
        max_categorical: Maximum number of non-numeric columns to chart.

    Returns:
        Paths of the generated image files, as strings, in creation order.
        Empty when the CSV has no header row.

    Raises:
        RuntimeError: If matplotlib cannot be loaded.
    """
    plt = _ensure_matplotlib()

    out_dir.mkdir(parents=True, exist_ok=True)
    with csv_path.open("r", encoding="utf-8-sig", newline="") as f:
        reader = csv.DictReader(f)
        if reader.fieldnames is None:
            return []
        # Repeated header names map to a single dict key per row, so charting
        # them twice would only rewrite the same file; keep the first one.
        columns = list(dict.fromkeys(str(c) for c in reader.fieldnames))
        rows = list(reader)

    # Classify every column before applying the limits.  Excluding only the
    # truncated numeric list would let surplus numeric columns be charted as
    # if they were categorical.
    all_numeric = [c for c in columns if _is_numeric_column(rows, c)]
    numeric_names = set(all_numeric)
    numeric_cols = all_numeric[:max_numeric]
    categorical_cols = [c for c in columns if c not in numeric_names][:max_categorical]

    artifacts: list[str] = []
    stem = _safe_stem(csv_path)

    for col in numeric_cols:
        values = []
        for row in rows:
            raw = (row.get(col) or "").strip()
            if not raw:
                continue
            number = float(raw)
            # float() accepts "nan"/"inf"; such values cannot be binned, so
            # they are left out of the plots.
            if math.isfinite(number):
                values.append(number)
        if not values:
            continue

        prefix = f"{stem}_{_filename_part(col)}"

        out = out_dir / f"{prefix}_hist.png"
        with _chart_axes(plt, out, (7, 4)) as ax:
            ax.hist(values, bins=20)
            ax.set_title(f"{stem} - {col} histogram")
            ax.set_xlabel(col)
            ax.set_ylabel("count")
        artifacts.append(str(out))

        out = out_dir / f"{prefix}_box.png"
        with _chart_axes(plt, out, (5, 4)) as ax:
            # Vertical orientation is the default.
            ax.boxplot(values)
            ax.set_title(f"{stem} - {col} boxplot")
            ax.set_ylabel(col)
        artifacts.append(str(out))

    for col in categorical_cols:
        cells = ((row.get(col) or "").strip() for row in rows)
        # most_common keeps first-seen order among equal counts.
        top = Counter(cell for cell in cells if cell).most_common(10)
        if not top:
            continue

        labels = [value for value, _ in top]
        counts = [count for _, count in top]
        positions = list(range(len(labels)))

        out = out_dir / f"{stem}_{_filename_part(col)}_top.png"
        with _chart_axes(plt, out, (8, 4)) as ax:
            ax.bar(positions, counts)
            ax.set_xticks(positions)
            ax.set_xticklabels(labels, rotation=30, ha="right")
            ax.set_title(f"{stem} - {col} top values")
        artifacts.append(str(out))

    return artifacts


def create_multi_charts(
    csv_paths: list[Path],
    out_dir: Path,
) -> dict[str, Any]:
    """Generate charts for several CSV files.

    Chart file names are derived from each CSV's stem only, so inputs that
    share a stem (for example ``2023/data.csv`` and ``2024/data.csv``) would
    overwrite one another in a shared directory.  The first file with a given
    stem writes directly into ``out_dir``; every later file with the same stem
    writes into its own ``<stem>_<n>`` sub-directory instead.

    Args:
        csv_paths: CSV files to chart, in the order they should be processed.
        out_dir: Base directory for the generated images.

    Returns:
        Mapping of ``str(csv_path)`` to the list of chart paths created for
        that file.

    Raises:
        RuntimeError: Propagated from ``create_file_charts`` when matplotlib
            cannot be loaded.
    """
    results: dict[str, Any] = {}
    stem_uses: dict[str, int] = {}
    for p in csv_paths:
        key = str(p)
        if key in results:
            # The very same path was listed twice; its charts already exist.
            continue

        stem = _safe_stem(p)
        # Compare case-insensitively so "Data.csv" and "data.csv" do not
        # clash on case-insensitive file systems.
        stem_key = stem.casefold()
        uses = stem_uses.get(stem_key, 0) + 1
        stem_uses[stem_key] = uses

        target_dir = out_dir if uses == 1 else out_dir / f"{stem}_{uses}"
        results[key] = create_file_charts(p, target_dir)
    return results
64 changes: 64 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,3 +111,67 @@ def test_cli_multi_analyze_with_group_target(tmp_path):
assert code == 0
body = out_json.read_text(encoding="utf-8")
assert "group_target_ratio" in body


def test_cli_multi_analyze_with_charts(tmp_path, monkeypatch):
    """``--charts-dir`` stores the chart mapping in the JSON and lists it in the report."""
    import json

    p1 = tmp_path / "a.csv"
    p2 = tmp_path / "b.csv"
    out_json = tmp_path / "out3.json"
    out_md = tmp_path / "out3.md"
    charts_dir = tmp_path / "charts"

    p1.write_text("city,val\nseoul,1\n", encoding="utf-8")
    p2.write_text("city,val\nbusan,2\n", encoding="utf-8")

    fake_charts = {str(p1): ["chart1.png"]}
    calls = []

    def fake_create(paths, out):
        # Record the arguments so the CLI wiring can be checked below.
        calls.append(([str(x) for x in paths], str(out)))
        return fake_charts

    # Replace the real generator so the test does not need matplotlib.
    monkeypatch.setattr(cli, "create_multi_charts", fake_create)

    code = cli.main([
        "multi-analyze",
        str(p1),
        str(p2),
        "--question",
        "차트",
        "--charts-dir",
        str(charts_dir),
        "--out-json",
        str(out_json),
        "--out-report",
        str(out_md),
    ])

    assert code == 0
    assert calls == [([str(p1), str(p2)], str(charts_dir))]

    # Parse the JSON instead of substring matching: "charts" is also a
    # substring of "charts_error", so a plain `in` check cannot tell the
    # success path from the fallback path.
    data = json.loads(out_json.read_text(encoding="utf-8"))
    assert data["charts"] == fake_charts
    assert "charts_error" not in data

    report = out_md.read_text(encoding="utf-8")
    assert "## 생성된 차트 파일" in report
    assert "chart1.png" in report


def test_cli_multi_analyze_chart_error_fallback(tmp_path, monkeypatch):
    """A chart failure is recorded as ``charts_error`` and the run still succeeds."""
    import json

    p1 = tmp_path / "a.csv"
    p2 = tmp_path / "b.csv"
    out_json = tmp_path / "out4.json"
    out_md = tmp_path / "out4.md"

    p1.write_text("city,val\nseoul,1\n", encoding="utf-8")
    p2.write_text("city,val\nbusan,2\n", encoding="utf-8")

    message = "matplotlib is required for chart generation"

    def boom(paths, out):
        raise RuntimeError(message)

    monkeypatch.setattr(cli, "create_multi_charts", boom)

    code = cli.main([
        "multi-analyze",
        str(p1),
        str(p2),
        "--question",
        "차트실패",
        "--charts-dir",
        str(tmp_path / "charts"),
        "--out-json",
        str(out_json),
        "--out-report",
        str(out_md),
    ])

    assert code == 0

    # The exact error text must be preserved and no chart mapping stored.
    data = json.loads(out_json.read_text(encoding="utf-8"))
    assert data["charts_error"] == message
    assert not data.get("charts")

    # The report is still written, just without the chart section.
    report = out_md.read_text(encoding="utf-8")
    assert "## 생성된 차트 파일" not in report