Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

## 0) 현재 완성도 빠른 진단

현 시점 기준 기능 완성도(실사용 관점): **약 88%**
현 시점 기준 기능 완성도(실사용 관점): **약 90%**

- 완료
- CSV 기초 요약(행/열/결측/숫자 통계)
Expand All @@ -16,7 +16,7 @@
- 브라우저 UI(`bitnet-analyze ui`)
- **윈도우 데스크톱 UI(`bitnet-analyze desktop`, `BitNet_Desktop_Start.bat`)**
- 남은 과제
- 시각화(차트) 및 리포트 내보내기
- 시각화(차트) 자동 생성
- 데이터 전처리 규칙(날짜/카테고리 자동 인식) 고도화

### 파일 붙여넣기 분석 가능 범위
Expand Down Expand Up @@ -127,6 +127,7 @@ jupyter lab
- `pythonw`로 GUI 실행(콘솔창 없이)

데스크톱 UI 내 `환경진단` 버튼으로 Ollama 설치/실행/모델 보유 여부를 즉시 확인할 수 있습니다.
또한 CSV 파일을 선택하지 않아도 CSV 텍스트를 바로 붙여넣어 분석할 수 있습니다.

---

Expand Down Expand Up @@ -210,6 +211,9 @@ bitnet-analyze desktop

# 6) 환경 진단
bitnet-analyze doctor --model bitnet:latest

# 7) 마크다운 분석 리포트 저장
bitnet-analyze report sample.csv --question "핵심 요약" --out analysis_report.md
```

---
Expand Down
26 changes: 26 additions & 0 deletions bitnet_tools/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,32 @@ def build_prompt(summary: DataSummary, question: str) -> str:
)


def _md_cell(value: object) -> str:
    """Return *value* as text that is safe to place inside one Markdown table cell."""
    text = str(value)
    # Normalise every line-break flavour first: a raw line break would end the table row.
    text = text.replace("\r\n", " ").replace("\n", " ").replace("\r", " ")
    # A raw "|" would be parsed as a column separator and shift the remaining cells.
    return text.replace("|", "\\|")


def build_markdown_report(summary: DataSummary, question: str) -> str:
    """Render a Markdown report for a CSV summary.

    The report contains a title, the question, the row/column counts, a table
    with one row per column (name, type, missing count) and, when
    ``summary.numeric_stats`` is non-empty, a second table with
    count/mean/min/max per numeric column.

    Column names and type labels come from the CSV itself, so they are escaped
    before being written into table cells; names without ``|`` or line breaks
    are emitted unchanged.

    Args:
        summary: Object exposing ``row_count``, ``column_count``, ``columns``,
            ``dtypes``, ``missing_counts`` and ``numeric_stats``.
        question: The analysis question, echoed in the report header.

    Returns:
        The report as a single string with lines joined by ``"\\n"`` (no
        trailing newline).
    """
    lines = [
        "# BitNet CSV 분석 보고서",
        "",
        f"- 질문: {question}",
        f"- 행 수: {summary.row_count}",
        f"- 열 수: {summary.column_count}",
        "",
        "## 컬럼 정보",
        "",
        "| 컬럼 | 타입 | 결측 수 |",
        "|---|---|---:|",
    ]
    for col in summary.columns:
        # Columns missing from the lookup tables fall back to "string" / 0.
        dtype = _md_cell(summary.dtypes.get(col, "string"))
        missing = summary.missing_counts.get(col, 0)
        lines.append(f"| {_md_cell(col)} | {dtype} | {missing} |")

    if summary.numeric_stats:
        lines.extend(
            [
                "",
                "## 수치형 통계",
                "",
                "| 컬럼 | count | mean | min | max |",
                "|---|---:|---:|---:|---:|",
            ]
        )
        for col, stats in summary.numeric_stats.items():
            lines.append(
                f"| {_md_cell(col)} | {stats['count']:.0f} | {stats['mean']:.4f} "
                f"| {stats['min']:.4f} | {stats['max']:.4f} |"
            )

    return "\n".join(lines)


def build_analysis_payload(csv_path: str | Path, question: str) -> dict[str, Any]:
path = Path(csv_path)
if not path.exists():
Expand Down
23 changes: 21 additions & 2 deletions bitnet_tools/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import sys
from pathlib import Path

from .analysis import build_analysis_payload
from .analysis import DataSummary, build_analysis_payload, build_markdown_report
from .doctor import collect_environment
from .web import serve

Expand Down Expand Up @@ -53,12 +53,22 @@ def _build_parser() -> argparse.ArgumentParser:
doctor_parser = subparsers.add_parser("doctor", help="Run local environment diagnostics")
doctor_parser.add_argument("--model", default=None, help="Optional model tag to check availability")

report_parser = subparsers.add_parser("report", help="Build markdown summary report from CSV")
report_parser.add_argument("csv", type=Path, help="Input CSV path")
report_parser.add_argument("--question", required=True, help="Analysis question")
report_parser.add_argument(
"--out",
type=Path,
default=Path("analysis_report.md"),
help="Where to store generated markdown report",
)

return parser


def main(argv: list[str] | None = None) -> int:
raw_args = list(sys.argv[1:] if argv is None else argv)
if raw_args and raw_args[0] not in {"analyze", "ui", "desktop", "doctor", "-h", "--help"}:
if raw_args and raw_args[0] not in {"analyze", "ui", "desktop", "doctor", "report", "-h", "--help"}:
raw_args.insert(0, "analyze")

parser = _build_parser()
Expand All @@ -79,6 +89,15 @@ def main(argv: list[str] | None = None) -> int:
print(json.dumps(report, ensure_ascii=False, indent=2))
return 0


if args.command == "report":
payload = build_analysis_payload(args.csv, args.question)
summary = DataSummary(**payload["summary"])
report = build_markdown_report(summary, args.question)
args.out.write_text(report, encoding="utf-8")
print(f"report saved: {args.out}")
return 0

if args.command == "analyze":
payload = build_analysis_payload(args.csv, args.question)
args.out.write_text(
Expand Down
24 changes: 15 additions & 9 deletions bitnet_tools/desktop.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
import threading
import tkinter as tk
from pathlib import Path
from tkinter import filedialog, messagebox, ttk
from tkinter import filedialog, ttk

from .analysis import build_analysis_payload
from .analysis import build_analysis_payload, build_analysis_payload_from_csv_text
from .doctor import collect_environment


Expand Down Expand Up @@ -47,7 +47,7 @@ def _build_ui(self) -> None:

sub = ttk.Label(
frame,
text="CSV 선택 → 분석 → BitNet 실행 순서로 사용하세요.",
text="CSV 선택/붙여넣기 → 분석 → BitNet 실행 순서로 사용하세요.",
)
sub.pack(anchor="w", pady=(0, 10))

Expand All @@ -58,6 +58,11 @@ def _build_ui(self) -> None:
self.csv_label = ttk.Label(top_row, text="선택된 파일 없음")
self.csv_label.pack(side="left", padx=12)

csv_row = ttk.LabelFrame(frame, text="CSV 텍스트 (파일 미선택 시 여기에 붙여넣기)")
csv_row.pack(fill="both", pady=(0, 8))
self.csv_text = tk.Text(csv_row, height=8, wrap="none")
self.csv_text.pack(fill="both", expand=True, padx=8, pady=8)

question_row = ttk.LabelFrame(frame, text="질문")
question_row.pack(fill="x", pady=(0, 8))

Expand Down Expand Up @@ -125,6 +130,9 @@ def _open_csv(self) -> None:
return
self.csv_path = Path(path)
self.csv_label.configure(text=str(self.csv_path))
content = self.csv_path.read_text(encoding="utf-8")
self.csv_text.delete("1.0", "end")
self.csv_text.insert("1.0", content)

def _get_question(self) -> str:
question = self.question.get("1.0", "end").strip()
Expand All @@ -137,14 +145,13 @@ def _analyze(self) -> None:
self._on_ui(self._set_status, "분석 중...")
try:
question = self._get_question()
csv_text = self.csv_text.get("1.0", "end").strip()
Comment on lines 147 to +148
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Keep Tkinter widget access on the main thread

_analyze_async launches _analyze in a worker thread, but _analyze immediately reads Tk widgets via self.question.get(...) and self.csv_text.get(...); the same off-main-thread pattern is also used in _doctor/_run_model with self.model.get(...). Tkinter is not thread-safe, so these reads can intermittently raise Tcl errors or freeze/crash the desktop app when users click 분석/환경진단/BitNet 실행. Capture UI values on the main thread before starting the worker (or marshal reads via after) and keep background threads limited to non-UI work.

Useful? React with 👍 / 👎.

if self.csv_path:
payload = build_analysis_payload(self.csv_path, question)
elif csv_text:
payload = build_analysis_payload_from_csv_text(csv_text, question)
else:
self._on_ui(
messagebox.showinfo,
"파일 미선택",
"CSV를 선택하지 않아 본문 텍스트 입력을 안내합니다. 텍스트 박스에 CSV를 붙여넣으세요.",
)
self._on_ui(self._set_status, "CSV 파일을 선택하거나 CSV 텍스트를 붙여넣어 주세요")
return

self.latest_prompt = payload["prompt"]
Expand All @@ -155,7 +162,6 @@ def _analyze(self) -> None:
except Exception as exc:
self._on_ui(self._set_status, f"오류: {exc}")


def _doctor_async(self) -> None:
threading.Thread(target=self._doctor, daemon=True).start()

Expand Down
11 changes: 11 additions & 0 deletions tests/test_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
build_analysis_payload,
build_analysis_payload_from_csv_text,
summarize_rows,
build_markdown_report,
)


Expand Down Expand Up @@ -48,3 +49,13 @@ def test_streaming_summary_keeps_mixed_type_as_string(tmp_path):

assert payload["summary"]["dtypes"]["b"] == "string"
assert "b" not in payload["summary"]["numeric_stats"]


def test_build_markdown_report():
    """The rendered report carries the title, a table row for ``a`` and the question."""
    header = ["a", "b"]
    records = [dict(zip(header, values)) for values in (("1", "10"), ("2", "20"))]
    question = "테스트 질문"

    markdown = build_markdown_report(summarize_rows(records, header), question)

    for expected in ("# BitNet CSV 분석 보고서", "| a |", question):
        assert expected in markdown
12 changes: 12 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,15 @@ def test_cli_doctor_mode(monkeypatch, capsys):
out = capsys.readouterr().out
assert '"ollama_installed": true' in out
assert '"model_requested": "bitnet:latest"' in out


def test_cli_report_mode(tmp_path):
    """``report`` exits with 0 and writes the Markdown report to the ``--out`` path."""
    source = tmp_path / "sample.csv"
    source.write_text("a,b\n1,2\n", encoding="utf-8")
    destination = tmp_path / "report.md"

    argv = ["report", str(source), "--question", "요약", "--out", str(destination)]
    exit_code = cli.main(argv)

    assert exit_code == 0
    assert destination.exists()
    written = destination.read_text(encoding="utf-8")
    assert "BitNet CSV 분석 보고서" in written