Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

## 0) 현재 완성도 빠른 진단

현 시점 기준 기능 완성도(실사용 관점): **약 80%**
현 시점 기준 기능 완성도(실사용 관점): **약 88%**

- 완료
- CSV 기초 요약(행/열/결측/숫자 통계)
Expand All @@ -16,9 +16,8 @@
- 브라우저 UI(`bitnet-analyze ui`)
- **윈도우 데스크톱 UI(`bitnet-analyze desktop`, `BitNet_Desktop_Start.bat`)**
- 남은 과제
- 대용량 CSV(수십~수백 MB) 스트리밍 처리
- 시각화(차트) 및 리포트 내보내기
- 실행 전 환경진단(ollama 설치 여부 자동 점검)
- 데이터 전처리 규칙(날짜/카테고리 자동 인식) 고도화

### 파일 붙여넣기 분석 가능 범위

Expand Down Expand Up @@ -127,6 +126,8 @@ jupyter lab
- 패키지 설치(`pip install -e .`)
- `pythonw`로 GUI 실행(콘솔창 없이)

데스크톱 UI 내 `환경진단` 버튼으로 Ollama 설치/실행/모델 보유 여부를 즉시 확인할 수 있습니다.

---

## 4) BitNet 기본 설정값 (안정성 우선)
Expand Down Expand Up @@ -206,6 +207,9 @@ bitnet-analyze ui --host 127.0.0.1 --port 8765

# 5) 데스크톱 UI 실행
bitnet-analyze desktop

# 6) 환경 진단
bitnet-analyze doctor --model bitnet:latest
```

---
Expand Down
39 changes: 25 additions & 14 deletions bitnet_tools/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import io
import json
from pathlib import Path
from statistics import mean
from typing import Any


Expand Down Expand Up @@ -40,11 +39,20 @@ def _to_float(value: str) -> float | None:


def summarize_rows(rows: list[dict[str, str]], columns: list[str]) -> DataSummary:
    """Summarize CSV rows that are already held in memory.

    Thin wrapper kept for callers that pass a list of row dicts: ``rows`` and
    ``columns`` are handed unchanged to ``summarize_reader``, and its result
    is returned as-is.
    """
    summary = summarize_reader(rows, columns)
    return summary


def summarize_reader(rows: Any, columns: list[str]) -> DataSummary:
missing_counts = {col: 0 for col in columns}
numeric_values: dict[str, list[float]] = {col: [] for col in columns}
numeric_counts: dict[str, int] = {col: 0 for col in columns}
numeric_sums: dict[str, float] = {col: 0.0 for col in columns}
numeric_mins: dict[str, float] = {}
numeric_maxs: dict[str, float] = {}
text_seen: dict[str, bool] = {col: False for col in columns}
row_count = 0

for row in rows:
row_count += 1
for col in columns:
raw = (row.get(col) or "").strip()
if raw == "":
Expand All @@ -54,25 +62,30 @@ def summarize_rows(rows: list[dict[str, str]], columns: list[str]) -> DataSummar
if num is None:
text_seen[col] = True
else:
numeric_values[col].append(num)
numeric_counts[col] += 1
numeric_sums[col] += num
if col not in numeric_mins or num < numeric_mins[col]:
numeric_mins[col] = num
if col not in numeric_maxs or num > numeric_maxs[col]:
numeric_maxs[col] = num

dtypes: dict[str, str] = {}
numeric_stats: dict[str, dict[str, float]] = {}
for col in columns:
values = numeric_values[col]
if values and not text_seen[col]:
count = numeric_counts[col]
if count > 0 and not text_seen[col]:
dtypes[col] = "float"
numeric_stats[col] = {
"count": float(len(values)),
"mean": float(mean(values)),
"min": float(min(values)),
"max": float(max(values)),
"count": float(count),
"mean": float(numeric_sums[col] / count),
"min": float(numeric_mins[col]),
"max": float(numeric_maxs[col]),
}
else:
dtypes[col] = "string"

return DataSummary(
row_count=len(rows),
row_count=row_count,
column_count=len(columns),
columns=columns,
dtypes=dtypes,
Expand Down Expand Up @@ -101,9 +114,8 @@ def build_analysis_payload(csv_path: str | Path, question: str) -> dict[str, Any
if reader.fieldnames is None:
raise ValueError("CSV header not found")
columns = [str(c) for c in reader.fieldnames]
rows = list(reader)

summary = summarize_rows(rows, columns)
summary = summarize_reader(reader, columns)

return {
"csv_path": str(path),
Expand All @@ -119,8 +131,7 @@ def build_analysis_payload_from_csv_text(csv_text: str, question: str) -> dict[s
raise ValueError("CSV header not found")

columns = [str(c) for c in reader.fieldnames]
rows = list(reader)
summary = summarize_rows(rows, columns)
summary = summarize_reader(reader, columns)

return {
"csv_path": "<inline_csv>",
Expand Down
11 changes: 10 additions & 1 deletion bitnet_tools/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from pathlib import Path

from .analysis import build_analysis_payload
from .doctor import collect_environment
from .web import serve


Expand Down Expand Up @@ -49,12 +50,15 @@ def _build_parser() -> argparse.ArgumentParser:

subparsers.add_parser("desktop", help="Run Windows desktop UI")

doctor_parser = subparsers.add_parser("doctor", help="Run local environment diagnostics")
doctor_parser.add_argument("--model", default=None, help="Optional model tag to check availability")

return parser


def main(argv: list[str] | None = None) -> int:
raw_args = list(sys.argv[1:] if argv is None else argv)
if raw_args and raw_args[0] not in {"analyze", "ui", "desktop", "-h", "--help"}:
if raw_args and raw_args[0] not in {"analyze", "ui", "desktop", "doctor", "-h", "--help"}:
raw_args.insert(0, "analyze")

parser = _build_parser()
Expand All @@ -70,6 +74,11 @@ def main(argv: list[str] | None = None) -> int:
launch_desktop()
return 0

if args.command == "doctor":
report = collect_environment(model=args.model)
print(json.dumps(report, ensure_ascii=False, indent=2))
return 0

if args.command == "analyze":
payload = build_analysis_payload(args.csv, args.question)
args.out.write_text(
Expand Down
17 changes: 16 additions & 1 deletion bitnet_tools/desktop.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from tkinter import filedialog, messagebox, ttk

from .analysis import build_analysis_payload
from .doctor import collect_environment


def run_ollama(model: str, prompt: str) -> str:
Expand Down Expand Up @@ -84,7 +85,8 @@ def _build_ui(self) -> None:
self.model.insert(0, "bitnet:latest")
self.model.pack(side="left", fill="x", expand=True, padx=8)

ttk.Button(model_row, text="1) 분석", command=self._analyze_async).pack(side="left", padx=(8, 4))
ttk.Button(model_row, text="환경진단", command=self._doctor_async).pack(side="left", padx=(8, 4))
ttk.Button(model_row, text="1) 분석", command=self._analyze_async).pack(side="left", padx=(0, 4))
ttk.Button(model_row, text="2) BitNet 실행", command=self._run_model_async).pack(side="left")

self.status = ttk.Label(frame, text="대기 중")
Expand Down Expand Up @@ -153,6 +155,19 @@ def _analyze(self) -> None:
except Exception as exc:
self._on_ui(self._set_status, f"오류: {exc}")


def _doctor_async(self) -> None:
    """Start ``self._doctor`` on a background daemon thread and return at once."""
    worker = threading.Thread(target=self._doctor, daemon=True)
    worker.start()

def _doctor(self) -> None:
    """Run the environment diagnosis and show the result in the UI.

    Runs as the target of the thread started by ``_doctor_async``. The model
    tag is read from the ``self.model`` entry (blank means "no model check"),
    the report from ``collect_environment`` is rendered as indented JSON into
    ``self.answer``, and the status label reports whether Ollama looks both
    installed and running.

    All widget updates go through ``self._on_ui`` (presumably marshalling the
    call onto the Tk main loop -- confirm against its definition).
    """
    self._on_ui(self._set_status, "환경 진단 중...")
    try:
        report = collect_environment(model=self.model.get().strip() or None)
    except Exception as exc:
        # Top-level boundary of the worker thread. Without this guard an
        # unexpected failure would kill the thread silently and leave the
        # status label stuck on the "in progress" message. The error text
        # follows the same format the analyze worker uses.
        self._on_ui(self._set_status, f"오류: {exc}")
        return

    self._on_ui(
        self._set_text,
        self.answer,
        json.dumps(report, ensure_ascii=False, indent=2),
    )
    healthy = report.get("ollama_installed") and report.get("ollama_running")
    if healthy:
        self._on_ui(self._set_status, "환경 진단 완료 (정상)")
    else:
        self._on_ui(self._set_status, "환경 진단 완료 (확인 필요)")

def _run_model_async(self) -> None:
threading.Thread(target=self._run_model, daemon=True).start()

Expand Down
51 changes: 51 additions & 0 deletions bitnet_tools/doctor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from __future__ import annotations

import platform
import shutil
import subprocess
import sys
from typing import Any


def _run(cmd: list[str], timeout: float | None = 15.0) -> tuple[int, str, str]:
    """Run *cmd* and return ``(returncode, stdout, stderr)``, output stripped.

    The command is executed without a shell and never raises for a non-zero
    exit status. Two failure modes that would otherwise propagate as
    exceptions are folded into the return value so that callers only have to
    check ``returncode == 0``:

    * the process cannot be started (``OSError``, e.g. missing or
      non-executable binary) -> ``(-1, "", <error text>)``
    * the process does not finish within *timeout* seconds
      -> ``(-1, "", "timed out ...")``

    Args:
        cmd: Program and arguments, passed to ``subprocess.run`` as a list.
        timeout: Maximum number of seconds to wait; ``None`` waits forever.

    Returns:
        A ``(returncode, stdout, stderr)`` tuple with surrounding whitespace
        removed from both text streams.
    """
    try:
        proc = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            check=False,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        # A wedged CLI must not block the diagnosis indefinitely.
        return -1, "", f"timed out after {timeout} seconds: {' '.join(cmd)}"
    except OSError as exc:
        return -1, "", str(exc)
    return proc.returncode, proc.stdout.strip(), proc.stderr.strip()


def _with_default_tag(name: str) -> str:
    """Return *name* with ``:latest`` appended when it carries no explicit tag."""
    # Only the last path segment can hold a tag; a colon earlier in the name
    # may belong to a ``host:port`` registry prefix.
    last_segment = name.rsplit("/", 1)[-1]
    return name if ":" in last_segment else f"{name}:latest"


def collect_environment(model: str | None = None) -> dict[str, Any]:
    """Collect a diagnostic report about the local Python and Ollama setup.

    Args:
        model: Optional model name to look for in the output of
            ``ollama list``. A name without a tag is treated as
            ``<name>:latest``.

    Returns:
        A dict that always contains ``python``, ``platform``,
        ``ollama_installed`` and ``ollama_running``. Depending on what could
        be determined it also contains:

        * ``diagnosis`` -- set when no ``ollama`` executable is on ``PATH``
          (the function returns early in that case);
        * ``ollama_path`` -- the executable found on ``PATH``;
        * ``ollama_version`` or ``ollama_version_error``;
        * ``models`` -- first column of every non-header ``ollama list`` row,
          or ``ollama_list_error`` when that command fails;
        * ``model_requested`` / ``model_available`` -- only when *model* is
          given and the model list could be read.
    """
    info: dict[str, Any] = {
        "python": sys.version.split()[0],
        "platform": platform.platform(),
        "ollama_installed": False,
        "ollama_running": False,
    }

    ollama_path = shutil.which("ollama")
    if not ollama_path:
        info["diagnosis"] = "ollama not found in PATH"
        return info

    info["ollama_installed"] = True
    info["ollama_path"] = ollama_path

    # Invoke the executable that was just resolved instead of the bare name,
    # so the binary being diagnosed is the one reported in ``ollama_path``.
    code, out, err = _run([ollama_path, "--version"])
    if code == 0:
        info["ollama_version"] = out
    else:
        info["ollama_version_error"] = err or out or "unknown error"

    code, out, err = _run([ollama_path, "list"])
    if code != 0:
        info["ollama_list_error"] = err or out or "failed to query ollama"
        return info

    # A successful ``ollama list`` is taken as proof that the CLI works.
    info["ollama_running"] = True
    rows = [line for line in out.splitlines() if line.strip()]
    # The first non-blank line is the table header; the model name is the
    # first whitespace-separated column of every following row.
    models = [row.split()[0] for row in rows[1:]]
    info["models"] = models

    if model:
        info["model_requested"] = model
        # Compare full ``name:tag`` identifiers. A prefix test would report
        # e.g. "bitnet" as available when only "bitnet-extra:latest" exists.
        installed = {_with_default_tag(name) for name in models}
        info["model_available"] = _with_default_tag(model) in installed

    return info
10 changes: 10 additions & 0 deletions tests/test_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,13 @@ def test_build_analysis_payload_from_csv_text():

assert payload["csv_path"] == "<inline_csv>"
assert payload["summary"]["row_count"] == 2


def test_streaming_summary_keeps_mixed_type_as_string(tmp_path):
    """A column mixing numeric and text cells is reported as a string column."""
    csv_file = tmp_path / "mixed.csv"
    csv_file.write_text("a,b\n1,10\n2,hello\n", encoding="utf-8")

    summary = build_analysis_payload(csv_file, "검증")["summary"]

    # Column "b" holds both "10" and "hello", so it must not get numeric stats.
    assert summary["dtypes"]["b"] == "string"
    assert "b" not in summary["numeric_stats"]
15 changes: 15 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,18 @@ def fake_serve(host: str, port: int):

assert code == 0
assert called == {"host": "0.0.0.0", "port": 9999}


def test_cli_doctor_mode(monkeypatch, capsys):
    """``doctor`` prints the environment report as JSON and exits with 0."""

    def fake_collect_environment(model=None):
        # Echo the requested model back so the test can see that --model
        # reached the collector.
        return {"ollama_installed": True, "model_requested": model}

    monkeypatch.setattr(cli, "collect_environment", fake_collect_environment)

    exit_code = cli.main(["doctor", "--model", "bitnet:latest"])

    assert exit_code == 0
    printed = capsys.readouterr().out
    assert '"ollama_installed": true' in printed
    assert '"model_requested": "bitnet:latest"' in printed