diff --git a/README.md b/README.md index f9d578f..2905b74 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ ## 0) 현재 완성도 빠른 진단 -현 시점 기준 기능 완성도(실사용 관점): **약 80%** +현 시점 기준 기능 완성도(실사용 관점): **약 88%** - 완료 - CSV 기초 요약(행/열/결측/숫자 통계) @@ -16,9 +16,8 @@ - 브라우저 UI(`bitnet-analyze ui`) - **윈도우 데스크톱 UI(`bitnet-analyze desktop`, `BitNet_Desktop_Start.bat`)** - 남은 과제 - - 대용량 CSV(수십~수백 MB) 스트리밍 처리 - 시각화(차트) 및 리포트 내보내기 - - 실행 전 환경진단(ollama 설치 여부 자동 점검) + - 데이터 전처리 규칙(날짜/카테고리 자동 인식) 고도화 ### 파일 붙여넣기 분석 가능 범위 @@ -127,6 +126,8 @@ jupyter lab - 패키지 설치(`pip install -e .`) - `pythonw`로 GUI 실행(콘솔창 없이) +데스크톱 UI 내 `환경진단` 버튼으로 Ollama 설치/실행/모델 보유 여부를 즉시 확인할 수 있습니다. + --- ## 4) BitNet 기본 설정값 (안정성 우선) @@ -206,6 +207,9 @@ bitnet-analyze ui --host 127.0.0.1 --port 8765 # 5) 데스크톱 UI 실행 bitnet-analyze desktop + +# 6) 환경 진단 +bitnet-analyze doctor --model bitnet:latest ``` --- diff --git a/bitnet_tools/analysis.py b/bitnet_tools/analysis.py index 5a615ab..0474ebe 100644 --- a/bitnet_tools/analysis.py +++ b/bitnet_tools/analysis.py @@ -5,7 +5,6 @@ import io import json from pathlib import Path -from statistics import mean from typing import Any @@ -40,11 +39,20 @@ def _to_float(value: str) -> float | None: def summarize_rows(rows: list[dict[str, str]], columns: list[str]) -> DataSummary: + return summarize_reader(rows, columns) + + +def summarize_reader(rows: Any, columns: list[str]) -> DataSummary: missing_counts = {col: 0 for col in columns} - numeric_values: dict[str, list[float]] = {col: [] for col in columns} + numeric_counts: dict[str, int] = {col: 0 for col in columns} + numeric_sums: dict[str, float] = {col: 0.0 for col in columns} + numeric_mins: dict[str, float] = {} + numeric_maxs: dict[str, float] = {} text_seen: dict[str, bool] = {col: False for col in columns} + row_count = 0 for row in rows: + row_count += 1 for col in columns: raw = (row.get(col) or "").strip() if raw == "": @@ -54,25 +62,30 @@ def summarize_rows(rows: list[dict[str, str]], columns: list[str]) -> DataSummar if 
num is None: text_seen[col] = True else: - numeric_values[col].append(num) + numeric_counts[col] += 1 + numeric_sums[col] += num + if col not in numeric_mins or num < numeric_mins[col]: + numeric_mins[col] = num + if col not in numeric_maxs or num > numeric_maxs[col]: + numeric_maxs[col] = num dtypes: dict[str, str] = {} numeric_stats: dict[str, dict[str, float]] = {} for col in columns: - values = numeric_values[col] - if values and not text_seen[col]: + count = numeric_counts[col] + if count > 0 and not text_seen[col]: dtypes[col] = "float" numeric_stats[col] = { - "count": float(len(values)), - "mean": float(mean(values)), - "min": float(min(values)), - "max": float(max(values)), + "count": float(count), + "mean": float(numeric_sums[col] / count), + "min": float(numeric_mins[col]), + "max": float(numeric_maxs[col]), } else: dtypes[col] = "string" return DataSummary( - row_count=len(rows), + row_count=row_count, column_count=len(columns), columns=columns, dtypes=dtypes, @@ -101,9 +114,8 @@ def build_analysis_payload(csv_path: str | Path, question: str) -> dict[str, Any if reader.fieldnames is None: raise ValueError("CSV header not found") columns = [str(c) for c in reader.fieldnames] - rows = list(reader) - summary = summarize_rows(rows, columns) + summary = summarize_reader(reader, columns) return { "csv_path": str(path), @@ -119,8 +131,7 @@ def build_analysis_payload_from_csv_text(csv_text: str, question: str) -> dict[s raise ValueError("CSV header not found") columns = [str(c) for c in reader.fieldnames] - rows = list(reader) - summary = summarize_rows(rows, columns) + summary = summarize_reader(reader, columns) return { "csv_path": "", diff --git a/bitnet_tools/cli.py b/bitnet_tools/cli.py index a762c2d..972aa41 100644 --- a/bitnet_tools/cli.py +++ b/bitnet_tools/cli.py @@ -7,6 +7,7 @@ from pathlib import Path from .analysis import build_analysis_payload +from .doctor import collect_environment from .web import serve @@ -49,12 +50,15 @@ def _build_parser() -> 
argparse.ArgumentParser: subparsers.add_parser("desktop", help="Run Windows desktop UI") + doctor_parser = subparsers.add_parser("doctor", help="Run local environment diagnostics") + doctor_parser.add_argument("--model", default=None, help="Optional model tag to check availability") + return parser def main(argv: list[str] | None = None) -> int: raw_args = list(sys.argv[1:] if argv is None else argv) - if raw_args and raw_args[0] not in {"analyze", "ui", "desktop", "-h", "--help"}: + if raw_args and raw_args[0] not in {"analyze", "ui", "desktop", "doctor", "-h", "--help"}: raw_args.insert(0, "analyze") parser = _build_parser() @@ -70,6 +74,11 @@ def main(argv: list[str] | None = None) -> int: launch_desktop() return 0 + if args.command == "doctor": + report = collect_environment(model=args.model) + print(json.dumps(report, ensure_ascii=False, indent=2)) + return 0 + if args.command == "analyze": payload = build_analysis_payload(args.csv, args.question) args.out.write_text( diff --git a/bitnet_tools/desktop.py b/bitnet_tools/desktop.py index e0a655e..2cc0e8b 100644 --- a/bitnet_tools/desktop.py +++ b/bitnet_tools/desktop.py @@ -8,6 +8,7 @@ from tkinter import filedialog, messagebox, ttk from .analysis import build_analysis_payload +from .doctor import collect_environment def run_ollama(model: str, prompt: str) -> str: @@ -84,7 +85,8 @@ def _build_ui(self) -> None: self.model.insert(0, "bitnet:latest") self.model.pack(side="left", fill="x", expand=True, padx=8) - ttk.Button(model_row, text="1) 분석", command=self._analyze_async).pack(side="left", padx=(8, 4)) + ttk.Button(model_row, text="환경진단", command=self._doctor_async).pack(side="left", padx=(8, 4)) + ttk.Button(model_row, text="1) 분석", command=self._analyze_async).pack(side="left", padx=(0, 4)) ttk.Button(model_row, text="2) BitNet 실행", command=self._run_model_async).pack(side="left") self.status = ttk.Label(frame, text="대기 중") @@ -153,6 +155,19 @@ def _analyze(self) -> None: except Exception as exc: 
def _run(cmd: list[str], timeout: float | None = 15.0) -> tuple[int, str, str]:
    """Run *cmd* and return ``(returncode, stdout, stderr)`` with output stripped.

    Launch failures and hangs are reported through the returned tuple instead
    of raising, so a diagnostic run always produces a report:

    * ``127`` -- the executable could not be started (``OSError``).
    * ``124`` -- the command did not finish within *timeout* seconds.

    Args:
        cmd: Argument vector; executed directly, never through a shell.
        timeout: Seconds to wait before giving up; ``None`` waits forever.

    Returns:
        The exit status followed by the stripped stdout and stderr text.
    """
    try:
        proc = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            errors="replace",  # undecodable bytes must not abort the diagnosis
            check=False,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        return 124, "", f"timed out after {timeout}s"
    except OSError as exc:
        return 127, "", str(exc)
    return proc.returncode, proc.stdout.strip(), proc.stderr.strip()


def _tagged(name: str) -> str:
    """Return *name* with Ollama's implicit ``:latest`` tag made explicit."""
    # Only the last path segment carries the tag; a registry host prefix may
    # contain its own ``:port``.
    last_segment = name.rsplit("/", 1)[-1]
    return name if ":" in last_segment else f"{name}:latest"


def _model_available(requested: str, installed: list[str]) -> bool:
    """Return True when *requested* names exactly one of the *installed* models.

    Both sides are compared with the default tag filled in, so ``"bitnet"``
    matches ``"bitnet:latest"`` while ``"bit"`` or ``"bitnet:1"`` do not match
    ``"bitnet:latest"`` / ``"bitnet:10"``.
    """
    wanted = _tagged(requested.strip())
    return any(_tagged(name) == wanted for name in installed)


def collect_environment(model: str | None = None) -> dict[str, Any]:
    """Collect a JSON-serialisable report about the local Ollama setup.

    Args:
        model: Optional model name (``name`` or ``name:tag``) whose local
            availability should be checked.

    Returns:
        A dict that always contains ``python``, ``platform``,
        ``ollama_installed`` and ``ollama_running``. Depending on what was
        found it also contains ``diagnosis``, ``ollama_path``,
        ``ollama_version`` or ``ollama_version_error``, ``models``,
        ``model_requested`` / ``model_available``, or ``ollama_list_error``.
    """
    info: dict[str, Any] = {
        "python": sys.version.split()[0],
        "platform": platform.platform(),
        "ollama_installed": False,
        "ollama_running": False,
    }

    ollama_path = shutil.which("ollama")
    if not ollama_path:
        info["diagnosis"] = "ollama not found in PATH"
        return info

    info["ollama_installed"] = True
    info["ollama_path"] = ollama_path

    # Invoke the resolved path so the binary that is reported is the binary
    # that is actually executed.
    code, out, err = _run([ollama_path, "--version"])
    if code == 0:
        info["ollama_version"] = out
    else:
        info["ollama_version_error"] = err or out or "unknown error"

    code, out, err = _run([ollama_path, "list"])
    if code != 0:
        info["ollama_list_error"] = err or out or "failed to query ollama"
        return info

    info["ollama_running"] = True
    rows = [line for line in out.splitlines() if line.strip()]
    # The first non-empty row is the column header; the model name is the
    # first whitespace-separated field of every following row.
    models = [row.split()[0] for row in rows[1:]]
    info["models"] = models
    if model:
        info["model_requested"] = model
        info["model_available"] = _model_available(model, models)

    return info