rad1092 · rad1092 · Feb 15, 2026 · Feb 15, 2026
diff --git a/.gitignore b/.gitignore
@@ -5,3 +5,5 @@ __pycache__/
 # Test/runtime caches
 .pytest_cache/
 .bitnet_cache/
+
+.offline_bundle/
diff --git a/BASELINE_PLAN.md b/BASELINE_PLAN.md
@@ -0,0 +1,20 @@
+# Baseline plan (0단계)
+
+이 문서는 개선 작업 전/후 비교를 위한 기준선 고정 절차를 정의한다.
+
+## 고정 기준
+- 테스트 전체 통과 여부 (`pytest -q`)
+- 대표 입력 CSV 3종 결과 일관성
+  - `tests/fixtures/small_numeric.csv`
+  - `tests/fixtures/mixed_formats.csv`
+  - `tests/fixtures/missing_heavy.csv`
+- 핵심 요약 결과 스냅샷
+  - row_count, column_count
+  - dtypes
+  - missing_counts
+  - numeric_stats
+
+## 운영 방법
+1. 개선 전 baseline 테스트를 실행해 현재 결과를 확인
+2. 개선 작업 후 동일 테스트를 재실행
+3. 의도하지 않은 필드 변경이 있으면 원인 분석 후 수정
diff --git a/ONLINE_EXECUTION_LOG.md b/ONLINE_EXECUTION_LOG.md
@@ -0,0 +1,25 @@
+# 인터넷 1턴 실행 로그 및 후속 가이드
+
+## 이번 턴 수행 내용
+- `scripts/prepare_online_bundle.sh` 추가
+- 온라인 가능 시 다음을 자동 수행하도록 구성
+  - 환경 메타데이터 수집
+  - 로컬 wheel 빌드 시도
+  - 선택 의존성 wheel 다운로드 시도
+  - Ollama 설치 스크립트 보관 시도
+  - 오프라인 사용 가이드 생성
+
+## 이번 환경에서의 결과
+- 프록시 제한(403)으로 외부 다운로드 실패
+- Ollama 설치 스크립트도 403으로 실패
+- 따라서 다운로드 실패는 경고 파일로 기록하고, 스크립트는 중단하지 않고 끝까지 실행되도록 설계
+
+## 다음 네트워크 허용 환경에서 기대 결과
+- `.offline_bundle/wheels`에 프로젝트 및 선택 의존성 wheel 저장
+- `.offline_bundle/models/ollama_install.sh` 보관
+- `.offline_bundle/OFFLINE_USE.md` 기반으로 오프라인 설치 가능
+
+## 실행 명령
+```bash
+./scripts/prepare_online_bundle.sh
+```
diff --git a/REVIEW_REPORT.md b/REVIEW_REPORT.md
@@ -0,0 +1,83 @@
+# 실행 가능률 및 분석 기능 점검 보고서
+
+## 1) 현재 실행 가능률(환경 기준)
+
+- 테스트 실행 결과: `24 passed` (핵심 분석/CLI/웹 핸들러 단위 기능 정상)
+- 로컬 환경 진단 결과: Python/플랫폼 확인 가능, `ollama` 미설치로 모델 실행 경로는 현재 비활성
+
+### 실행 가능률 산정(현 환경)
+- 코드 자체 품질(테스트 통과율): **100% (24/24)**
+- LLM 연동 포함 엔드투엔드 실사용률: **약 85~90%**
+  - 사유: `analyze`, `multi-analyze`, `report`, `ui`, `doctor`는 동작 가능
+  - 단, `ollama run`이 필요한 즉시 모델 응답(`/api/run`, `--model`)은 로컬 ollama 설치/기동 필요
+
+## 2) 제공 분석 기능
+
+### 단일 CSV 분석
+- 행/열 수, 컬럼 목록, 결측 수
+- 컬럼 타입 추론(숫자/문자)
+- 숫자형 기본 통계(count/mean/min/max)
+- BitNet 프롬프트 자동 생성
+- Markdown 보고서 생성
+
+### 다중 CSV 분석
+- 파일별 프로파일링(결측/고유비율/대표값/의미타입)
+- 공통 컬럼/전체 컬럼 집합 비교
+- 파일 간 스키마 드리프트(타입 변화, 결측비율 범위, 대표값비율 범위, 평균 변화)
+- 인사이트 룰 엔진(결측 높음, 이상치 비율 높음, 평균 변화 등)
+- 그룹-타깃 비율표(옵션)
+- pandas 코드 가이드 자동 생성
+- 캐시 기반 재분석 가속(`.bitnet_cache`)
+- 병렬 파일 프로파일링(workers)
+
+### 시각화
+- 수치형: histogram, boxplot, missing bar
+- 범주형: top-k bar
+- 수치형 컬럼이 2개 이상이면 scatter 샘플 생성
+- 대용량 대응을 위한 reservoir sampling 적용
+
+### 웹/데스크톱
+- 브라우저 UI: CSV 텍스트 붙여넣기 단일/다중 분석
+- 차트 생성 비동기 작업(job) 제출/조회 API
+- Windows 데스크톱 UI 진입점 제공
+
+## 3) 받아본 데이터로 도출 가능한 결과
+
+- 데이터 품질 진단: 결측/편중/희소성/대표값 쏠림
+- 수치 분포 요약: 평균/최솟값/최댓값/양수·0·음수 비율
+- 이상치 위험도: IQR 기반 outlier ratio 추정
+- 컬럼 성격 파악: category/date/numeric/text/위경도 추정
+- 다중 파일 비교: 스키마 호환성 및 분포 변화(드리프트)
+- 운영 인사이트: 품질 이슈 우선순위(결측↑, 이상치↑, 타입 충돌)
+- 후속 분석 가이드: 병합 키 중심 pandas 예시 코드 자동 제안
+
+## 4) 분석 가능한 데이터 범위
+
+### 직접 지원
+- CSV 파일(단일/다중)
+- 웹 UI 입력용 CSV 텍스트(붙여넣기)
+
+### 확장/간접 지원
+- 생성된 JSON/Markdown 결과를 BitNet 프롬프트로 전달해 해석형 요약 가능
+- 코드 가이드를 활용한 pandas 후처리 확장 가능
+
+### 주의사항
+- CSV 헤더 필수(헤더 없으면 오류)
+- 숫자형은 컬럼 내 텍스트 혼입 시 string으로 판정될 수 있음
+- 고유값은 비트맵 기반 추정치(정확 cardinality 아님)
+- outlier ratio는 샘플 기반 추정
+
+## 5) 코드 전체 검토 요약
+
+### 강점
+- 분석 엔진이 스트리밍/샘플링 중심으로 메모리 사용을 제어
+- CLI, 웹 API, 데스크톱 진입점이 분리되어 사용성 좋음
+- 캐시/병렬 처리 등 실사용 성능 요소 반영
+- 테스트 커버리지(기능 단위) 양호
+
+### 개선 권장
+- 숫자형 추론 고도화(천 단위 구분기호, 퍼센트, 통화 기호 정규화) — 기본 정규화는 이번 변경의 `_to_float`에 반영됨
+- 날짜 파싱 포맷 확장 및 locale 대응
+- unique 추정 정확도 옵션(HLL 등) 추가
+- UI/API 레벨에서 대용량 파일 업로드/진행률/취소 제어 강화
+- 결과 스키마 버전 필드 추가(하위호환 관리)
diff --git a/bitnet_tools/analysis.py b/bitnet_tools/analysis.py
@@ -32,11 +32,32 @@ def _to_float(value: str) -> float | None:
     v = value.strip()
     if not v:
         return None
+
+    negative_by_parentheses = v.startswith("(") and v.endswith(")")
+    if negative_by_parentheses:
+        v = v[1:-1].strip()
+
+    # normalize frequent human-entered numeric formats
+    v = (
+        v.replace(",", "")
+        .replace("₩", "")
+        .replace("$", "")
+        .replace("€", "")
+        .replace("£", "")
+        .replace("%", "")
+        .strip()
+    )
+
+    if not v:
+        return None
+
     try:
-        return float(v)
+        parsed = float(v)
     except ValueError:
         return None
 
+    return -parsed if negative_by_parentheses else parsed
+
 
 def summarize_rows(rows: list[dict[str, str]], columns: list[str]) -> DataSummary:
     return summarize_reader(rows, columns)

diff --git a/scripts/prepare_online_bundle.sh b/scripts/prepare_online_bundle.sh
@@ -0,0 +1,77 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
+BUNDLE_DIR="${ROOT_DIR}/.offline_bundle"
+WHEEL_DIR="${BUNDLE_DIR}/wheels"
+MODEL_DIR="${BUNDLE_DIR}/models"
+META_DIR="${BUNDLE_DIR}/meta"
+
+mkdir -p "${WHEEL_DIR}" "${MODEL_DIR}" "${META_DIR}"
+
+echo "[1/6] Collecting environment metadata"
+python -V | tee "${META_DIR}/python_version.txt"
+pip --version | tee "${META_DIR}/pip_version.txt"
+python -m pip freeze | tee "${META_DIR}/pip_freeze.txt" >/dev/null
+
+cat > "${META_DIR}/bundle_manifest.txt" <<MANIFEST
+bundle_created_at=$(date -Iseconds)
+python=$(python -V 2>&1)
+pip=$(pip --version)
+MANIFEST
+
+echo "[2/6] Building local project wheel"
+if python -m pip wheel --no-build-isolation "${ROOT_DIR}" -w "${WHEEL_DIR}"; then
+  echo "local wheel build: success"
+else
+  echo "local wheel build failed" | tee "${META_DIR}/wheel_build_warning.txt"
+fi
+
+# Optional runtime dependencies for charts/notebooks/tests
+cat > "${META_DIR}/requirements_online.txt" <<REQ
+matplotlib
+pandas
+jupyterlab
+pytest
+REQ
+
+echo "[3/6] Attempting to download optional dependency wheels"
+if python -m pip download -r "${META_DIR}/requirements_online.txt" -d "${WHEEL_DIR}"; then
+  echo "optional wheel download: success"
+else
+  echo "optional wheel download: failed (network/proxy 제한 가능)" | tee "${META_DIR}/download_warning.txt"
+fi
+
+echo "[4/6] Attempting to fetch Ollama install script for offline archive"
+if curl -fsSL https://ollama.com/install.sh -o "${MODEL_DIR}/ollama_install.sh"; then
+  echo "ollama installer script archived"
+else
+  echo "ollama installer download failed (network/proxy 제한 가능)" | tee -a "${META_DIR}/download_warning.txt"
+fi
+
+echo "[5/6] Attempting to detect local ollama"
+if command -v ollama >/dev/null 2>&1; then
+  ollama --version | tee "${META_DIR}/ollama_version.txt"
+  # Avoid model pull in automated script unless explicitly requested
+  echo "ollama detected; model pull can be run manually:" | tee -a "${META_DIR}/ollama_version.txt"
+  echo "  ollama pull <bitnet-model-tag>" | tee -a "${META_DIR}/ollama_version.txt"
+else
+  echo "ollama not installed in current environment" | tee "${META_DIR}/ollama_version.txt"
+fi
+
+echo "[6/6] Writing offline install guide"
+cat > "${BUNDLE_DIR}/OFFLINE_USE.md" <<GUIDE
+# Offline bundle usage
+
+## Install project from local wheel
+python -m pip install --no-index --find-links ./wheels bitnet-tools
+
+## Optional dependencies (if downloaded)
+python -m pip install --no-index --find-links ./wheels matplotlib pandas jupyterlab pytest
+
+## Notes
+- If optional wheel download failed, rerun this script in a network-allowed environment.
+- If Ollama installer script exists in ./models/ollama_install.sh, execute it on a host with required permissions.
+GUIDE
+
+echo "done: ${BUNDLE_DIR}"
diff --git a/tests/fixtures/missing_heavy.csv b/tests/fixtures/missing_heavy.csv
@@ -0,0 +1,4 @@
+a,b,c
+1,,x
+,2,
+,,z
diff --git a/tests/fixtures/mixed_formats.csv b/tests/fixtures/mixed_formats.csv
@@ -0,0 +1,4 @@
+id,price,discount,note
+1,"1,200",12.5%,ok
+2,₩4500,5%,good
+3,"$3,000",,n/a
diff --git a/tests/fixtures/small_numeric.csv b/tests/fixtures/small_numeric.csv
@@ -0,0 +1,4 @@
+id,amount,qty
+1,10.5,2
+2,20,3
+3,30.25,1
diff --git a/tests/test_analysis.py b/tests/test_analysis.py
@@ -1,3 +1,5 @@
+from pathlib import Path
+
 from bitnet_tools.analysis import (
     build_analysis_payload,
     build_analysis_payload_from_csv_text,
@@ -152,3 +154,32 @@ def test_multi_csv_with_parallel_workers(tmp_path):
 
     assert result["file_count"] == 2
     assert [f["path"] for f in result["files"]] == [str(p1), str(p2)]
+
+
+def test_to_float_normalizes_currency_comma_percent_parentheses():
+    from bitnet_tools.analysis import _to_float
+
+    assert _to_float("1,234.5") == 1234.5
+    assert _to_float("₩4,500") == 4500.0
+    assert _to_float("$3,000") == 3000.0
+    assert _to_float("12.5%") == 12.5
+    assert _to_float("(99.1)") == -99.1
+
+
+def test_baseline_fixture_summaries_are_stable(tmp_path):
+    from bitnet_tools.analysis import build_analysis_payload
+
+    root = Path(__file__).parent / "fixtures"
+
+    small = build_analysis_payload(root / "small_numeric.csv", "baseline")
+    assert small["summary"]["row_count"] == 3
+    assert small["summary"]["column_count"] == 3
+    assert small["summary"]["dtypes"]["amount"] == "float"
+
+    mixed = build_analysis_payload(root / "mixed_formats.csv", "baseline")
+    assert mixed["summary"]["dtypes"]["price"] == "float"
+    assert mixed["summary"]["dtypes"]["discount"] == "float"
+    assert mixed["summary"]["missing_counts"]["discount"] == 1
+
+    missing = build_analysis_payload(root / "missing_heavy.csv", "baseline")
+    assert missing["summary"]["missing_counts"] == {"a": 2, "b": 2, "c": 1}
-Original file line number
+Diff line change
@@ -0,0 +1,4 @@
+    a,b,c
+,,x
+    ,2,
+    ,,z