rad1092 · rad1092 · Feb 14, 2026 · Feb 14, 2026 · Feb 14, 2026
diff --git a/README.md b/README.md
@@ -7,7 +7,7 @@
 
 ## 0) 현재 완성도 빠른 진단
 
-현 시점 기준 기능 완성도(실사용 관점): **약 97%**
+현 시점 기준 기능 완성도(실사용 관점): **약 98%**
 
 - 완료
   - CSV 기초 요약(행/열/결측/숫자 통계)
@@ -22,14 +22,14 @@
   - 웹 UI 대시보드(JSON 붙여넣기 기반 KPI/인사이트 뷰)
   - **윈도우 데스크톱 UI(`bitnet-analyze desktop`, `BitNet_Desktop_Start.bat`)**
 - 남은 과제
-  - 대시보드 상호작용 고도화(파일 업로드 기반 멀티 분석 원클릭)
-  - 대규모 차트 생성 최적화(청크-스트리밍 렌더러)
+  - 대시보드 상호작용 고도화(파일 업로드 기반 멀티 분석 원클릭 + 필터)
+  - 차트 렌더링 백엔드 비동기 작업 큐(대형 배치용)
 
 ### 처리 규모 가이드
 
 - 단일/다중 CSV 분석(`analyze`, `multi-analyze`)은 스트리밍 누적 통계를 사용해 수십 MB 수준까지 안정 처리하도록 개선됨
 - `multi-analyze`는 파일 단위 캐시(`.bitnet_cache`)를 사용해 재실행 성능을 개선
-- 차트 생성(`--charts-dir`)은 matplotlib 기반이며 파일을 메모리에 적재해 그리므로 더 큰 파일에서는 샘플링 전략 권장
+- 차트 생성(`--charts-dir`)은 matplotlib 기반이며 샘플링 기반 차트 템플릿으로 메모리 사용을 제한해 대형 파일 대응성을 개선
 
 ### 파일 붙여넣기 분석 가능 범위
 

diff --git a/bitnet_tools/visualize.py b/bitnet_tools/visualize.py
@@ -1,22 +1,14 @@
 from __future__ import annotations
 
 import csv
+import random
+from collections import Counter
 from pathlib import Path
 from typing import Any
 
 
-def _is_numeric_column(rows: list[dict[str, str]], col: str) -> bool:
-    seen = 0
-    for row in rows:
-        raw = (row.get(col) or "").strip()
-        if not raw:
-            continue
-        seen += 1
-        try:
-            float(raw)
-        except ValueError:
-            return False
-    return seen > 0
+SAMPLE_CAP = 20000
+TOP_K = 10
 
 
 def _safe_stem(path: Path) -> str:
@@ -35,6 +27,77 @@ def _ensure_matplotlib():
         raise RuntimeError("matplotlib is required for chart generation") from exc
 
 
+def _reservoir_float(values: list[float], value: float, seen: int, cap: int) -> None:
+    """Offer `value`, the `seen`-th item (1-based), to a sample of at most `cap` floats."""
+    if len(values) < cap:
+        # Still filling up: keep everything (never true when cap <= 0).
+        values.append(value)
+    elif cap > 0:
+        slot = random.randint(0, seen - 1)
+        if slot < cap:
+            values[slot] = value
+
+
+def _reservoir_pair(xs: list[float], ys: list[float], x: float, y: float, seen: int, cap: int) -> None:
+    """Offer the point (x, y), the `seen`-th pair (1-based), to a paired sample of at most `cap`."""
+    if len(xs) < cap:
+        # Still filling up: keep everything (never true when cap <= 0).
+        xs.append(x)
+        ys.append(y)
+    elif cap > 0:
+        slot = random.randint(0, seen - 1)
+        if slot < cap:
+            xs[slot] = x
+            ys[slot] = y
+
+
+def _collect_profiles(csv_path: Path) -> tuple[list[str], dict[str, dict[str, Any]]]:
+    """Stream a CSV once and build per-column statistics for charting.
+
+    Returns the header names and a dict of per-column profiles (cell counts,
+    numeric flag, float reservoir sample, raw-value Counter); ([], {}) when
+    the file has no header row.
+    """
+    with csv_path.open("r", encoding="utf-8-sig", newline="") as f:
+        reader = csv.DictReader(f)
+        if reader.fieldnames is None:
+            return [], {}
+        columns = [str(c) for c in reader.fieldnames]
+        profiles: dict[str, dict[str, Any]] = {
+            c: {
+                "seen": 0,
+                "numeric_seen": 0,
+                "is_numeric": True,
+                "missing": 0,
+                "values": [],
+                "counter": Counter(),
+            }
+            for c in columns
+        }
+        for row in reader:
+            # Walk profiles, not `columns`: a repeated header must count once per row.
+            for c, profile in profiles.items():
+                raw = (row.get(c) or "").strip()
+                if not raw:
+                    profile["missing"] += 1
+                    continue
+                profile["seen"] += 1
+                profile["counter"][raw] += 1
+                if not profile["is_numeric"]:
+                    continue
+                try:
+                    num = float(raw)
+                except ValueError:
+                    profile["is_numeric"] = False
+                    profile["values"] = []
+                    continue
+                profile["numeric_seen"] += 1
+                _reservoir_float(profile["values"], num, profile["numeric_seen"], SAMPLE_CAP)
+    for profile in profiles.values():
+        profile["is_numeric"] = profile["is_numeric"] and profile["numeric_seen"] > 0
+    return columns, profiles
+
+
 def create_file_charts(
     csv_path: Path,
     out_dir: Path,
@@ -44,35 +107,25 @@ def create_file_charts(
     plt = _ensure_matplotlib()
 
     out_dir.mkdir(parents=True, exist_ok=True)
-    with csv_path.open("r", encoding="utf-8-sig", newline="") as f:
-        reader = csv.DictReader(f)
-        if reader.fieldnames is None:
-            return []
-        columns = [str(c) for c in reader.fieldnames]
-        rows = list(reader)
+    columns, profiles = _collect_profiles(csv_path)
+    if not columns:
+        return []
 
-    numeric_cols = [c for c in columns if _is_numeric_column(rows, c)][:max_numeric]
-    categorical_cols = [c for c in columns if c not in numeric_cols][:max_categorical]
+    numeric_cols = [c for c in columns if profiles[c]["is_numeric"]][:max_numeric]
+    categorical_cols = [c for c in columns if not profiles[c]["is_numeric"]][:max_categorical]
 
     artifacts: list[str] = []
     stem = _safe_stem(csv_path)
 
-    # template 1: numeric histogram + boxplot
     for col in numeric_cols:
-        values = []
-        missing = 0
-        for row in rows:
-            raw = (row.get(col) or "").strip()
-            if raw:
-                values.append(float(raw))
-            else:
-                missing += 1
+        values: list[float] = profiles[col]["values"]
+        missing = profiles[col]["missing"]
         if not values:
             continue
 
         fig = plt.figure(figsize=(7, 4))
         plt.hist(values, bins=20)
-        plt.title(f"{stem} - {col} histogram")
+        plt.title(f"{stem} - {col} histogram(sample)")
         plt.xlabel(col)
         plt.ylabel("count")
         plt.tight_layout()
@@ -83,35 +136,27 @@ def create_file_charts(
 
         fig = plt.figure(figsize=(5, 4))
         plt.boxplot(values, vert=True)
-        plt.title(f"{stem} - {col} boxplot")
+        plt.title(f"{stem} - {col} boxplot(sample)")
         plt.ylabel(col)
         plt.tight_layout()
         out = out_dir / f"{stem}_{col}_box.png"
         fig.savefig(out)
         plt.close(fig)
         artifacts.append(str(out))
 
-        # template 2: numeric missing ratio mini chart
-        total = len(values) + missing
+        total = profiles[col]["seen"] + missing
         if total > 0:
             fig = plt.figure(figsize=(5, 3))
-            plt.bar(["non_missing", "missing"], [len(values), missing])
+            plt.bar(["non_missing", "missing"], [profiles[col]["seen"], missing])
             plt.title(f"{stem} - {col} missing overview")
             plt.tight_layout()
             out = out_dir / f"{stem}_{col}_missing.png"
             fig.savefig(out)
             plt.close(fig)
             artifacts.append(str(out))
 
-    # template 3: categorical top-value bar
     for col in categorical_cols:
-        counter: dict[str, int] = {}
-        for row in rows:
-            raw = (row.get(col) or "").strip()
-            if not raw:
-                continue
-            counter[raw] = counter.get(raw, 0) + 1
-        items = sorted(counter.items(), key=lambda x: x[1], reverse=True)[:10]
+        items = profiles[col]["counter"].most_common(TOP_K)
         if not items:
             continue
 
@@ -127,22 +172,30 @@ def create_file_charts(
         plt.close(fig)
         artifacts.append(str(out))
 
-    # template 4: scatter for first 2 numeric columns
     if len(numeric_cols) >= 2:
         x_col, y_col = numeric_cols[0], numeric_cols[1]
         xs: list[float] = []
         ys: list[float] = []
-        for row in rows:
-            x_raw = (row.get(x_col) or "").strip()
-            y_raw = (row.get(y_col) or "").strip()
-            if not x_raw or not y_raw:
-                continue
-            xs.append(float(x_raw))
-            ys.append(float(y_raw))
+        seen = 0
+        with csv_path.open("r", encoding="utf-8-sig", newline="") as f:
+            reader = csv.DictReader(f)
+            if reader.fieldnames is not None:
+                for row in reader:
+                    x_raw = (row.get(x_col) or "").strip()
+                    y_raw = (row.get(y_col) or "").strip()
+                    if not x_raw or not y_raw:
+                        continue
+                    try:
+                        x, y = float(x_raw), float(y_raw)
+                    except ValueError:
+                        continue
+                    seen += 1
+                    _reservoir_pair(xs, ys, x, y, seen, SAMPLE_CAP)
+
         if xs and ys:
             fig = plt.figure(figsize=(6, 5))
             plt.scatter(xs, ys, alpha=0.6, s=12)
-            plt.title(f"{stem} - {x_col} vs {y_col}")
+            plt.title(f"{stem} - {x_col} vs {y_col} scatter(sample)")
             plt.xlabel(x_col)
             plt.ylabel(y_col)
             plt.tight_layout()
@@ -154,10 +207,7 @@ def create_file_charts(
     return artifacts
 
 
-def create_multi_charts(
-    csv_paths: list[Path],
-    out_dir: Path,
-) -> dict[str, Any]:
+def create_multi_charts(csv_paths: list[Path], out_dir: Path) -> dict[str, Any]:
     results: dict[str, Any] = {}
     for p in csv_paths:
         results[str(p)] = create_file_charts(p, out_dir)