Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

## 0) 현재 완성도 빠른 진단

현 시점 기준 기능 완성도(실사용 관점): **약 98%**
현 시점 기준 기능 완성도(실사용 관점): **약 92%**

- 완료
- CSV 기초 요약(행/열/결측/숫자 통계)
Expand All @@ -22,7 +22,7 @@
- 웹 UI 대시보드(JSON 붙여넣기 기반 KPI/인사이트 뷰)
- **윈도우 데스크톱 UI(`bitnet-analyze desktop`, `BitNet_Desktop_Start.bat`)**
- 남은 과제
- 대시보드 상호작용 고도화(파일 업로드 기반 멀티 분석 원클릭 + 필터)
- 대시보드 필터/드릴다운 고도화
- 차트 렌더링 백엔드 비동기 작업 큐(대형 배치용)

### 처리 규모 가이드
Expand Down
38 changes: 38 additions & 0 deletions bitnet_tools/ui/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ const summary = document.getElementById('summary');
const prompt = document.getElementById('prompt');
const answer = document.getElementById('answer');

// Controls of the multi-CSV one-click analysis panel: the multi-file picker,
// the optional group/target column text inputs, and the button that starts the run.
const multiCsvFiles = document.getElementById('multiCsvFiles');
const groupColumn = document.getElementById('groupColumn');
const targetColumn = document.getElementById('targetColumn');
const multiAnalyzeBtn = document.getElementById('multiAnalyzeBtn');
const dashboardJson = document.getElementById('dashboardJson');
const dashboardCards = document.getElementById('dashboardCards');
const dashboardInsights = document.getElementById('dashboardInsights');
Expand Down Expand Up @@ -104,3 +108,37 @@ document.getElementById('renderDashboardBtn').addEventListener('click', () => {
? insights.map((x, i) => `${i + 1}. ${x}`).join('\n')
: '인사이트 항목이 없습니다.';
});


// Multi-CSV one-click analysis: read every selected file as text, POST the
// batch to /api/multi-analyze, then hand the JSON result to the dashboard
// renderer by filling the dashboard textarea and triggering its render button.
multiAnalyzeBtn.addEventListener('click', async () => {
  const files = [...(multiCsvFiles.files || [])];
  if (!files.length) {
    dashboardInsights.textContent = '멀티 CSV 파일을 먼저 선택하세요.';
    return;
  }

  dashboardInsights.textContent = '멀티 분석 중...';
  // Block repeated clicks while a request is in flight.
  multiAnalyzeBtn.disabled = true;
  try {
    // The reads are independent, so run them concurrently; Promise.all keeps
    // the results in the same order as the selected files.
    const payloadFiles = await Promise.all(
      files.map(async (f) => ({ name: f.name, csv_text: await f.text() }))
    );

    const res = await fetch('/api/multi-analyze', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        files: payloadFiles,
        question: question.value,
        group_column: groupColumn.value.trim(),
        target_column: targetColumn.value.trim(),
      }),
    });
    const data = await res.json();
    if (!res.ok) {
      dashboardInsights.textContent = data.error || 'error';
      return;
    }

    dashboardJson.value = JSON.stringify(data, null, 2);
    document.getElementById('renderDashboardBtn').click();
  } catch (err) {
    // Network failure, unreadable file, or a non-JSON response body: report it
    // instead of leaving the "in progress" message on screen forever.
    dashboardInsights.textContent = (err && err.message) || 'error';
  } finally {
    multiAnalyzeBtn.disabled = false;
  }
});
20 changes: 20 additions & 0 deletions bitnet_tools/ui/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,26 @@ <h3>BitNet 응답</h3>
<pre id="answer"></pre>
</section>


<!-- Multi-CSV one-click analysis panel. Element ids are looked up by app.js,
     so they must stay unchanged. -->
<section class="panel">
  <h2>멀티 CSV 원클릭 분석</h2>
  <p class="sub">여러 CSV 파일 선택 후 바로 통합 분석 + 대시보드 반영.</p>
  <input id="multiCsvFiles" type="file" multiple accept=".csv,text/csv" />
  <div class="row">
    <div>
      <!-- `for` binds each label to its input for click-to-focus and screen readers. -->
      <label for="groupColumn">그룹 컬럼(선택)</label>
      <input id="groupColumn" placeholder="예: 시도명" />
    </div>
    <div>
      <label for="targetColumn">타깃 컬럼(선택)</label>
      <input id="targetColumn" placeholder="예: 세차유형" />
    </div>
    <div class="actions">
      <!-- Explicit type avoids an accidental form submit if this is ever placed inside a <form>. -->
      <button id="multiAnalyzeBtn" type="button">멀티 분석 실행</button>
    </div>
  </div>
</section>

<section class="panel">
<h2>멀티 분석 대시보드(JSON)</h2>
<p class="sub">`multi-analyze` 결과 JSON을 붙여넣고 시각적으로 확인하세요.</p>
Expand Down
2 changes: 1 addition & 1 deletion bitnet_tools/ui/styles.css
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ body {
padding: 14px;
margin: 12px 0;
}
.row { display: flex; justify-content: space-between; gap: 12px; align-items: end; }
.row { display: flex; justify-content: space-between; gap: 12px; align-items: end; flex-wrap: wrap; }
label { display: block; margin-bottom: 8px; color: var(--muted); }
textarea, input {
width: 100%;
Expand Down
38 changes: 38 additions & 0 deletions bitnet_tools/web.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
import json
from pathlib import Path
import subprocess
import tempfile
from urllib.parse import urlparse

from .analysis import build_analysis_payload_from_csv_text
from .multi_csv import analyze_multiple_csv


UI_DIR = Path(__file__).parent / "ui"
Expand Down Expand Up @@ -75,6 +77,42 @@ def do_POST(self) -> None:
result = build_analysis_payload_from_csv_text(csv_text, question)
return self._send_json(result)


if route == "/api/multi-analyze":
files = payload.get("files", [])
question = str(payload.get("question", "")).strip() or "다중 CSV를 비교 분석해줘"
group_column = str(payload.get("group_column", "")).strip() or None
target_column = str(payload.get("target_column", "")).strip() or None
if not isinstance(files, list) or not files:
return self._send_json({"error": "files is required"}, HTTPStatus.BAD_REQUEST)

with tempfile.TemporaryDirectory(prefix="bitnet_multi_") as td:
tmp_paths = []
for i, f in enumerate(files):
if not isinstance(f, dict):
continue
name = str(f.get("name", f"file_{i}.csv"))
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Ensure each uploaded CSV gets a unique temp filename

This code reuses the incoming name as the on-disk filename, so if two uploaded entries share a basename (for example two different data.csv files), the later write overwrites the earlier one and both analysis slots end up reading the same content. The request still succeeds, but the multi-file comparison is silently corrupted; append an index/UUID to each temp filename to preserve one file per upload item.

Useful? React with 👍 / 👎.

text = str(f.get("csv_text", ""))
if not text.strip():
continue
if not name.endswith('.csv'):
name = f"{name}.csv"
path = Path(td) / name
path.write_text(text, encoding="utf-8")
Comment on lines +100 to +101
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Restrict upload paths to the temporary directory

/api/multi-analyze trusts the client-provided name and writes directly to Path(td) / name, so a crafted filename like ../../tmp/pwn.csv or an absolute path escapes the temp dir and allows overwriting arbitrary writable files on the host before analysis runs. This is a server-side file write primitive reachable by any caller of the endpoint; sanitize to a basename (or generate server-side names) and verify the resolved path stays under td before calling write_text.

Useful? React with 👍 / 👎.

tmp_paths.append(path)

if not tmp_paths:
return self._send_json({"error": "valid csv_text files are required"}, HTTPStatus.BAD_REQUEST)

result = analyze_multiple_csv(
tmp_paths,
question,
group_column=group_column,
target_column=target_column,
use_cache=False,
)
return self._send_json(result)

if route == "/api/run":
model = str(payload.get("model", "")).strip()
prompt = str(payload.get("prompt", "")).strip()
Expand Down
29 changes: 29 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,3 +175,32 @@ def boom(paths, out):
assert code == 0
body = out_json.read_text(encoding="utf-8")
assert "charts_error" in body


def test_cli_multi_analyze_no_cache_flag(tmp_path, monkeypatch):
    """`multi-analyze --no-cache` must call the analyzer with use_cache=False."""
    csv_path = tmp_path / "a.csv"
    csv_path.write_text("city,val\nseoul,1\n", encoding="utf-8")
    captured = {}

    def fake_analyze(csv, question, group_column=None, target_column=None, use_cache=True):
        # Record the flag under test and return a minimal analysis result
        # for the CLI to write out.
        captured["use_cache"] = use_cache
        city_profile = {
            "dtype": "string",
            "missing_ratio": 0.0,
            "unique_ratio": 1.0,
            "dominant_value_ratio": 1.0,
        }
        val_profile = {
            "dtype": "float",
            "missing_ratio": 0.0,
            "unique_ratio": 1.0,
            "dominant_value_ratio": 1.0,
        }
        file_entry = {
            "path": str(csv_path),
            "summary": {"row_count": 1, "column_count": 2, "columns": ["city", "val"]},
            "column_profiles": {"city": city_profile, "val": val_profile},
            "group_target_ratio": None,
        }
        return {
            "question": question,
            "file_count": 1,
            "total_row_count": 1,
            "shared_columns": ["city"],
            "union_columns": ["city", "val"],
            "files": [file_entry],
            "schema_drift": {},
            "insights": [],
            "code_guidance": {"recommended_steps": "", "pandas_example": ""},
        }

    monkeypatch.setattr(cli, "analyze_multiple_csv", fake_analyze)

    json_target = tmp_path / "o.json"
    report_target = tmp_path / "o.md"
    argv = [
        "multi-analyze",
        str(csv_path),
        "--question",
        "q",
        "--no-cache",
        "--out-json",
        str(json_target),
        "--out-report",
        str(report_target),
    ]
    exit_code = cli.main(argv)

    assert exit_code == 0
    assert captured["use_cache"] is False