Skip to content

Commit 89c2109

Browse files
committed
refactor: complete internationalization, cleanup, and align baseline charts
0 parents  commit 89c2109

1,682 files changed

Lines changed: 95438 additions & 0 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.DS_Store

6 KB
Binary file not shown.

.env.example

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
# =============================================================================
2+
# THETA Environment Configuration
3+
# =============================================================================
4+
# Copy this file to .env and modify as needed.
5+
# All paths are relative to PROJECT_ROOT unless specified as absolute paths.
6+
#
7+
# Usage:
8+
# cp .env.example .env
9+
# # Edit .env with your settings (paths, API keys, GPU, logging)
10+
# =============================================================================
11+
12+
# =============================================================================
13+
# Core Directories (usually no need to change)
14+
# =============================================================================
15+
16+
# Project root directory (auto-detected from script location)
17+
# Uncomment and set only if you need to override auto-detection
18+
# PROJECT_ROOT=/path/to/THETA
19+
20+
# ETM module directory
21+
# ETM_DIR=${PROJECT_ROOT}/ETM
22+
23+
# Embedding module directory
24+
# EMBEDDING_DIR=${PROJECT_ROOT}/embedding
25+
26+
# Agent module directory
27+
# AGENT_DIR=${PROJECT_ROOT}/agent
28+
29+
# =============================================================================
30+
# Data Directories
31+
# =============================================================================
32+
33+
# Workspace directory for user data
34+
# WORKSPACE_DIR=${PROJECT_ROOT}/workspace
35+
36+
# Data directory (cleaned datasets)
37+
# DATA_DIR=${WORKSPACE_DIR}/data
38+
39+
# Raw data directory
40+
# RAW_DATA_DIR=${DATA_DIR}/raw_data
41+
42+
# =============================================================================
43+
# Output Directories
44+
# =============================================================================
45+
46+
# Result directory (model outputs, embeddings, BOW matrices, etc.)
47+
# RESULT_DIR=${PROJECT_ROOT}/result
48+
49+
# HuggingFace cache directory
50+
# HF_CACHE_DIR=${PROJECT_ROOT}/hf_cache
51+
52+
# =============================================================================
53+
# Model Directories
54+
# =============================================================================
55+
56+
# Base directory for embedding models
57+
# EMBEDDING_MODELS_DIR=${PROJECT_ROOT}/embedding_models
58+
59+
# Qwen embedding model paths (by size)
60+
# QWEN_MODEL_0_6B=${EMBEDDING_MODELS_DIR}/qwen3_embedding_0.6B
61+
# QWEN_MODEL_4B=${EMBEDDING_MODELS_DIR}/qwen3_embedding_4B
62+
# QWEN_MODEL_8B=${EMBEDDING_MODELS_DIR}/qwen3_embedding_8B
63+
64+
# SBERT model path (for baseline models like CTM)
65+
# SBERT_MODEL_PATH=${ETM_DIR}/model/baselines/sbert/sentence-transformers/all-MiniLM-L6-v2
66+
67+
# =============================================================================
68+
# Agent Configuration (for LLM-based analysis)
69+
# =============================================================================
70+
71+
# OpenAI API configuration (for agent features)
72+
# OPENAI_API_KEY=your-api-key-here
73+
# OPENAI_API_BASE=https://api.openai.com/v1
74+
75+
# Agent API server configuration
76+
# API_HOST=0.0.0.0
77+
# API_PORT=8000
78+
79+
# =============================================================================
80+
# GPU Configuration
81+
# =============================================================================
82+
83+
# GPU device ID(s) visible to CUDA (comma-separated, e.g. 0 or 0,1)
84+
# CUDA_VISIBLE_DEVICES=0
85+
86+
# =============================================================================
87+
# Logging
88+
# =============================================================================
89+
90+
# Log level: DEBUG, INFO, WARNING, ERROR
91+
# LOG_LEVEL=INFO

.github/workflows/deploy-docs.yml

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
name: Deploy Documentation
2+
3+
on:
4+
push:
5+
branches:
6+
- main
7+
paths:
8+
- 'doc/**'
9+
- 'mkdocs.yml'
10+
- 'mkdocs.zh.yml'
11+
- 'docs-requirements.txt'
12+
workflow_dispatch:
13+
14+
permissions:
15+
contents: write
16+
17+
jobs:
18+
deploy:
19+
runs-on: ubuntu-latest
20+
steps:
21+
- uses: actions/checkout@v4
22+
with:
23+
fetch-depth: 0
24+
25+
- name: Setup Python
26+
uses: actions/setup-python@v5
27+
with:
28+
python-version: '3.x'
29+
30+
- name: Cache pip
31+
uses: actions/cache@v4
32+
with:
33+
path: ~/.cache/pip
34+
key: ${{ runner.os }}-pip-${{ hashFiles('docs-requirements.txt') }}
35+
36+
- name: Install dependencies
37+
run: pip install -r docs-requirements.txt
38+
39+
- name: Deploy to GitHub Pages
40+
run: mkdocs gh-deploy --force

.gitignore

Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
# Byte-compiled / optimized / DLL files
2+
__pycache__/
3+
*.py[codz]
4+
*$py.class
5+
6+
# C extensions
7+
*.so
8+
9+
# Distribution / packaging
10+
.Python
11+
build/
12+
develop-eggs/
13+
dist/
14+
downloads/
15+
eggs/
16+
.eggs/
17+
lib/
18+
lib64/
19+
parts/
20+
sdist/
21+
var/
22+
wheels/
23+
share/python-wheels/
24+
*.egg-info/
25+
.installed.cfg
26+
*.egg
27+
MANIFEST
28+
29+
# PyInstaller
30+
# Usually these files are written by a python script from a template
31+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
32+
*.manifest
33+
*.spec
34+
35+
# Installer logs
36+
pip-log.txt
37+
pip-delete-this-directory.txt
38+
39+
# Unit test / coverage reports
40+
htmlcov/
41+
.tox/
42+
.nox/
43+
.coverage
44+
.coverage.*
45+
.cache
46+
nosetests.xml
47+
coverage.xml
48+
*.cover
49+
*.py.cover
50+
.hypothesis/
51+
.pytest_cache/
52+
cover/
53+
54+
# Translations
55+
*.mo
56+
*.pot
57+
58+
# Django stuff:
59+
*.log
60+
local_settings.py
61+
db.sqlite3
62+
db.sqlite3-journal
63+
64+
# Flask stuff:
65+
instance/
66+
.webassets-cache
67+
68+
# Scrapy stuff:
69+
.scrapy
70+
71+
# Sphinx documentation
72+
docs/_build/
73+
74+
# PyBuilder
75+
.pybuilder/
76+
target/
77+
78+
# Jupyter Notebook
79+
.ipynb_checkpoints
80+
81+
# IPython
82+
profile_default/
83+
ipython_config.py
84+
85+
# pyenv
86+
# For a library or package, you might want to ignore these files since the code is
87+
# intended to run in multiple environments; otherwise, check them in:
88+
# .python-version
89+
90+
# pipenv
91+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
93+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
94+
# install all needed dependencies.
95+
#Pipfile.lock
96+
97+
# UV
98+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99+
# This is especially recommended for binary packages to ensure reproducibility, and is more
100+
# commonly ignored for libraries.
101+
#uv.lock
102+
103+
# poetry
104+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105+
# This is especially recommended for binary packages to ensure reproducibility, and is more
106+
# commonly ignored for libraries.
107+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108+
#poetry.lock
109+
#poetry.toml
110+
111+
# pdm
112+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113+
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114+
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115+
#pdm.lock
116+
#pdm.toml
117+
.pdm-python
118+
.pdm-build/
119+
120+
# pixi
121+
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122+
#pixi.lock
123+
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124+
# in the .venv directory. It is recommended not to include this directory in version control.
125+
.pixi
126+
127+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128+
__pypackages__/
129+
130+
# Celery stuff
131+
celerybeat-schedule
132+
celerybeat.pid
133+
134+
# SageMath parsed files
135+
*.sage.py
136+
137+
# Environments
138+
.env
139+
.envrc
140+
.venv
141+
env/
142+
venv/
143+
ENV/
144+
env.bak/
145+
venv.bak/
146+
147+
# Spyder project settings
148+
.spyderproject
149+
.spyproject
150+
151+
# Rope project settings
152+
.ropeproject
153+
154+
# mkdocs documentation
155+
/site
156+
157+
# mypy
158+
.mypy_cache/
159+
.dmypy.json
160+
dmypy.json
161+
162+
# Pyre type checker
163+
.pyre/
164+
165+
# pytype static type analyzer
166+
.pytype/
167+
168+
# Cython debug symbols
169+
cython_debug/
170+
171+
# PyCharm
172+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
173+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
174+
# and can be added to the global gitignore or merged into this file. For a more nuclear
175+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
176+
#.idea/
177+
178+
# Abstra
179+
# Abstra is an AI-powered process automation framework.
180+
# Ignore directories containing user credentials, local state, and settings.
181+
# Learn more at https://abstra.io/docs
182+
.abstra/
183+
184+
# Visual Studio Code
185+
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
186+
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
187+
# and can be added to the global gitignore or merged into this file. However, if you prefer,
188+
# you could uncomment the following to ignore the entire vscode folder
189+
# .vscode/
190+
191+
# Ruff stuff:
192+
.ruff_cache/
193+
194+
# PyPI configuration file
195+
.pypirc
196+
197+
# Cursor
198+
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
199+
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
200+
# refer to https://docs.cursor.com/context/ignore-files
201+
.cursorignore
202+
.cursorindexingignore
203+
204+
# Marimo
205+
marimo/_static/
206+
marimo/_lsp/
207+
__marimo__/

0 commit comments

Comments
 (0)