Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,25 @@ Commands:

Exit codes: `0` success, `2` user error, `5` business failure, `6`
interrupted, `7` partial success (preview ready, export failed).

## Video kinds

`vk create --mode <kind>` picks which figlens pipeline runs:

- **default (no flag)** — generative video from the document.
- **`--mode replica`** — PPT/PDF page-by-page reproduction. Suited
for slide decks where the visual structure is the message.
- **`--mode script`** — uses the uploaded document text *verbatim*
as the narration. The doc must pass a quality preflight (length,
characters, content). Preflight failures exit **2** with a clear
message; agents should treat them as user-input problems and not
retry until the document has been fixed.

Modes combine freely with `--aspect horizontal|vertical` (or `16:9` /
`9:16`) and `--bgm`. All three flags are independent of `--from`,
`--prompt`, `--voice`, and `--export`.

Exit-code summary for new modes:
- `2` — `--mode <bad>`, `--aspect <bad>`, or script preflight rejected the document.
- `5` — pipeline business failure (e.g., insufficient credits) — same as today.
- `7` — preview ok, MP4 export failed — same as today.
28 changes: 28 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,33 @@
# Changelog

## 0.4.2 — 2026-05-14

### New

- `vk create --mode replica` runs the figlens PPT/PDF page-by-page
replica pipeline. `vk create --mode script` runs the verbatim-script
("讲稿锁定") pipeline that uses the uploaded document as the
narration. Both modes are now visible to humans and AI agents
through the same single-flag surface; default invocation (no
`--mode`) is unchanged.
- `vk create --aspect horizontal|vertical` selects 16:9 or 9:16
output. Accepts `16:9` / `9:16` as aliases.
- `vk create --bgm` enables background music (off by default).
- SSE progress events for the replica pipeline's new nodes
(`doc_replica_plan`, `doc_replica_shoot`) now surface in `text` and
`ndjson` output; previously the CLI's stage map filtered them out.

### Changed

- Script-mode preflight failures (`POST /v1/tasks/init` returns code
`100004`) now exit **2** (validation, user fixes input) with the
backend's localized message, instead of exit 5 with a generic
"business error" label.
- `figlens.InitTask` now takes `InitTaskParams{KnowledgeID, DocID,
VideoKind}`. Zero-value params produce the same wire body as
before (`{"v": 3}`), so callers that don't use script mode only
need to pass `InitTaskParams{}`.

## 0.4.1 — 2026-04-24

### Fixed
Expand Down
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,14 @@ output=sess_xxx.mp4

Or one-shot: `vk create --from ... --export --yes`.

### Choose a video mode

```bash
vk create --from deck.pdf --mode replica # PPT/PDF page-by-page
vk create --from talk.docx --mode script # narrate the doc verbatim
vk create --from <src> --aspect vertical --bgm # 9:16 output with background music
```

### Voice Templates

```bash
Expand Down
8 changes: 8 additions & 0 deletions README.zh.md
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,14 @@ output=sess_xxx.mp4

或者一键搞定:`vk create --from ... --export --yes`。

### 选择视频模式

```bash
vk create --from deck.pdf --mode replica # PPT/PDF 逐页还原
vk create --from talk.docx --mode script # 讲稿模式(用文档原文做旁白)
vk create --from <src> --aspect vertical --bgm # 竖屏 9:16,并开启背景音乐
```

### 音色模板

```bash
Expand Down
89 changes: 86 additions & 3 deletions client/figlens/figlens_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@ package figlens_test
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"net/http/httptest"
"strings"
"testing"

"github.com/vibeknow/cli/client/figlens"
Expand All @@ -20,26 +23,59 @@ func figlensResp(w http.ResponseWriter, data any) {
}

func TestInitTask(t *testing.T) {
var gotBody map[string]any
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path != "/v1/tasks/init" || r.Method != "POST" {
t.Fatalf("unexpected %s %s", r.Method, r.URL.Path)
}
_ = json.NewDecoder(r.Body).Decode(&gotBody)
figlensResp(w, map[string]any{
"task_id": 123, "session_id": "s_abc", "work_id": 456, "v": 3,
})
}))
defer srv.Close()

c := figlens.New(srv.URL, staticToken("tok"))
task, err := c.InitTask(context.Background())
task, err := c.InitTask(context.Background(), figlens.InitTaskParams{
KnowledgeID: "kb_1", DocID: "doc_1", VideoKind: "script_lock",
})
if err != nil {
t.Fatalf("InitTask: %v", err)
}
if task.TaskID != 123 {
t.Fatalf("task_id = %d", task.TaskID)
}
if task.SessionID != "s_abc" {
t.Fatalf("session_id = %q", task.SessionID)
if gotBody["v"] != float64(3) {
t.Fatalf("v = %v, want 3", gotBody["v"])
}
if gotBody["knowledge_id"] != "kb_1" {
t.Fatalf("knowledge_id = %v", gotBody["knowledge_id"])
}
if gotBody["video_kind"] != "script_lock" {
t.Fatalf("video_kind = %v", gotBody["video_kind"])
}
}

// TestInitTask_OmitsEmptyFields verifies that zero-value InitTaskParams are
// serialized without any of the optional keys, leaving only the protocol
// version ("v": 3) in the request body.
func TestInitTask_OmitsEmptyFields(t *testing.T) {
	var (
		raw     []byte
		readErr error
	)
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Record the read error instead of discarding it; it is asserted on
		// the test goroutine below.
		raw, readErr = io.ReadAll(r.Body)
		figlensResp(w, map[string]any{
			"task_id": 1, "session_id": "s_x", "work_id": 2, "v": 3,
		})
	}))
	defer srv.Close()

	c := figlens.New(srv.URL, staticToken("tok"))
	if _, err := c.InitTask(context.Background(), figlens.InitTaskParams{}); err != nil {
		t.Fatalf("InitTask: %v", err)
	}
	if readErr != nil {
		t.Fatalf("reading request body: %v", readErr)
	}

	// Guard against a vacuous pass: an empty or malformed body would trivially
	// "omit" every field, so first prove the baseline payload arrived.
	var decoded map[string]any
	if err := json.Unmarshal(raw, &decoded); err != nil {
		t.Fatalf("request body is not valid JSON: %v (body: %q)", err, raw)
	}
	if decoded["v"] != float64(3) {
		t.Fatalf("v = %v, want 3 (body: %s)", decoded["v"], raw)
	}

	body := string(raw)
	for _, f := range []string{"knowledge_id", "doc_id", "video_kind"} {
		if strings.Contains(body, f) {
			t.Fatalf("%s unexpectedly present in empty-params body: %s", f, body)
		}
	}
}

Expand Down Expand Up @@ -134,3 +170,50 @@ func TestSignedURL(t *testing.T) {
t.Fatalf("url = %q", u)
}
}

// TestFastQueryOptimize_SendsVideoKind verifies that OptimizeParams.VideoKind
// is sent as "video_kind" in the FastQueryOptimize request body.
func TestFastQueryOptimize_SendsVideoKind(t *testing.T) {
	// Decode into map[string]any so a non-string field elsewhere in the payload
	// cannot cause a type-mismatch error, and keep the decode error so a
	// malformed body is reported rather than silently ignored.
	var (
		gotBody   map[string]any
		decodeErr error
	)
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		decodeErr = json.NewDecoder(r.Body).Decode(&gotBody)
		w.Header().Set("Content-Type", "text/event-stream")
		fmt.Fprint(w, `data: {"code":200,"data":{"type":"aim_result","answer_done":{"text":"ok"}}}

data: [DONE]

`)
	}))
	defer srv.Close()

	c := figlens.New(srv.URL, staticToken("tok"))
	_, err := c.FastQueryOptimize(context.Background(), figlens.OptimizeParams{
		KnowledgeID: "kb_1", DocID: "doc_1", VideoKind: "script_lock",
	}, nil)
	if err != nil {
		t.Fatalf("FastQueryOptimize: %v", err)
	}
	if decodeErr != nil {
		t.Fatalf("decoding request body: %v", decodeErr)
	}
	if gotBody["video_kind"] != "script_lock" {
		t.Fatalf("video_kind on wire = %v, want %q", gotBody["video_kind"], "script_lock")
	}
}

// TestFastQueryOptimize_OmitsVideoKindWhenEmpty verifies that an empty
// OptimizeParams.VideoKind leaves "video_kind" out of the request body.
func TestFastQueryOptimize_OmitsVideoKindWhenEmpty(t *testing.T) {
	var (
		raw     []byte
		readErr error
	)
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		raw, readErr = io.ReadAll(r.Body)
		w.Header().Set("Content-Type", "text/event-stream")
		fmt.Fprint(w, `data: {"code":200,"data":{"type":"aim_result","answer_done":{"text":"ok"}}}

data: [DONE]

`)
	}))
	defer srv.Close()

	c := figlens.New(srv.URL, staticToken("tok"))
	_, err := c.FastQueryOptimize(context.Background(), figlens.OptimizeParams{
		KnowledgeID: "kb_1", DocID: "doc_1",
	}, nil)
	// Without these checks the test would pass even if the request never
	// reached the server, because an empty body contains no "video_kind".
	if err != nil {
		t.Fatalf("FastQueryOptimize: %v", err)
	}
	if readErr != nil {
		t.Fatalf("reading request body: %v", readErr)
	}
	if len(raw) == 0 {
		t.Fatal("server received an empty request body")
	}
	if strings.Contains(string(raw), "video_kind") {
		t.Fatalf("video_kind unexpectedly present in wire body: %s", raw)
	}
}
1 change: 1 addition & 0 deletions client/figlens/optimize.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ type OptimizeParams struct {
KnowledgeID string `json:"knowledge_id"`
DocID string `json:"doc_id"`
Query string `json:"query,omitempty"`
VideoKind string `json:"video_kind,omitempty"`
}

type optimizePayload struct {
Expand Down
26 changes: 14 additions & 12 deletions client/figlens/stream.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"io"
"strings"

"github.com/vibeknow/cli/internal/httpclient"
"github.com/vibeknow/cli/internal/sse"
"github.com/vibeknow/cli/internal/stage"
)
Expand All @@ -19,10 +20,13 @@ type StreamParams struct {
DocID string `json:"doc_id,omitempty"`
VoiceID string `json:"voice_id,omitempty"`
BGMEnabled bool `json:"bgm_enabled,omitempty"`
Aspect string `json:"aspect,omitempty"`
VideoKind string `json:"video_kind,omitempty"`
}

type StreamEvent struct {
Type string
Code string // set on task.failed when payload carries an envelope code
Stage string
Node string
Message string
Expand All @@ -47,18 +51,13 @@ type processLog struct {
Message string `json:"message"`
}

// mapSSECode maps a backend envelope code from an SSE payload to a CLI error code string.
// mapSSECode maps an SSE envelope code to a CLI error code label, delegating
// to httpclient.MapBusinessCode so the two transports never diverge.
func mapSSECode(code int) string {
switch code {
case 100001:
return "insufficient_credits"
case 100002:
return "freeze_not_found"
case 100003:
return "concurrent_work_limit"
default:
return "business_error"
if label, ok := httpclient.MapBusinessCode(code); ok {
return label
}
return "business_error"
}

func (c *Client) StreamChat(ctx context.Context, params StreamParams, onEvent func(StreamEvent)) error {
Expand Down Expand Up @@ -100,8 +99,11 @@ func (c *Client) StreamChat(ctx context.Context, params StreamParams, onEvent fu
msg = d.Message
}
}
code := mapSSECode(payload.Code)
onEvent(StreamEvent{Type: "task.failed", Message: fmt.Sprintf("[%s] %s", code, msg)})
onEvent(StreamEvent{
Type: "task.failed",
Code: mapSSECode(payload.Code),
Message: msg,
})
return nil
}

Expand Down
62 changes: 62 additions & 0 deletions client/figlens/stream_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package figlens_test

import (
"context"
"encoding/json"
"fmt"
"net/http"
"net/http/httptest"
Expand Down Expand Up @@ -79,3 +80,64 @@ func TestStreamChat_ErrorEvent(t *testing.T) {
t.Fatalf("expected task.failed event, got %v", events)
}
}

// TestStreamChat_SendsVideoKindAndAspect verifies that StreamParams.VideoKind,
// Aspect and BGMEnabled are sent as "video_kind", "aspect" and "bgm_enabled"
// in the StreamChat request body.
func TestStreamChat_SendsVideoKindAndAspect(t *testing.T) {
	var (
		gotBody   map[string]any
		decodeErr error
	)
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Keep the decode error so a malformed body is reported explicitly
		// instead of showing up as confusing "<nil>" field mismatches.
		decodeErr = json.NewDecoder(r.Body).Decode(&gotBody)
		w.Header().Set("Content-Type", "text/event-stream")
		fmt.Fprint(w, `data: {"code":200,"data":{"type":"aim_result","session_id":"s_abc"}}

data: [DONE]

`)
	}))
	defer srv.Close()

	c := figlens.New(srv.URL, staticToken("tok"))
	err := c.StreamChat(context.Background(), figlens.StreamParams{
		TaskID: 1, SessionID: "s_abc", Query: "q",
		VideoKind: "replica", Aspect: "vertical", BGMEnabled: true,
	}, func(figlens.StreamEvent) {})
	if err != nil {
		t.Fatalf("StreamChat: %v", err)
	}
	if decodeErr != nil {
		t.Fatalf("decoding request body: %v", decodeErr)
	}
	if gotBody["video_kind"] != "replica" {
		t.Fatalf("video_kind = %v, want \"replica\"", gotBody["video_kind"])
	}
	if gotBody["aspect"] != "vertical" {
		t.Fatalf("aspect = %v, want \"vertical\"", gotBody["aspect"])
	}
	if gotBody["bgm_enabled"] != true {
		t.Fatalf("bgm_enabled = %v, want true", gotBody["bgm_enabled"])
	}
}

// TestStreamChat_ScriptInvalidCode feeds StreamChat a single SSE frame with
// envelope code 100004 and expects the first emitted event to be task.failed,
// with Code "script_invalid" and the backend message passed through unchanged.
func TestStreamChat_ScriptInvalidCode(t *testing.T) {
	const frame = `data: {"code":100004,"data":{"message":"讲稿超过 8000 字"}}

`
	handler := func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "text/event-stream")
		fmt.Fprint(w, frame)
	}
	srv := httptest.NewServer(http.HandlerFunc(handler))
	defer srv.Close()

	var got []figlens.StreamEvent
	collect := func(ev figlens.StreamEvent) { got = append(got, ev) }

	client := figlens.New(srv.URL, staticToken("tok"))
	params := figlens.StreamParams{TaskID: 1, SessionID: "s"}
	if err := client.StreamChat(context.Background(), params, collect); err != nil {
		t.Fatalf("StreamChat: %v", err)
	}

	if len(got) == 0 || got[0].Type != "task.failed" {
		t.Fatalf("expected task.failed, got %v", got)
	}
	first := got[0]
	if first.Code != "script_invalid" {
		t.Fatalf("expected Code=script_invalid, got %q", first.Code)
	}
	if first.Message != "讲稿超过 8000 字" {
		t.Fatalf("expected backend message verbatim, got %q", first.Message)
	}
}
23 changes: 21 additions & 2 deletions client/figlens/task.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,35 @@ import (
"fmt"
)

// Backend video_kind wire values. The CLI flag names (`replica`, `script`)
// map to these via cmd.resolveVideoKind.
const (
// VideoKindReplica is the wire value for the `replica` CLI mode.
VideoKindReplica = "replica"
// VideoKindScriptLock is the wire value for the `script` CLI mode. Note that
// it differs from the flag name: the backend expects "script_lock".
VideoKindScriptLock = "script_lock"
)

// Task is the result InitTask returns for a POST /v1/tasks/init call.
type Task struct {
TaskID int64 `json:"task_id"`
SessionID string `json:"session_id"`
WorkID int64 `json:"work_id"`
// V is presumably the protocol version echoed by the backend (InitTask
// sends v=3) — TODO confirm against the backend contract.
V int `json:"v,omitempty"`
}

func (c *Client) InitTask(ctx context.Context) (*Task, error) {
// InitTaskParams holds the optional fields sent with POST /v1/tasks/init.
// Every field is tagged omitempty, so the zero value adds no keys to the
// request body.
type InitTaskParams struct {
KnowledgeID string `json:"knowledge_id,omitempty"`
DocID string `json:"doc_id,omitempty"`
// VideoKind is a backend wire value such as VideoKindReplica or
// VideoKindScriptLock; leave it empty to omit the key.
VideoKind string `json:"video_kind,omitempty"`
}

// initTaskWire is the JSON request body for POST /v1/tasks/init. Embedding
// InitTaskParams makes encoding/json flatten its fields next to "v" rather
// than nesting them under a sub-object.
type initTaskWire struct {
// V is always serialized (no omitempty); InitTask sets it to 3.
V int `json:"v"`
InitTaskParams
}

func (c *Client) InitTask(ctx context.Context, p InitTaskParams) (*Task, error) {
var t Task
if err := c.http.Do(ctx, "POST", "/v1/tasks/init", map[string]int{"v": 3}, &t); err != nil {
body := initTaskWire{V: 3, InitTaskParams: p}
if err := c.http.Do(ctx, "POST", "/v1/tasks/init", body, &t); err != nil {
return nil, fmt.Errorf("init task: %w", err)
}
return &t, nil
Expand Down
Loading
Loading