Skip to content

Commit d8ca02d

Browse files
committed
modify usfm for chapter-level drafting to avoid import issues; move remarks to chapters
1 parent 6ada7ea commit d8ca02d

2 files changed

Lines changed: 27 additions & 10 deletions

File tree

machine/corpora/paratext_project_text_updater_base.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@ def __init__(
2929
def update_usfm(
3030
self,
3131
book_id: str,
32+
chapters: Optional[Sequence[int]] = None,
3233
rows: Optional[Sequence[UpdateUsfmRow]] = None,
3334
full_name: Optional[str] = None,
3435
text_behavior: UpdateUsfmTextBehavior = UpdateUsfmTextBehavior.PREFER_EXISTING,
@@ -61,7 +62,7 @@ def update_usfm(
6162
)
6263
try:
6364
parse_usfm(usfm, handler, self._settings.stylesheet, self._settings.versification)
64-
return handler.get_usfm(self._settings.stylesheet)
65+
return handler.get_usfm(self._settings.stylesheet, chapters)
6566
except Exception as e:
6667
error_message = (
6768
f"An error occurred while parsing the usfm for '{book_id}'"

machine/corpora/update_usfm_parser_handler.py

Lines changed: 25 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -334,27 +334,43 @@ def _end_embed_text(self, state: UsfmParserState, scripture_ref: ScriptureRef) -
334334
if embed_outside_of_block:
335335
self._end_update_block(state, [scripture_ref])
336336

337-
def get_usfm(self, stylesheet: Union[str, UsfmStylesheet] = "usfm.sty") -> str:
337+
def get_usfm(
338+
self, stylesheet: Union[str, UsfmStylesheet] = "usfm.sty", chapters: Optional[Sequence[int]] = None
339+
) -> str:
338340
if isinstance(stylesheet, str):
339341
stylesheet = UsfmStylesheet(stylesheet)
340342
tokenizer = UsfmTokenizer(stylesheet)
341343
tokens = list(self._tokens)
344+
if chapters is not None:
345+
tokens = self._get_incremental_draft_tokens(tokens, chapters)
342346
if len(self._remarks) > 0:
343347
remark_tokens: List[UsfmToken] = []
344348
for remark in self._remarks:
345349
remark_tokens.append(UsfmToken(UsfmTokenType.PARAGRAPH, "rem"))
346350
remark_tokens.append(UsfmToken(UsfmTokenType.TEXT, text=remark))
347351
if len(tokens) > 0:
348-
index = 0
349-
markers_to_skip = {"id", "ide", "rem"}
350-
while tokens[index].marker in markers_to_skip:
351-
index += 1
352-
if len(tokens) > index and tokens[index].type == UsfmTokenType.TEXT:
353-
index += 1
354-
for remark_token in reversed(remark_tokens):
355-
tokens.insert(index, remark_token)
352+
for index, token in enumerate(tokens):
353+
if token.type == UsfmTokenType.CHAPTER:
354+
tokens[index + 1 : index + 1] = remark_tokens
356355
return tokenizer.detokenize(tokens)
357356

357+
def _get_incremental_draft_tokens(self, tokens: List[UsfmToken], chapters: Sequence[int]) -> List[UsfmToken]:
    """Return the subset of ``tokens`` that belongs to the requested chapters.

    The result contains, in their original order:

    * the very first token, but only when its marker is ``"id"``;
    * every chapter token whose ``data`` is non-empty and converts (via
      ``int``) to a number found in ``chapters``;
    * every token that follows such a chapter token, up to (not including)
      the next chapter token.

    Everything else -- including tokens that precede the first selected
    chapter, other than the leading ``id`` token -- is left out.

    :param tokens: the full token list to filter; it is not modified.
    :param chapters: chapter numbers whose tokens should be kept.
    :return: a new list holding only the retained tokens.
    """
    kept: List[UsfmToken] = []

    # The leading "id" token is retained unconditionally and takes no part
    # in the chapter scan below.
    if tokens and tokens[0].marker == "id":
        kept.append(tokens[0])
        remaining = tokens[1:]
    else:
        remaining = tokens

    keeping = False
    for tok in remaining:
        if tok.type == UsfmTokenType.CHAPTER:
            # Each chapter token re-decides whether copying is on or off;
            # int() is only attempted when the token carries data.
            keeping = bool(tok.data) and int(tok.data) in chapters
        if keeping:
            kept.append(tok)
    return kept
373+
358374
def _advance_rows(self, seg_scr_refs: Sequence[ScriptureRef]) -> Tuple[List[str], Optional[dict[str, object]]]:
359375
row_texts: List[str] = []
360376
row_metadata = None

0 commit comments

Comments
 (0)