-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathadd_cloze
More file actions
executable file
·149 lines (117 loc) · 4.28 KB
/
add_cloze
File metadata and controls
executable file
·149 lines (117 loc) · 4.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#!/usr/bin/env python3
"""Add Anki cloze deletions of increasing size to a text.
Reads from a file or stdin and writes to stdout by default.
"""
import argparse
import logging
import sys
# Root logger is configured at INFO, so the logging.debug() tracing in
# create_nested_cloze_chunks stays silent unless this level is lowered.
logging.basicConfig(level=logging.INFO)
# Factor by which the chunk size is multiplied after each pass in
# create_nested_cloze_chunks.
CHUNK_MULTIPLIER = 2
# NOTE(review): presumably the size of the smallest multi-line chunk --
# confirm against create_nested_cloze_chunks.
INITIAL_CHUNK_SIZE = 2
def _strip_trailing_empty_lines(lines):
    """Remove falsy (empty) entries from the end of ``lines`` in place.

    The same list object is returned so the call can be used as an
    expression.
    """
    # Walk backwards to find how many leading entries must be kept.
    keep = len(lines)
    while keep > 0 and not lines[keep - 1]:
        keep -= 1
    # Truncate in place so callers holding the list see the change.
    del lines[keep:]
    return lines
def read_lines_from_file(filename):
    """Return the lines of ``filename`` with surrounding whitespace removed.

    Each line is stripped on both sides (which also removes the newline),
    and empty lines at the end of the file are dropped.
    """
    with open(filename, "r") as handle:
        stripped = list(map(str.strip, handle))
    return _strip_trailing_empty_lines(stripped)
def read_lines_from_stdin():
    """Return the lines read from standard input, whitespace-stripped.

    Each line is stripped on both sides (which also removes the newline),
    and empty lines at the end of the input are dropped.
    """
    stripped = list(map(str.strip, sys.stdin))
    return _strip_trailing_empty_lines(stripped)
def start_cloze(line, cloze_index):
    """Return ``line`` prefixed with the opening marker of a cloze deletion.

    The marker has the form ``{{c<cloze_index>::``; the cloze is left open
    so it can be closed on this or a later line with ``finish_cloze``.
    """
    opening_marker = "".join(("{{c", str(cloze_index), "::"))
    return opening_marker + line
def finish_cloze(line):
    """Return ``line`` with the closing cloze marker ``}}`` appended."""
    closing_marker = "}}"
    return line + closing_marker
def encloze(line, cloze_index):
    """Wrap the whole of ``line`` in a single cloze numbered ``cloze_index``."""
    opened = start_cloze(line, cloze_index)
    return finish_cloze(opened)
def create_nested_cloze_chunks(lines, index_offset=0):
    """Add nested cloze deletions in-place to ``lines``.

    Three passes are made, each new cloze getting the next free number:

    1. Every non-empty line becomes a cloze of its own.
    2. Aligned chunks of ``INITIAL_CHUNK_SIZE`` lines are wrapped, then the
       chunk size is multiplied by ``CHUNK_MULTIPLIER`` and the pass is
       repeated while the chunk size is smaller than the number of lines.
    3. The entire text is wrapped, unless an earlier chunk already spans it.

    A chunk that was already wrapped, or that consists solely of empty
    lines, is skipped so that no duplicate or empty cloze is emitted.

    Partially overlapping cloze deletions are not supported, so bigger chunks
    must contain smaller chunks. This is guaranteed if bigger chunk sizes are
    multiples of smaller chunk sizes.

    See "There is no support for partial overlaps" in
    https://docs.ankiweb.net/editing.html?highlight=cloze#cloze-deletion.

    Args:
        lines: Mutable list of strings; its elements are rewritten in place.
        index_offset: Added to every cloze number, so numbering starts at
            ``1 + index_offset``.

    Returns:
        None. The result is the modified ``lines`` list.
    """
    total = len(lines)
    # Snapshot blankness up front: once markers are inserted, a line that
    # was empty in the input no longer looks empty.
    blank = [not line for line in lines]
    cloze_index = 1 + index_offset
    added_chunks = set()

    # Pass 1: one cloze per non-empty line.
    for i, line in enumerate(lines):
        if blank[i]:
            continue
        lines[i] = encloze(line, cloze_index)
        added_chunks.add((i, i))
        cloze_index += 1

    # Passes 2 and 3: collect candidate spans from the smallest chunk size to
    # the largest, ending with the whole text. CHUNK_MULTIPLIER must be
    # greater than 1, otherwise the chunk size never reaches ``total``.
    spans = []
    chunk_size = INITIAL_CHUNK_SIZE
    while chunk_size < total:
        for start in range(0, total, chunk_size):
            # The last chunk of a pass may be shorter than chunk_size.
            spans.append((start, min(start + chunk_size, total) - 1))
        chunk_size *= CHUNK_MULTIPLIER
    if lines:
        spans.append((0, total - 1))

    for start, end in spans:
        if (start, end) in added_chunks:
            logging.debug("%s, %s already in.", start, end)
            continue
        if all(blank[start:end + 1]):
            # Wrapping nothing but empty lines would create an empty cloze.
            continue
        logging.debug("%s, %s, %s added.", start, end, cloze_index)
        logging.debug("Begin: %s", lines[start])
        logging.debug("End: %s", lines[end])
        added_chunks.add((start, end))
        lines[start] = start_cloze(lines[start], cloze_index)
        lines[end] = finish_cloze(lines[end])
        cloze_index += 1
def write_lines_to_file(filename, lines, overwrite=False):
    """Write ``lines`` to ``filename``, joined by newlines.

    No newline is written after the last line. Unless ``overwrite`` is true
    the file is opened for exclusive creation; if it already exists, an
    error is logged and the existing file is left untouched.
    """
    # "x" makes open() raise FileExistsError instead of truncating.
    if overwrite:
        mode = "w"
    else:
        mode = "x"
    try:
        with open(filename, mode) as handle:
            handle.write("\n".join(lines))
    except FileExistsError as exc:
        logging.error("File already exists: %s", exc)
def add_cloze_deletion(lines, index_offset=0):
    """Add nested cloze deletions to ``lines`` and return the same list.

    ``lines`` is modified in place; ``index_offset`` is forwarded to
    ``create_nested_cloze_chunks``.
    """
    # The helper edits the list in place and returns nothing, so hand the
    # caller's list back explicitly.
    create_nested_cloze_chunks(lines, index_offset)
    return lines
if __name__ == "__main__":
    # Command-line entry point: read text, add cloze markers, emit the result.
    cli = argparse.ArgumentParser(
        description=(
            "Add cloze deletion tags to chunks of text in a file or stdin. "
            "By default, writes the result to stdout."
        )
    )
    cli.add_argument(
        "filename",
        nargs="?",
        help="Input filename (or '-' / omitted to read from stdin).",
    )
    # store_true already defaults to False when the flag is absent.
    cli.add_argument(
        "-f",
        "--force",
        action="store_true",
        help="Overwrite output file if it exists.",
    )
    cli.add_argument(
        "-o",
        "--offset",
        type=int,
        default=0,
        help="Offset added to cloze index (start index = 1 + offset).",
    )
    cli.add_argument(
        "-O",
        "--output",
        help="Output filename (default: write to stdout).",
    )
    options = cli.parse_args()

    # A missing filename and "-" both select standard input.
    if options.filename in (None, "-"):
        source_lines = read_lines_from_stdin()
    else:
        source_lines = read_lines_from_file(options.filename)

    clozed_lines = add_cloze_deletion(source_lines, index_offset=options.offset)

    if not options.output:
        sys.stdout.write("\n".join(clozed_lines))
    else:
        write_lines_to_file(
            options.output, clozed_lines, overwrite=options.force
        )