forked from ngrayluna/generate-wandb-python-reference
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprocess_sdk_markdown.py
More file actions
executable file
·105 lines (89 loc) · 4.45 KB
/
process_sdk_markdown.py
File metadata and controls
executable file
·105 lines (89 loc) · 4.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/usr/bin/env python
"""
Enhanced script to remove entire classes, methods, functions, optionally `__init__`
methods, **and individual attribute bullets** flagged with
<!-- lazydoc-ignore-class-attributes -->
from lazydocs‑generated markdown.
"""
import os
import re
import argparse
import glob
from typing import List, Tuple
class MarkdownCleaner:
    """Clean up markdown text produced by lazydocs.

    Cleaning happens in two stages (see ``clean_text``):

    1. a list of unconditional regex substitutions (``self.patterns``);
    2. removal of regex-matched sections that carry one of the
       ``<!-- lazydoc-ignore... -->`` marker comments.
    """

    def __init__(self):
        # Stage 1: unconditional (pattern, replacement) rules, applied in order.
        unwrap_anchor = (re.compile(r'<a\b[^>]*>(.*?)</a>', re.DOTALL), r'\1')
        # Drop the last dotted component from a module heading.
        shorten_module_heading = (
            re.compile(r'(# <kbd>module</kbd> `[\w\.]+)\.[\w]+`'),
            r'\1`',
        )
        # Remove the "Global Variables" section up to the next heading.
        drop_global_variables = (
            re.compile(
                r"\*\*Global Variables\*\*\n[-]+\n(?:(?!## |# <kbd>)[\s\S])*\n",
                re.MULTILINE
            ),
            "",
        )
        unwrap_bold = (re.compile(r'<b>(.*?)</b>'), r'\1')
        # Remove the "automatically generated via lazydocs" footer.
        drop_lazydocs_footer = (
            re.compile(
                r'---\n+_This file was automatically generated via '
                r'\[lazydocs\]\([^)]+\)._\n*'
            ),
            "",
        )
        # Rewrite level-4 headings as level-3; the section patterns below
        # are written against "### " headings.
        promote_h4 = (re.compile(r'####\s*'), r'### ')

        self.patterns: List[Tuple[re.Pattern, str]] = [
            unwrap_anchor,
            shorten_module_heading,
            drop_global_variables,
            unwrap_bold,
            drop_lazydocs_footer,
            promote_h4,
        ]

        # Stage 2: patterns describing one removable section each.
        # A class heading, a method/function section, or a property section.
        self.block_pattern = re.compile(
            r"(?s)(## <kbd>class</kbd> `.*?`|"
            r"### <kbd>(?:method|function)</kbd> `.*?`\n\n```python\n.*?\n```\n\n.*?|"
            r"### <kbd>property</kbd> .*?\n\n.*?)(?=\n## |\n### |\Z)"
        )
        # A class heading plus everything up to the next class heading.
        self.class_pattern = re.compile(
            r"(?s)## <kbd>class</kbd> `.*?`.*?(?=\n## <kbd>class</kbd>|$)"
        )
        # A level-2 function section up to the next level-2 heading.
        self.function_pattern = re.compile(
            r"(?s)## <kbd>function</kbd> `.*?`\n\n```python\n.*?\n```\n\n.*?(?=\n## |\Z)"
        )
        # An ``__init__`` method section preceded by its ignore-init marker;
        # this one is always deleted when it matches.
        self.init_pattern = re.compile(
            r"(?s)<!-- lazydoc-ignore-init: internal -->\s*"
            r"### <kbd>method</kbd> `.*?__init__.*?`\n\n```python\n.*?\n```\n\n.*?(?=\n## |\n### |\Z)"
        )
        # A classmethod section up to the next heading.
        self.classmethod_pattern = re.compile(
            r"(?s)### <kbd>classmethod</kbd> `.*?`\n\n```python\n.*?\n```\n\n.*?(?=\n## |\n### |\Z)"
        )
        # One bullet item (indented at most three spaces), running up to
        # the next such bullet, the next heading, or the end of the text.
        self.attr_block_pattern = re.compile(
            r"(?sm)^( {0,3}- .*?)"
            r"(?=\n {0,3}- |\n## |\n### |\Z)"
        )

    def clean_text(self, markdown_text: str) -> str:
        """Return ``markdown_text`` with all cleaning rules applied.

        The order matters and is fixed: simple substitutions first, then the
        marker-driven removals, then the ``__init__`` removal, and finally
        the flagged attribute bullets.
        """
        result = markdown_text

        # Stage 1: unconditional substitutions.
        for regex, replacement in self.patterns:
            result = regex.sub(replacement, result)

        # Stage 2: drop sections that contain their ignore marker.
        marker_rules = (
            ("<!-- lazydoc-ignore: internal -->", self.block_pattern),
            ("<!-- lazydoc-ignore-class: internal -->", self.class_pattern),
            ("<!-- lazydoc-ignore-function: internal -->", self.function_pattern),
            ("<!-- lazydoc-ignore-classmethod: internal -->", self.classmethod_pattern),
        )
        for marker, regex in marker_rules:
            result = self._remove_ignored_blocks(result, marker, regex)

        # The init pattern embeds its own marker, so every match is removed.
        result = self.init_pattern.sub("", result)

        # Finally, drop individual attribute bullets carrying the inline marker.
        return self._remove_ignored_blocks(
            result,
            "<!-- lazydoc-ignore-class-attributes -->",
            self.attr_block_pattern,
        )

    def _remove_ignored_blocks(self, text: str, token: str, pattern: re.Pattern) -> str:
        """Delete every match of ``pattern`` in ``text`` that contains ``token``.

        Matches that do not contain ``token`` are left exactly as they were.
        """
        def replacement(found: re.Match) -> str:
            block = found.group(0)
            if token in block:
                return ""
            return block

        return pattern.sub(replacement, text)
# ----------------------------------------------------------------------#
def process_text(markdown_text: str) -> str:
    """Clean ``markdown_text`` with a freshly constructed ``MarkdownCleaner``."""
    cleaner = MarkdownCleaner()
    return cleaner.clean_text(markdown_text)
def main(args):
    """Clean every ``*.md`` file in ``args.output_directory`` in place.

    Args:
        args: Parsed command-line arguments; only ``output_directory`` is
            read. It is joined onto the current working directory.
    """
    md_glob = os.path.join(os.getcwd(), args.output_directory, "*.md")
    for filename in glob.glob(md_glob):
        # Use an explicit encoding so the result does not depend on the
        # platform's locale when the markdown contains non-ASCII characters.
        with open(filename, "r", encoding="utf-8") as f:
            text = f.read()
        # Clean before reopening for writing: opening with "w" truncates the
        # file, so a failure during cleaning must not leave it empty.
        cleaned = process_text(text)
        with open(filename, "w", encoding="utf-8") as f:
            f.write(cleaned)
if __name__ == "__main__":
    # Command-line entry point: parse the target directory and process it.
    cli = argparse.ArgumentParser(description="Post‑process lazydocs markdown.")
    cli.add_argument(
        "--output_directory",
        default="wandb_sdk_docs",
        help="Directory containing markdown files to process",
    )
    main(cli.parse_args())