Skip to content

Commit 30e96de

Browse files
authored
Merge pull request #17 from FontysVenlo/16-bug-unknown-extension-are-not-included-in-output
feat: add option for unknown file extension
2 parents 09fe364 + bac485c commit 30e96de

6 files changed

Lines changed: 101 additions & 13 deletions

File tree

.github/workflows/test.yml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,11 @@ jobs:
1515
strategy:
1616
fail-fast: false
1717
matrix:
18-
python-version: ["3.8", "3.9", "3.10", "3.11"]
18+
python-version: ["3.9", "3.10", "3.11", "3.12"]
1919
os: ["ubuntu-latest", "windows-latest", "macos-latest"]
2020
runs-on: ${{ matrix.os }}
2121
steps:
22-
- uses: actions/checkout@v3
22+
- uses: actions/checkout@v4
2323
- name: Set up Python ${{ matrix.python-version }}
2424
uses: actions/setup-python@v4
2525
with:
@@ -32,7 +32,7 @@ jobs:
3232
installer-parallel: true
3333
- name: Load cached venv
3434
id: cached-poetry-venv
35-
uses: actions/cache@v3
35+
uses: actions/cache@v4
3636
with:
3737
path: .venv
3838
key: venv-${{ matrix.python-version }}-${{ runner.os }}-${{ hashFiles('poetry.lock') }}
@@ -54,7 +54,7 @@ jobs:
5454
coverage:
5555
strategy:
5656
matrix:
57-
python-version: [ "3.8" ]
57+
python-version: [ "3.12" ]
5858
runs-on: "ubuntu-latest"
5959
steps:
6060
- uses: actions/checkout@v3
@@ -70,7 +70,7 @@ jobs:
7070
installer-parallel: true
7171
- name: Load cached venv
7272
id: cached-poetry-venv
73-
uses: actions/cache@v3
73+
uses: actions/cache@v4
7474
with:
7575
path: .venv
7676
key: venv-${{ matrix.python-version }}-${{ runner.os }}-${{ hashFiles('poetry.lock') }}
@@ -96,7 +96,7 @@ jobs:
9696
typecheck:
9797
strategy:
9898
matrix:
99-
python-version: [ "3.8" ]
99+
python-version: [ "3.12" ]
100100
runs-on: "ubuntu-latest"
101101
steps:
102102
- uses: actions/checkout@v3

codestripper/cli.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from codestripper.code_stripper import strip_files
66
from codestripper.utils import FileUtils, set_logger_level, get_working_directory
7+
from codestripper.utils.enums import UnexpectedInputOptions
78

89

910
def add_commandline_arguments(parser: argparse.ArgumentParser) -> None:
@@ -26,6 +27,10 @@ def add_commandline_arguments(parser: argparse.ArgumentParser) -> None:
2627
help="set the working directory for include/exclude", default=os.getcwd())
2728
parser.add_argument("-e", "--fail-on-error", action="store_false",
2829
help="Fail if an error occurs during code stripping")
30+
parser.add_argument("-b", "--binary", choices=list(UnexpectedInputOptions), default=UnexpectedInputOptions.FAIL,
31+
action="store", help="What to do if binary file is matched")
32+
parser.add_argument("-u", "--unknown", choices=list(UnexpectedInputOptions), default=UnexpectedInputOptions.FAIL,
33+
action="store", help="What to do if a file with unknown extension is matched")
2934

3035

3136
def main() -> None:

codestripper/code_stripper.py

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,14 @@
1010
from codestripper.tokenizer import Tokenizer
1111
from codestripper.utils import get_working_directory
1212
from codestripper.utils.comments import comments_mapping, Comment
13+
from codestripper.utils.enums import UnexpectedInputOptions
1314

1415
logger = logging.getLogger("codestripper")
1516

1617

1718
def strip_files(files: Iterable[str], working_directory: Union[str, None] = None, * ,comments: Optional[List[str]] = None,
18-
output: Union[Path, str] = "out", dry_run: bool = False, fail_on_error: bool = False) -> List[str]:
19+
output: Union[Path, str] = "out", dry_run: bool = False, fail_on_error: bool = False,
20+
binary: UnexpectedInputOptions = UnexpectedInputOptions.FAIL, unknown_extension: UnexpectedInputOptions = UnexpectedInputOptions.FAIL) -> List[str]:
1921

2022
if comments is not None:
2123
for comment in comments:
@@ -32,18 +34,41 @@ def strip_files(files: Iterable[str], working_directory: Union[str, None] = None
3234
stripped_files: List[str] = []
3335
has_errors: bool = False
3436
for file in files:
35-
with open(os.path.join(cwd, file), 'r') as handle:
36-
content = handle.read()
37+
try:
38+
with open(os.path.join(cwd, file), 'r') as handle:
39+
content = handle.read()
40+
except UnicodeDecodeError as e:
41+
if binary == UnexpectedInputOptions.FAIL:
42+
raise e
43+
elif binary == UnexpectedInputOptions.IGNORE:
44+
logger.info(f"Ignoring binary file: '{file}'")
45+
continue
46+
else:
47+
path = os.path.join(out, file)
48+
os.makedirs(os.path.dirname(path), exist_ok=True)
49+
shutil.copy2(os.path.join(cwd, file), path)
50+
stripped_files.append(file)
51+
continue
3752
if content is not None:
53+
stripped = ""
3854
try:
3955
_, file_extension = os.path.splitext(file)
4056
file_extension = file_extension.lower()
4157
if not file_extension in comments_mapping:
42-
logger.error(f"Unknown extension: '{file_extension}', "
58+
if unknown_extension == UnexpectedInputOptions.FAIL:
59+
logger.error(f"Unknown extension: '{file_extension}', "
4360
f"please specify which comment to use for this file extension.")
44-
continue
45-
com = comments_mapping[file_extension]
46-
stripped = CodeStripper(content, com).strip()
61+
has_errors = True
62+
break
63+
elif unknown_extension == UnexpectedInputOptions.IGNORE:
64+
logger.info(f"Unknown extension: '{file_extension}' ignored")
65+
continue
66+
else:
67+
# Keep the complete content
68+
stripped = content
69+
else:
70+
com = comments_mapping[file_extension]
71+
stripped = CodeStripper(content, com).strip()
4772
except IgnoreFileError:
4873
logger.info(f"File '{file}' is ignored, because of ignore tag")
4974
continue

codestripper/utils/enums.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
import enum
2+
3+
4+
class UnexpectedInputOptions(enum.Enum):
    """Policy for a matched file that cannot be processed normally.

    The same three choices are used for binary files and for files with an
    unknown extension.

    Every member value is a plain string. The values must not carry a trailing
    comma: ``FAIL = "fail",`` would make the value the tuple ``("fail",)``,
    which breaks lookup by value (``UnexpectedInputOptions("fail")``) and makes
    the members inconsistent with each other.
    """

    # Treat the file as an error.
    FAIL = "fail"
    # Skip the file.
    IGNORE = "ignore"
    # Keep the file as-is.
    INCLUDE = "include"

tests/test_codestripper.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,41 @@
11
import logging
22
import os.path
33
import re
4+
import shutil
45
from pathlib import Path
56

67
import pytest
78
from _pytest.logging import LogCaptureFixture
89

910
from codestripper.code_stripper import strip_files
1011
from codestripper.utils import FileUtils
12+
from codestripper.utils.enums import UnexpectedInputOptions
1113

1214
test_project_dir = os.path.join(Path(__file__).parent.absolute())
1315

1416

17+
def test_project_with_unknown_extension_fail(monkeypatch: pytest.MonkeyPatch):
    """strip_files must raise when unknown extensions use the FAIL policy with fail_on_error set."""
    monkeypatch.chdir(test_project_dir)
    patterns = ["**/*.java", "pom.xml", "**/*.test"]
    matched = FileUtils(patterns, working_directory="testproject").get_matching_files()

    with pytest.raises(Exception):
        strip_files(
            matched,
            "testproject",
            output="out",
            unknown_extension=UnexpectedInputOptions.FAIL,
            fail_on_error=True,
        )
23+
24+
25+
def test_project_with_unknown_extension_ignore(monkeypatch: pytest.MonkeyPatch):
    """With the IGNORE policy, 'test.test' must be absent from the list strip_files returns."""
    monkeypatch.chdir(test_project_dir)
    patterns = ["**/*.java", "pom.xml", "test.test"]
    matched = FileUtils(patterns, working_directory="testproject").get_matching_files()
    result = strip_files(
        matched,
        "testproject",
        output="out",
        unknown_extension=UnexpectedInputOptions.IGNORE,
    )
    assert "test.test" not in result
30+
31+
32+
def test_project_with_unknown_extension_include(monkeypatch: pytest.MonkeyPatch):
    """With the INCLUDE policy, 'test.test' must be present in the list strip_files returns."""
    monkeypatch.chdir(test_project_dir)
    patterns = ["**/*.java", "pom.xml", "test.test"]
    matched = FileUtils(patterns, working_directory="testproject").get_matching_files()
    result = strip_files(
        matched,
        "testproject",
        output="out",
        unknown_extension=UnexpectedInputOptions.INCLUDE,
    )
    assert "test.test" in result
37+
38+
1539
def test_project(monkeypatch: pytest.MonkeyPatch, caplog: LogCaptureFixture):
1640
monkeypatch.chdir(test_project_dir)
1741
with caplog.at_level(logging.INFO, logger='codestripper'):
@@ -132,3 +156,30 @@ def test_non_fail_on_error(monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCa
132156
strip_files(files, "files", output="out", fail_on_error=False)
133157
errors = [rec.message for rec in caplog.records]
134158
assert len(errors) == 4
159+
160+
161+
def test_project_with_binary_fail(monkeypatch: pytest.MonkeyPatch):
    """strip_files must raise when 'test.jpg' is matched and the binary policy is FAIL."""
    monkeypatch.chdir(test_project_dir)
    # Start from a clean output directory; a missing directory is not an error.
    shutil.rmtree("out", ignore_errors=True)
    patterns = ["**/*.java", "pom.xml", "test.jpg"]
    matched = FileUtils(patterns, working_directory="testproject").get_matching_files()

    with pytest.raises(Exception):
        strip_files(
            matched,
            "testproject",
            output="out",
            binary=UnexpectedInputOptions.FAIL,
            fail_on_error=True,
        )
168+
169+
170+
def test_project_with_binary_ignore(monkeypatch: pytest.MonkeyPatch):
    """With the binary IGNORE policy, 'test.jpg' must be absent from the list strip_files returns."""
    monkeypatch.chdir(test_project_dir)
    # Start from a clean output directory; a missing directory is not an error.
    shutil.rmtree("out", ignore_errors=True)
    patterns = ["**/*.java", "pom.xml", "test.jpg"]
    matched = FileUtils(patterns, working_directory="testproject").get_matching_files()

    result = strip_files(
        matched,
        "testproject",
        output="out",
        binary=UnexpectedInputOptions.IGNORE,
    )
    assert "test.jpg" not in result
177+
178+
179+
def test_project_with_binary_include(monkeypatch: pytest.MonkeyPatch):
    """With the binary INCLUDE policy, 'test.jpg' must be present in the list strip_files returns."""
    monkeypatch.chdir(test_project_dir)
    # Start from a clean output directory; a missing directory is not an error.
    shutil.rmtree("out", ignore_errors=True)
    patterns = ["**/*.java", "pom.xml", "test.jpg"]
    matched = FileUtils(patterns, working_directory="testproject").get_matching_files()

    result = strip_files(
        matched,
        "testproject",
        output="out",
        binary=UnexpectedInputOptions.INCLUDE,
    )
    assert "test.jpg" in result

tests/testproject/test.jpg

206 KB
Loading

0 commit comments

Comments
 (0)