diff --git a/application/utils/external_project_parsers/parsers/misc_tools_parser.py b/application/utils/external_project_parsers/parsers/misc_tools_parser.py index e4333b5fe..1a750240a 100644 --- a/application/utils/external_project_parsers/parsers/misc_tools_parser.py +++ b/application/utils/external_project_parsers/parsers/misc_tools_parser.py @@ -3,8 +3,7 @@ import os import re import urllib -from typing import List, NamedTuple -from xmlrpc.client import boolean +from typing import List from application.database import db from application.defs import cre_defs as defs @@ -22,7 +21,7 @@ class MiscTools(ParserInterface): - name = "miscelaneous tools" + name = "miscellaneous tools" tool_urls = [ "https://github.com/commjoen/wrongsecrets.git", ] @@ -33,23 +32,30 @@ def parse(self, cache: db.Node_collection, ph: prompt_client.PromptHandler): tools = {} for url in self.tool_urls: tool_entries = self.parse_tool(cache=cache, tool_repo=url) - tools[tool_entries[0].name] = tool_entries + if tool_entries: + tools[tool_entries[0].name] = tool_entries return ParseResult(results=tools) def parse_tool( - self, tool_repo: str, cache: db.Node_collection, dry_run: boolean = False - ): - if not dry_run: - repo = git.clone(tool_repo) + self, tool_repo: str, cache: db.Node_collection, dry_run: bool = False + ) -> List[defs.Tool]: + if dry_run: + logger.info("dry run, skipping clone and parsing for %s", tool_repo) + return [] + repo = git.clone(tool_repo) readme = os.path.join(repo.working_dir, "README.md") title_regexp = r"# (?P(\w+ ?)+)" cre_link = r".*\[.*\]\((?P<url>(https\:\/\/www\.)?opencre\.org\/cre\/(?P<cre>\d+-\d+).*)" - tool_entries = [] + tool_entries: List[defs.Tool] = [] with open(readme) as rdf: mdtext = rdf.read() if "opencre.org" not in mdtext: - logging.error("didn't find a link, bye") + logger.error( + "no opencre.org link found in %s for repo %s, skipping", + readme, + tool_repo, + ) return [] title = re.search(title_regexp, mdtext) cre = re.search(cre_link, mdtext, flags=re.IGNORECASE) @@ -86,7 +92,7 @@ def parse_tool( document=dbcre, ) ) - print( + logger.info( f"Registered new Document of type:Tool, toolType: {tool_type}, name:{tool_name} and hyperlink:{hyperlink}," f"linked to cre:{dbcre.id}" )