jeanralphaviles · d-huck · Mar 15, 2023 · Mar 16, 2023 · Mar 16, 2023
diff --git a/comment_parser/comment_parser.py b/comment_parser/comment_parser.py
@@ -33,6 +33,7 @@
 from comment_parser.parsers import python_parser
 from comment_parser.parsers import ruby_parser
 from comment_parser.parsers import shell_parser
+from comment_parser.parsers import lisp_parser
 
 MIME_MAP = {
     'application/javascript': js_parser,  # Javascript
@@ -48,6 +49,10 @@
     'text/x-script.python': python_parser,  # Python
     'text/x-shellscript': shell_parser,  # Unix shell
     'text/xml': html_parser,  # XML
+    'text/x-lisp': lisp_parser,  # Lisp
+    'text/x-clojure': lisp_parser,  # Clojure
+    'text/x-racket': lisp_parser,  # Racket
+    'text/x-scheme': lisp_parser,  # Scheme
 }
 
 

diff --git a/comment_parser/parsers/lisp_parser.py b/comment_parser/parsers/lisp_parser.py
@@ -0,0 +1,69 @@
+#!/usr/bin/python
+"""This module provides support for parsing the Lisp family of languages.
+
+Works with:
+  Lisp
+  Scheme
+  Racket
+  Clojure (not including the (comment) form)
+  ... and other languages which use the leading ; as the comment form
+
+Only `;` line comments are extracted. Block comments (`#| ... |#`) and datum
+comments (`#;`) are not recognized by this parser.
+"""
+
+import re
+from bisect import bisect_left
+from typing import List
+from comment_parser.parsers import common
+
+# Alternatives are tried in order at each position, so a `;` that sits inside
+# a string literal or directly after a backslash is consumed by an earlier
+# alternative and never reaches the comment rule.
+#   literal: double-quoted string; may span lines and contain backslash
+#     escapes such as \" without being terminated by them.
+#   escape: backslash plus one character outside a string, which covers the
+#     character literals #\; and #\" (Lisp, Scheme, Racket) and \; and \"
+#     (Clojure).
+#   single: one or more `;` followed by the rest of the line.
+_TOKEN_PATTERN = re.compile(
+    r"""
+    (?P<literal> \"(?: \\[\s\S] | [^\"\\] )*\") |
+    (?P<escape> \\.) |
+    (?P<single> ;+(?P<single_content>.*)$)
+    """, re.VERBOSE | re.MULTILINE)
+
+
+def extract_comments(code: str) -> List[common.Comment]:
+  """Extracts a list of comments from the given Lisp family source code.
+
+  Comments are represented with the Comment class found in the common module.
+  Only line comments are extracted: a run of one or more `;` up to the end of
+  the line, unless the `;` is inside a string literal or directly follows a
+  backslash (as in a character literal). String literals may span several
+  lines and may contain backslash-escaped quotes.
+
+  Languages in the Lisp family use multiple `;` to denote certain types of
+  comments. For example, a single `;` may mark an inline comment while two
+  (`;;`) or more may mark official documentation. This parser does not
+  differentiate between them: it consumes every leading `;` and returns the
+  remaining text of the line.
+
+  Args:
+    code (str): String containing code to extract comments from.
+  Returns:
+    List[common.Comment]: List of comments in the order that they appear in
+      the code.
+  """
+  # A match that starts after k newlines is on line k + 1.
+  newline_offsets = [found.start() for found in re.finditer(r"\n", code)]
+
+  comments = []
+  for match in _TOKEN_PATTERN.finditer(code):
+    content = match.group("single_content")
+    if content is None:
+      # String literal or escaped character: consumed but not reported.
+      continue
+    line_number = bisect_left(newline_offsets, match.start()) + 1
+    comments.append(common.Comment(content, line_number))
+  return comments
diff --git a/comment_parser/parsers/tests/lisp_parser_test.py b/comment_parser/parsers/tests/lisp_parser_test.py
@@ -0,0 +1,67 @@
+#!/usr/bin/python
+"""Tests for comment_parser.parsers.lisp_parser.py"""
+
+import unittest
+from comment_parser.parsers import common
+from comment_parser.parsers import lisp_parser
+
+
+# NOTE(review): "Parer" in the class name looks like a typo for "Parser"; the
+# name is left unchanged here.
+class LispParerTest(unittest.TestCase):
+  """Checks lisp_parser.extract_comments against small Lisp snippets."""
+
+  def testSimpleMain(self):
+    # A comment line followed by code that contains a string literal.
+    code = "; this is a comment\n(format t \"Hello, World!\")"
+    comments = lisp_parser.extract_comments(code)
+    expected = [common.Comment(code[1:19], 1, False)]
+    self.assertEqual(comments, expected)
+
+  def testSingleLineComment(self):
+    code = "; single line comment"
+    comments = lisp_parser.extract_comments(code)
+    expected = [common.Comment(code[1:], 1, False)]
+    self.assertEqual(comments, expected)
+
+  def testSingleLineCommentInStringLiteral(self):
+    # A `;` inside a string literal must not start a comment.
+    code = '(format t "; this is not a comment")'
+    comments = lisp_parser.extract_comments(code)
+    self.assertEqual(comments, [])
+
+  def testMultipleCommentCharacters(self):
+    # Every leading `;` is dropped from the comment text.
+    code = ';; this is a comment'
+    comments = lisp_parser.extract_comments(code)
+    expected = [common.Comment(code[2:], 1, False)]
+    self.assertEqual(comments, expected)
+
+  def testCommentsAfterLine(self):
+    # A comment may trail code on the same line.
+    code = '(t format "Hello World") ; this is a comment'
+    comments = lisp_parser.extract_comments(code)
+    expected = [common.Comment(' this is a comment', 1, False)]
+    self.assertEqual(comments, expected)
+
+  def testEmptyComment(self):
+    # A bare `;` is a comment with no text.
+    code = ';'
+    comments = lisp_parser.extract_comments(code)
+    expected = [common.Comment('', 1, False)]
+    self.assertEqual(comments, expected)
+
+  def testLineNumbers(self):
+    # Line numbers are 1-based and counted across the whole input.
+    code = '(define x 1)\n; first\n(define y 2) ;; second\n'
+    comments = lisp_parser.extract_comments(code)
+    expected = [
+        common.Comment(' first', 2, False),
+        common.Comment(' second', 3, False),
+    ]
+    self.assertEqual(comments, expected)
+
+  def testNoComments(self):
+    code = '(+ 1 2)'
+    comments = lisp_parser.extract_comments(code)
+    self.assertEqual(comments, [])