Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions comment_parser/comment_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from comment_parser.parsers import python_parser
from comment_parser.parsers import ruby_parser
from comment_parser.parsers import shell_parser
from comment_parser.parsers import lisp_parser

MIME_MAP = {
'application/javascript': js_parser, # Javascript
Expand All @@ -48,6 +49,10 @@
'text/x-script.python': python_parser, # Python
'text/x-shellscript': shell_parser, # Unix shell
'text/xml': html_parser, # XML
'text/x-lisp': lisp_parser, # Lisp
'text/x-clojure': lisp_parser, # Clojure
'text/x-racket': lisp_parser, # Racket
'text/x-scheme': lisp_parser, # Scheme
}


Expand Down
59 changes: 59 additions & 0 deletions comment_parser/parsers/lisp_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/usr/bin/python
"""This module provides support for parsing the Lisp family of languages

Works with:
Lisp
Scheme
Racket
Clojure (not including the (comment) form)
... and other languages which use the leading ; as the comment form
"""

import re
from bisect import bisect_left
from typing import List
from comment_parser.parsers import common

# Tokenizer used by extract_comments. The alternatives are tried left to right
# at each position, so text consumed as a string literal or as an escaped
# character can never be re-read as the start of a comment.
_LISP_TOKEN_RE = re.compile(
    r"""
    (?P<literal> "(?:\\[\s\S]|[^"\\])*" ) |        # string; may span lines, honours \" and \\
    (?P<escape>  \\[\s\S] ) |                      # backslash + one char outside a string
    (?P<single>  ;+(?P<single_content>.*)$ )       # run of ';' up to the end of the line
    """,
    re.VERBOSE | re.MULTILINE,
)


def extract_comments(code: str) -> List[common.Comment]:
    """Extracts a list of comments from a given Lisp family source code.

    Comments are represented with the Comment class found in the common
    module. Only line comments are recognised: a run of one or more `;`
    characters starts a comment that extends to the end of the line. The
    leading `;` characters are dropped, so `; x`, `;; x` and `;;; x` all
    yield the text ` x`; no distinction is made between the different
    comment levels some dialects assign to them.

    A `;` is NOT treated as a comment start when it appears:
      * inside a double-quoted string literal, including literals that span
        several lines or that contain backslash escapes such as `\\"`;
      * directly after a backslash outside a string (for example the
        character literals `#\\;` and `\\;`).

    Block comment forms (for example `#| ... |#`) are not recognised by this
    parser.

    Args:
      code (str): String containing code to extract comments from.
    Returns:
      List[common.Comment]: list of comments in the order that they appear in
        the code. Line numbers are 1-based.
    """
    # Offsets of every newline, in ascending order. The number of newlines
    # that precede a match is the zero-based index of the line it starts on.
    newline_offsets = [m.start() for m in re.finditer(r"\n", code)]

    comments = []
    for match in _LISP_TOKEN_RE.finditer(code):
        content = match.group("single_content")
        if content is None:
            # A string literal or escaped character: consumed only so that
            # any ';' inside it is skipped.
            continue
        line_no = bisect_left(newline_offsets, match.start()) + 1
        comments.append(common.Comment(content, line_no))

    return comments
37 changes: 37 additions & 0 deletions comment_parser/parsers/tests/lisp_parser_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/usr/bin/python
"""Tests for comment_parser.parsers.lisp_parser.py"""

import unittest
from comment_parser.parsers import common
from comment_parser.parsers import lisp_parser

class LispParerTest(unittest.TestCase):
    """Exercises lisp_parser.extract_comments on small Lisp snippets."""

    def testSimpleMain(self):
        # A comment line followed by a line of code: only the comment text
        # (everything after the leading ';' on line 1) is expected back.
        source = "; this is a comment\n(format t \"Hello, World!\")"
        found = lisp_parser.extract_comments(source)
        wanted = [common.Comment(source[1:19], 1, False)]
        self.assertEqual(found, wanted)

    def testSingleLineComment(self):
        # The whole input is one comment with no trailing newline.
        source = "; single line comment"
        found = lisp_parser.extract_comments(source)
        wanted = [common.Comment(source[1:], 1, False)]
        self.assertEqual(found, wanted)

    def testSingleLineCommentInStringLiteral(self):
        # A ';' inside a string literal must not be reported.
        source = '(format t "; this is not a comment")'
        found = lisp_parser.extract_comments(source)
        self.assertEqual(found, [])

    def testMultipleCommentCharacters(self):
        # Both leading ';' characters are stripped from the comment text.
        source = ';; this is a comment'
        found = lisp_parser.extract_comments(source)
        wanted = [common.Comment(source[2:], 1, False)]
        self.assertEqual(found, wanted)

    def testCommentsAfterLine(self):
        # A comment trailing code on the same line is still picked up.
        source = '(t format "Hello World") ; this is a comment'
        found = lisp_parser.extract_comments(source)
        wanted = [common.Comment(' this is a comment', 1, False)]
        self.assertEqual(found, wanted)