Use feedback from live demo (#38)

fxchen · web-flow · commit ce8ad0e54241 · 2023-07-10T15:51:23.000-07:00
* Use feedback from live demo using kent_beck persona

* Update exception to be generic

* Make file handling more defensive with filename validation and sanitization

* Update default persona, temperature

* Replace magic strings and numbers with named constants
diff --git a/action_code_review.py b/action_code_review.py
@@ -2,60 +2,100 @@
 import os
 import sys
 import openai
+import string
 import re
+from typing import List
 
-def extract_filenames_from_diff(diff_text):
-  """
-  This function extracts filenames from git diff text using regular expressions.
-  
-  :param diff_text: str, git diff text
-  :return: list of str, list of filenames
-  """
+OPENAI_API_KEY = "OPENAI_API_KEY"
+VALID_CHARS = "-_.() %s%s" % (string.ascii_letters, string.digits)
+GIT_DIFF_REGEX_PATTERN = r'\+\+\+ b/(.*)'
+DEFAULT_MODEL = "gpt-3.5-turbo-16k"
+DEFAULT_STYLE = "concise"
+DEFAULT_PERSONA = "kent_beck"
+OPENAI_TEMPERATURE = 0.1
+OPENAI_MAX_TOKENS = 2048
+OPENAI_ERROR_NO_RESPONSE = "No response from OpenAI. Error:\n"
+OPENAI_ERROR_FAILED = "OpenAI failed to generate a review. Error:\n"
+
+# Make sure the necessary environment variables are set
+if OPENAI_API_KEY not in os.environ:
+  print(f"The {OPENAI_API_KEY} environment variable is not set.")
+  sys.exit(1)
+
+def validate_filename(filename: str) -> bool:
+    """
+    Validates a filename by checking for directory traversal and unusual characters.
+
+    Args:
+      filename: str, filename to be validated
+
+    Returns:
+      bool: True if the filename is valid, False otherwise
+    """
+    # Check for directory traversal
+    if ".." in filename or "/" in filename:
+        return False
 
-  # Pattern for lines that start with '+++ b/', which are lines that contain filenames
-  # of the "new version" files in the diff. The parentheses (.) create a group that
-  # captures the filename that follows '+++ b/'.
-  pattern = r'\+\+\+ b/(.*)'
+    # Check for unusual characters
+    for char in filename:
+        if char not in VALID_CHARS:
+            return False
 
-  # re.findall function finds all matches of the pattern in the diff_text
-  # and returns them as a list.
-  filenames = re.findall(pattern, diff_text)
+    return True
 
-  return filenames
+def extract_filenames_from_diff_text(diff_text: str) -> List[str]:
+  """
+  Extracts filenames from git diff text using regular expressions.
+
+  Args:
+    diff_text: str, git diff text
+
+  Returns:
+    List of filenames
+  """
+  filenames = re.findall(GIT_DIFF_REGEX_PATTERN, diff_text)
+  sanitized_filenames = [fn for fn in filenames if validate_filename(fn)]
+  return sanitized_filenames
 
-def format_file_contents(filenames):
+def format_file_contents_as_markdown(filenames: List[str]) -> str:
   """
-  This function iteratively goes through each filename and concatenates
+  Iteratively goes through each filename and concatenates
   the filename and its content in a specific markdown format.
 
-  :param filenames: list of str, list of filenames
-  :return: str, formatted string
+  Args:
+    filenames: List of filenames
+
+  Returns:
+    Formatted string
   """
-  files_str = ""
+  formatted_files = ""
   for filename in filenames:
-    with open(filename, 'r') as file:
-      content = file.read()
-    files_str += f"\n{filename}\n```\n{content}\n```\n"
-  return files_str
+    try:
+      with open(filename, 'r') as file:
+        file_content = file.read()
+      formatted_files += f"\n{filename}\n```\n{file_content}\n```\n"
+    except Exception as e:
+      print(f"Could not read file {filename}: {e}")
+  return formatted_files
 
 REQUEST = "Reply on how to improve the code (below). Think step-by-step. Give code examples of specific changes\n"
 
 STYLES = {
-"zen": "Format feedback in the style of a zen koan",
-"concise": "Format feedback concisely with numbered list"
+  "zen": "Format feedback in the style of a zen koan",
+  "concise": "Format feedback concisely with numbered list"
 }
 
 PERSONAS = {
-"developer": "You are an experienced software developer in a variety of programming languages and methodologies. You create efficient, scalable, and fault-tolerant solutions",
-"kent_beck": "You are Kent Beck. You are known for software design patterns, test-driven development (TDD), and agile methodologies",
-"marc_benioff": "You are Marc Benioff, internet entrepreneur and experienced software developer",
-"yoda": "You are Yoda, legendary Jedi Master. Speak like Yoda",
+  "developer": "You are an experienced software developer in a variety of programming languages and methodologies. You create efficient, scalable, and fault-tolerant solutions",
+  "kent_beck": "You are Kent Beck. You are known for software design patterns, test-driven development (TDD), and agile methodologies",
+  "marc_benioff": "You are Marc Benioff, internet entrepreneur and experienced software developer",
+  "yoda": "You are Yoda, legendary Jedi Master. Speak like Yoda",
 }
 
-openai.api_key = os.environ["OPENAI_API_KEY"]
-model = os.environ.get("MODEL", "gpt-3.5-turbo-16k")
-persona = PERSONAS.get(os.environ.get("PERSONA"), PERSONAS["developer"])
-style = STYLES.get(os.environ.get("STYLE"), STYLES["concise"])
+openai.api_key = os.environ[OPENAI_API_KEY]
+model = os.environ.get("MODEL", DEFAULT_MODEL)
+persona = PERSONAS.get(os.environ.get("PERSONA"), PERSONAS[DEFAULT_PERSONA])
+style = STYLES.get(os.environ.get("STYLE"), STYLES[DEFAULT_STYLE])
 include_files = os.environ.get("INCLUDE_FILES", "false") == "true"
 
 # Read in the diff
@@ -64,27 +104,29 @@ def format_file_contents(filenames):
 prompt = f"{persona}.{style}.{REQUEST}\n{diff}"
 
 kwargs = {'model': model}
-kwargs['temperature'] = 0.5
-kwargs['max_tokens'] = 2048
+kwargs['temperature'] = OPENAI_TEMPERATURE
+kwargs['max_tokens'] = OPENAI_MAX_TOKENS
 kwargs['messages']=[{"role": "system", "content": prompt}]
 
 # Optionally include files from the diff
 if include_files:
-  filenames = extract_filenames_from_diff(diff)
-  formatted_str = format_file_contents(filenames)
-  new_message = {"role": "user", "content": formatted_str}
+  filenames = extract_filenames_from_diff_text(diff)
+  formatted_files = format_file_contents_as_markdown(filenames)
+  new_message = {"role": "user", "content": formatted_files}
   kwargs['messages'].append(new_message)
-  
+
 try:
-  response  = openai.ChatCompletion.create(**kwargs)
+  response = openai.ChatCompletion.create(**kwargs)
   if response.choices:
     if 'text' in response.choices[0]:
       review_text = response.choices[0].text.strip()
     else:
       review_text = response.choices[0].message.content.strip()
   else:
-    review_text = f"No response from OpenAI\n{response.text}"
+    review_text = OPENAI_ERROR_NO_RESPONSE + response.text
+    sys.exit(review_text)  # str argument is written to stderr; exit status stays 1 (SystemExit is not caught below)
 except Exception as e:
-  review_text = f"OpenAI failed to generate a review: {e}"
+  review_text = OPENAI_ERROR_FAILED + str(e)
+  sys.exit(review_text)  # report the failure before exiting; the final print is never reached on this path
 
 print(f"{review_text}")