SCE-Development · vineeshah · Mar 23, 2026
diff --git a/review.py b/review.py
@@ -30,7 +30,7 @@
 }
 
 # Max number of comments to post (safety limit)
-MAX_COMMENTS_PER_PR = 50
+MAX_COMMENTS_PER_PR = 5
 
 # GitHub API rate limit retry settings
 MAX_RETRIES = 3
@@ -160,9 +160,10 @@ def process_review(repo: str, pr_number: int, installation_id: int):
     # Review each file
     # -------------------------
 
-    comments = []
+    all_file_comments = []  # Per-file LLM findings, each a dict with path, line, body, and severity
     client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
 
+    # First pass: Review each file individually and collect all potential issues
     for file in filtered_files:
         filename = file["filename"]
 
@@ -189,12 +190,19 @@ def process_review(repo: str, pr_number: int, installation_id: int):
   "comments": [
     {{
       "line": <line number>,
-      "message": "<review comment>"
+      "message": "<review comment>",
+      "severity": "critical" | "high" | "medium" | "low"
     }}
   ]
 }}
 
-If there are no issues, return:
+Severity guidelines:
+- critical: security vulnerabilities, data loss, crashes, broken functionality
+- high: significant bugs, performance issues, major code smells
+- medium: moderate issues, improvements needed
+- low: minor style issues, suggestions, nitpicks
+
+Only return issues that are truly worth commenting on. If there are no issues, return:
 
 {{ "comments": [] }}
 """
@@ -213,15 +221,100 @@ def process_review(repo: str, pr_number: int, installation_id: int):
             data = json.loads(result)
 
             for c in data["comments"]:
-                comments.append({
+                all_file_comments.append({
                     "path": filename,
                     "line": c["line"],
-                    "body": c["message"]
+                    "body": c["message"],
+                    "severity": c.get("severity", "medium")
                 })
 
         except Exception as e:
             print(f"LLM review failed for {filename}: {e}")
 
+    # Second pass: Consolidate and prioritize all comments (at most MAX_COMMENTS_PER_PR total)
+    print(f"\nCollected {len(all_file_comments)} potential issues. Consolidating to max {MAX_COMMENTS_PER_PR}...")
+
+    comments = []
+    if all_file_comments:
+        # Build a summary of all issues for prioritization
+        issues_summary = ""
+        for idx, comment in enumerate(all_file_comments, 1):
+            issues_summary += f"{idx}. [{comment['severity'].upper()}] {comment['path']}:{comment['line']} - {comment['body'][:100]}...\n"
+
+        consolidation_prompt = f"""
+You are a senior software engineer conducting a final PR review.
+
+The following potential issues were identified across all files:
+
+{issues_summary}
+
+Select at most {MAX_COMMENTS_PER_PR} of the MOST CRITICAL issues to actually comment on.
+
+Rules:
+- Prioritize critical and high severity issues first
+- Skip low-severity issues (style, nitpicks) unless there's something really important
+- If multiple issues are related, consider if they can be combined into a single comment
+- Be conservative - it's better to have fewer, more impactful comments than many minor ones
+
+Return JSON in this format:
+
+{{
+  "selected_issues": [
+    {{
+      "original_index": <index from list above>,
+      "comment": {{
+        "line": <line number>,
+        "message": "<final review comment - refine if needed for clarity>"
+      }}
+    }}
+  ],
+  "summary_comment": "<optional single summary comment if you want to add general feedback instead of or in addition to specific line comments>"
+}}
+
+If no issues are worth commenting on, return: {{ "selected_issues": [], "summary_comment": "" }}
+"""
+
+        try:
+            response = client.chat.completions.create(
+                model="gpt-4.1-mini",
+                messages=[
+                    {"role": "system", "content": "You are a senior software engineer with extreme technical expertise."},
+                    {"role": "user", "content": consolidation_prompt}
+                ],
+                temperature=0
+            )
+
+            result = response.choices[0].message.content
+            data = json.loads(result)
+
+            # Add selected specific comments
+            for selected in data["selected_issues"][:MAX_COMMENTS_PER_PR]:
+                idx = selected["original_index"] - 1
+                if 0 <= idx < len(all_file_comments):
+                    original = all_file_comments[idx]
+                    comments.append({
+                        "path": original["path"],
+                        "line": selected["comment"]["line"],
+                        "body": selected["comment"]["message"]
+                    })
+
+            # Optionally add summary comment if present and we have room
+            if data.get("summary_comment") and len(comments) < MAX_COMMENTS_PER_PR:
+                # Summary comments are posted as general PR comments (no line number)
+                comments.append({
+                    "path": None,  # Indicates general PR comment
+                    "line": None,
+                    "body": data["summary_comment"]
+                })
+
+        except Exception as e:
+            print(f"Consolidation failed, falling back to all comments: {e}")
+            # Fallback: discard any partial selection and keep the first MAX_COMMENTS_PER_PR collected comments (collection order, not ranked by severity)
+            comments = [
+                {"path": c["path"], "line": c["line"], "body": c["body"]}
+                for c in all_file_comments[:MAX_COMMENTS_PER_PR]
+            ]
+
     # -------------------------
     # Get commit SHA
     # -------------------------
@@ -239,20 +332,26 @@ def process_review(repo: str, pr_number: int, installation_id: int):
     # -------------------------
 
     for comment in comments:
-        print(f"Comment on {comment['path']}:{comment['line']}: {comment['body']}")
-
-        comment_url = f"https://api.github.com/repos/{repo}/pulls/{pr_number}/comments"
-        payload = {
-            "body": comment["body"],
-            "commit_id": commit_id,
-            "path": comment["path"],
-            "line": comment["line"]
-        }
+        if comment.get("path") is None:
+            # Post as general PR comment (issue comment)
+            print(f"Posting summary comment: {comment['body'][:50]}...")
+            comment_url = f"https://api.github.com/repos/{repo}/issues/{pr_number}/comments"
+            payload = {"body": comment["body"]}
+        else:
+            # Post as line-specific review comment
+            print(f"Comment on {comment['path']}:{comment['line']}: {comment['body'][:50]}...")
+            comment_url = f"https://api.github.com/repos/{repo}/pulls/{pr_number}/comments"
+            payload = {
+                "body": comment["body"],
+                "commit_id": commit_id,
+                "path": comment["path"],
+                "line": comment["line"]
+            }
 
         try:
             response = requests.post(comment_url, headers=headers, json=payload)
             response.raise_for_status()
         except Exception as e:
             print(f"Failed to post comment: {e}")
 
-    print(f"Review complete: {len(comments)} comments posted.")
+    print(f"Review complete: {len(comments)} comment(s) posted.")