Skip to content

Commit 7d1824c

Browse files
authored
Merge pull request #34 from naaa760/fix/pr-creation-404-and-analysis-reliability
fix: repository analysis caching and PR creation 404 issues
2 parents ed6e38f + 45c7ff3 commit 7d1824c

4 files changed

Lines changed: 164 additions & 34 deletions

File tree

src/agents/repository_analysis_agent/agent.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,10 @@ async def execute(self, **kwargs) -> AgentResult:
3838
await analyze_pr_history(state, request.max_prs)
3939
await analyze_contributing_guidelines(state)
4040

41+
# Only generate recommendations if we have basic repository data
42+
if not state.repository_features.language:
43+
raise ValueError("Unable to determine repository language - cannot generate appropriate rules")
44+
4145
state.recommendations = _default_recommendations(state)
4246
validate_recommendations(state)
4347
response = summarize_analysis(state, request)

src/agents/repository_analysis_agent/nodes.py

Lines changed: 67 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
from __future__ import annotations
1010

11+
import logging
1112
import textwrap
1213
from typing import Any
1314

@@ -32,6 +33,9 @@ async def analyze_repository_structure(state: RepositoryAnalysisState) -> None:
3233
installation_id = state.installation_id
3334

3435
repo_data = await github_client.get_repository(repo, installation_id=installation_id)
36+
if not repo_data:
37+
raise ValueError(f"Could not fetch repository data for {repo}")
38+
3539
workflows = await github_client.list_directory_any_auth(
3640
repo_full_name=repo, path=".github/workflows", installation_id=installation_id
3741
)
@@ -42,7 +46,7 @@ async def analyze_repository_structure(state: RepositoryAnalysisState) -> None:
4246
has_codeowners=bool(await github_client.get_file_content(repo, ".github/CODEOWNERS", installation_id)),
4347
has_workflows=bool(workflows),
4448
workflow_count=len(workflows or []),
45-
language=(repo_data or {}).get("language"),
49+
language=repo_data.get("language"),
4650
contributor_count=len(contributors),
4751
pr_count=0,
4852
)
@@ -54,8 +58,14 @@ async def analyze_pr_history(state: RepositoryAnalysisState, max_prs: int) -> No
5458
installation_id = state.installation_id
5559
prs = await github_client.list_pull_requests(repo, installation_id=installation_id, state="all", per_page=max_prs)
5660

61+
if prs is None:
62+
# If PR listing fails, continue with empty samples rather than failing
63+
state.pr_samples = []
64+
state.repository_features.pr_count = 0
65+
return
66+
5767
samples: list[PullRequestSample] = []
58-
for pr in prs or []:
68+
for pr in prs:
5969
samples.append(
6070
PullRequestSample(
6171
number=pr.get("number", 0),
@@ -215,19 +225,28 @@ def _default_recommendations(
215225
216226
Currently, validators like `author_team_is` and `file_patterns` operate independently.
217227
"""
228+
logger = logging.getLogger(__name__)
229+
218230
recommendations: list[RuleRecommendation] = []
219231

220232
# Get language-specific patterns based on repository analysis
221-
source_patterns, test_patterns = _get_language_specific_patterns(state.repository_features.language)
233+
language = state.repository_features.language
234+
source_patterns, test_patterns = _get_language_specific_patterns(language)
235+
236+
logger.info(
237+
f"Generating recommendations for {state.repository_full_name}: language={language}, pr_count={state.repository_features.pr_count}"
238+
)
222239

223240
# Analyze PR history for bad habits
224241
pr_issues = _analyze_pr_bad_habits(state)
225242

226243
# Require tests when source code changes.
227244
# This is especially important if we detect missing tests in PR history
228-
test_reasoning = f"Default guardrail for code changes without tests. Patterns adapted for {state.repository_features.language or 'multi-language'} repository."
245+
test_reasoning = f"Repository analysis for {state.repository_full_name}. Language: {language or 'unknown'}. Patterns adapted for {language or 'multi-language'} repository."
229246
if pr_issues.get("missing_tests", 0) > 0:
230247
test_reasoning += f" Detected {pr_issues['missing_tests']} recent PRs without test files."
248+
if state.contributing_analysis.content and state.contributing_analysis.requires_tests:
249+
test_reasoning += " Contributing guidelines explicitly require tests."
231250

232251
# Build YAML rule with proper indentation
233252
# parameters: is at column 0, source_patterns: at column 2, list items at column 4
@@ -239,55 +258,84 @@ def _default_recommendations(
239258
severity: medium
240259
event_types:
241260
- pull_request
242-
parameters:
261+
parameters:
243262
source_patterns:
244263
{source_patterns_yaml}
245264
test_patterns:
246265
{test_patterns_yaml}
247266
"""
248267

268+
confidence = 0.74
269+
if pr_issues.get("missing_tests", 0) > 0:
270+
confidence = 0.85
271+
if state.contributing_analysis.content and state.contributing_analysis.requires_tests:
272+
confidence = min(0.95, confidence + 0.1)
273+
249274
recommendations.append(
250275
RuleRecommendation(
251276
yaml_rule=yaml_content.strip(),
252-
confidence=0.74 if pr_issues.get("missing_tests", 0) == 0 else 0.85,
277+
confidence=confidence,
253278
reasoning=test_reasoning,
254279
strategy_used="hybrid",
255280
)
256281
)
257282

258283
# Require description in PR body.
259284
# Increase confidence if we detect short titles in PR history (indicator of missing context)
260-
desc_reasoning = "Encourage context for reviewers; lightweight default."
285+
desc_reasoning = f"Repository analysis for {state.repository_full_name}."
261286
if pr_issues.get("short_titles", 0) > 0:
262287
desc_reasoning += f" Detected {pr_issues['short_titles']} PRs with very short titles (likely missing context)."
288+
else:
289+
desc_reasoning += " Encourages context for reviewers; lightweight default."
290+
291+
desc_confidence = 0.68
292+
if pr_issues.get("short_titles", 0) > 0:
293+
desc_confidence = 0.80
263294

264295
recommendations.append(
265296
RuleRecommendation(
266297
yaml_rule=textwrap.dedent(
267298
"""
268299
description: "Ensure PRs include context"
269-
enabled: true
300+
enabled: true
270301
severity: low
271-
event_types:
272-
- pull_request
273-
parameters:
302+
event_types:
303+
- pull_request
304+
parameters:
274305
min_description_length: 50
275306
"""
276307
).strip(),
277-
confidence=0.68 if pr_issues.get("short_titles", 0) == 0 else 0.80,
308+
confidence=desc_confidence,
278309
reasoning=desc_reasoning,
279310
strategy_used="static",
280311
)
281312
)
282313

283-
# If contributing guidelines require tests, increase confidence
284-
if state.contributing_analysis.content is not None and state.contributing_analysis.requires_tests:
285-
# Find the test rule and boost its confidence
286-
for rec in recommendations:
287-
if "tests" in rec.yaml_rule.lower():
288-
rec.confidence = min(0.95, rec.confidence + 0.1)
289-
rec.reasoning += " Contributing guidelines explicitly require tests."
314+
# Add a repository-specific rule if we detect specific patterns
315+
if state.repository_features.has_workflows:
316+
workflow_rule = textwrap.dedent(
317+
"""
318+
description: "Protect CI/CD workflows"
319+
enabled: true
320+
severity: high
321+
event_types:
322+
- pull_request
323+
parameters:
324+
file_patterns:
325+
- ".github/workflows/**"
326+
"""
327+
).strip()
328+
329+
recommendations.append(
330+
RuleRecommendation(
331+
yaml_rule=workflow_rule,
332+
confidence=0.90,
333+
reasoning=f"Repository {state.repository_full_name} has {state.repository_features.workflow_count} workflows that should be protected.",
334+
strategy_used="static",
335+
)
336+
)
290337

338+
logger.info(f"Generated {len(recommendations)} recommendations for {state.repository_full_name}")
291339
return recommendations
292340

293341

src/agents/repository_analysis_agent/prompts.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -83,14 +83,8 @@
8383
severity: "medium"
8484
event_types:
8585
- pull_request
86-
conditions:
87-
- type: "condition_type"
88-
parameters:
89-
key: "value"
90-
actions:
91-
- type: "action_type"
92-
parameters:
93-
key: "value"
86+
parameters:
87+
key: "value"
9488
```
9589
9690
Make sure the rule is functional and follows best practices.

src/api/recommendations.py

Lines changed: 91 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,9 @@ async def recommend_rules(
4848
if not request.repository_full_name or "/" not in request.repository_full_name:
4949
raise HTTPException(status_code=400, detail="Invalid repository name format. Expected 'owner/repo'")
5050

51-
cache_key = f"repo_analysis:{request.repository_full_name}"
51+
# Include authentication context in cache key to ensure different access levels get different results
52+
auth_context = request.installation_id or request.user_token or "anonymous"
53+
cache_key = f"repo_analysis:{request.repository_full_name}:{auth_context}"
5254
cached_result = await get_cache(cache_key)
5355

5456
if cached_result:
@@ -57,6 +59,7 @@ async def recommend_rules(
5759
"cache_hit",
5860
operation="repository_analysis",
5961
subject_ids=[request.repository_full_name],
62+
auth_context=auth_context,
6063
cached=True,
6164
)
6265
return RepositoryAnalysisResponse(**cached_result)
@@ -85,6 +88,8 @@ async def recommend_rules(
8588
decision="failed",
8689
error=result.message,
8790
)
91+
# Clear any cached results for this repository to ensure fresh analysis on retry
92+
await set_cache(cache_key, None, ttl=1) # Use 1 second TTL to effectively clear cache
8893
raise HTTPException(status_code=500, detail=result.message)
8994

9095
analysis_response = result.data.get("analysis_response")
@@ -161,6 +166,18 @@ async def proceed_with_pr(request: ProceedWithPullRequestRequest) -> ProceedWith
161166
branch=request.branch_name,
162167
existing_sha=existing_branch_sha,
163168
)
169+
# Verify the branch points to the correct base
170+
if existing_branch_sha != base_sha:
171+
log_structured(
172+
logger,
173+
"branch_sha_mismatch",
174+
operation="proceed_with_pr",
175+
subject_ids=[repo],
176+
branch=request.branch_name,
177+
existing_sha=existing_branch_sha,
178+
expected_sha=base_sha,
179+
warning="Branch exists but points to different SHA than base branch",
180+
)
164181
else:
165182
# Create new branch
166183
created_ref = await github_client.create_git_ref(repo, request.branch_name, base_sha, **auth_ctx)
@@ -182,6 +199,15 @@ async def proceed_with_pr(request: ProceedWithPullRequestRequest) -> ProceedWith
182199
"The branch may already exist or you may not have permission to create branches."
183200
),
184201
)
202+
log_structured(
203+
logger,
204+
"branch_created",
205+
operation="proceed_with_pr",
206+
subject_ids=[repo],
207+
branch=request.branch_name,
208+
base_branch=base_branch,
209+
new_sha=created_ref.get("object", {}).get("sha"),
210+
)
185211

186212
file_result = await github_client.create_or_update_file(
187213
repo_full_name=repo,
@@ -209,6 +235,17 @@ async def proceed_with_pr(request: ProceedWithPullRequestRequest) -> ProceedWith
209235
),
210236
)
211237

238+
commit_sha = (file_result.get("commit") or {}).get("sha")
239+
log_structured(
240+
logger,
241+
"file_created",
242+
operation="proceed_with_pr",
243+
subject_ids=[repo],
244+
branch=request.branch_name,
245+
file_path=request.file_path,
246+
commit_sha=commit_sha,
247+
)
248+
212249
pr = await github_client.create_pull_request(
213250
repo_full_name=repo,
214251
title=request.pr_title,
@@ -237,16 +274,62 @@ async def proceed_with_pr(request: ProceedWithPullRequestRequest) -> ProceedWith
237274
)
238275

239276
pr_url = pr.get("html_url", "")
240-
if not pr_url:
277+
pr_number = pr.get("number")
278+
if not pr_url or not pr_number:
241279
log_structured(
242280
logger,
243-
"pr_url_missing",
281+
"pr_creation_incomplete",
244282
operation="proceed_with_pr",
245283
subject_ids=[repo],
246284
pr_data=pr,
247-
error="PR created but html_url is missing",
285+
pr_url=pr_url,
286+
pr_number=pr_number,
287+
error="PR creation response missing required fields",
288+
)
289+
raise HTTPException(status_code=500, detail="PR was created but response is incomplete")
290+
291+
# Validate the PR URL is a proper GitHub URL format
292+
if not pr_url.startswith("https://github.com/") or "/pull/" not in pr_url:
293+
log_structured(
294+
logger,
295+
"pr_url_invalid",
296+
operation="proceed_with_pr",
297+
subject_ids=[repo],
298+
pr_url=pr_url,
299+
pr_number=pr_number,
300+
error="PR URL is not a valid GitHub pull request URL",
301+
)
302+
raise HTTPException(status_code=500, detail="PR was created but returned invalid URL format")
303+
304+
# Validate PR number is reasonable
305+
if not isinstance(pr_number, int) or pr_number <= 0:
306+
log_structured(
307+
logger,
308+
"pr_number_invalid",
309+
operation="proceed_with_pr",
310+
subject_ids=[repo],
311+
pr_url=pr_url,
312+
pr_number=pr_number,
313+
error="PR number is invalid",
314+
)
315+
raise HTTPException(status_code=500, detail="PR was created but returned invalid PR number")
316+
317+
# Double-check URL format one more time
318+
expected_url_pattern = f"https://github.com/{repo}/pull/{pr_number}"
319+
if pr_url != expected_url_pattern:
320+
log_structured(
321+
logger,
322+
"pr_url_mismatch",
323+
operation="proceed_with_pr",
324+
subject_ids=[repo],
325+
expected_url=expected_url_pattern,
326+
actual_url=pr_url,
327+
pr_number=pr_number,
328+
error="PR URL doesn't match expected pattern",
329+
)
330+
raise HTTPException(
331+
status_code=500, detail=f"PR URL mismatch: expected {expected_url_pattern} but got {pr_url}"
248332
)
249-
raise HTTPException(status_code=500, detail="PR was created but URL is missing")
250333

251334
log_structured(
252335
logger,
@@ -255,11 +338,12 @@ async def proceed_with_pr(request: ProceedWithPullRequestRequest) -> ProceedWith
255338
subject_ids=[repo],
256339
decision="success",
257340
branch=request.branch_name,
258-
pr_number=pr.get("number"),
341+
pr_number=pr_number,
342+
pr_url=pr_url,
259343
)
260344

261345
return ProceedWithPullRequestResponse(
262-
pull_request_url=pr.get("html_url", ""),
346+
pull_request_url=pr_url,
263347
branch_name=request.branch_name,
264348
base_branch=base_branch,
265349
file_path=request.file_path,

0 commit comments

Comments
 (0)