88
99from __future__ import annotations
1010
11+ import logging
1112import textwrap
1213from typing import Any
1314
@@ -32,6 +33,9 @@ async def analyze_repository_structure(state: RepositoryAnalysisState) -> None:
3233 installation_id = state .installation_id
3334
3435 repo_data = await github_client .get_repository (repo , installation_id = installation_id )
36+ if not repo_data :
37+ raise ValueError (f"Could not fetch repository data for { repo } " )
38+
3539 workflows = await github_client .list_directory_any_auth (
3640 repo_full_name = repo , path = ".github/workflows" , installation_id = installation_id
3741 )
@@ -42,7 +46,7 @@ async def analyze_repository_structure(state: RepositoryAnalysisState) -> None:
4246 has_codeowners = bool (await github_client .get_file_content (repo , ".github/CODEOWNERS" , installation_id )),
4347 has_workflows = bool (workflows ),
4448 workflow_count = len (workflows or []),
45- language = ( repo_data or {}) .get ("language" ),
49+ language = repo_data .get ("language" ),
4650 contributor_count = len (contributors ),
4751 pr_count = 0 ,
4852 )
@@ -54,8 +58,14 @@ async def analyze_pr_history(state: RepositoryAnalysisState, max_prs: int) -> No
5458 installation_id = state .installation_id
5559 prs = await github_client .list_pull_requests (repo , installation_id = installation_id , state = "all" , per_page = max_prs )
5660
61+ if prs is None :
62+ # If PR listing fails, continue with empty samples rather than failing
63+ state .pr_samples = []
64+ state .repository_features .pr_count = 0
65+ return
66+
5767 samples : list [PullRequestSample ] = []
58- for pr in prs or [] :
68+ for pr in prs :
5969 samples .append (
6070 PullRequestSample (
6171 number = pr .get ("number" , 0 ),
@@ -215,19 +225,28 @@ def _default_recommendations(
215225
216226 Currently, validators like `author_team_is` and `file_patterns` operate independently.
217227 """
228+ logger = logging .getLogger (__name__ )
229+
218230 recommendations : list [RuleRecommendation ] = []
219231
220232 # Get language-specific patterns based on repository analysis
221- source_patterns , test_patterns = _get_language_specific_patterns (state .repository_features .language )
233+ language = state .repository_features .language
234+ source_patterns , test_patterns = _get_language_specific_patterns (language )
235+
236+ logger .info (
237+ f"Generating recommendations for { state .repository_full_name } : language={ language } , pr_count={ state .repository_features .pr_count } "
238+ )
222239
223240 # Analyze PR history for bad habits
224241 pr_issues = _analyze_pr_bad_habits (state )
225242
226243 # Require tests when source code changes.
227244 # This is especially important if we detect missing tests in PR history
228- test_reasoning = f"Default guardrail for code changes without tests . Patterns adapted for { state . repository_features . language or 'multi-language' } repository."
245+ test_reasoning = f"Repository analysis for { state . repository_full_name } . Language: { language or 'unknown' } . Patterns adapted for { language or 'multi-language' } repository."
229246 if pr_issues .get ("missing_tests" , 0 ) > 0 :
230247 test_reasoning += f" Detected { pr_issues ['missing_tests' ]} recent PRs without test files."
248+ if state .contributing_analysis .content and state .contributing_analysis .requires_tests :
249+ test_reasoning += " Contributing guidelines explicitly require tests."
231250
232251 # Build YAML rule with proper indentation
233252 # parameters: is at column 0, source_patterns: at column 2, list items at column 4
@@ -239,55 +258,84 @@ def _default_recommendations(
239258severity: medium
240259event_types:
241260 - pull_request
242- parameters:
261+ parameters:
243262 source_patterns:
244263{ source_patterns_yaml }
245264 test_patterns:
246265{ test_patterns_yaml }
247266"""
248267
268+ confidence = 0.74
269+ if pr_issues .get ("missing_tests" , 0 ) > 0 :
270+ confidence = 0.85
271+ if state .contributing_analysis .content and state .contributing_analysis .requires_tests :
272+ confidence = min (0.95 , confidence + 0.1 )
273+
249274 recommendations .append (
250275 RuleRecommendation (
251276 yaml_rule = yaml_content .strip (),
252- confidence = 0.74 if pr_issues . get ( "missing_tests" , 0 ) == 0 else 0.85 ,
277+ confidence = confidence ,
253278 reasoning = test_reasoning ,
254279 strategy_used = "hybrid" ,
255280 )
256281 )
257282
258283 # Require description in PR body.
259284 # Increase confidence if we detect short titles in PR history (indicator of missing context)
260- desc_reasoning = "Encourage context for reviewers; lightweight default ."
285+ desc_reasoning = f"Repository analysis for { state . repository_full_name } ."
261286 if pr_issues .get ("short_titles" , 0 ) > 0 :
262287 desc_reasoning += f" Detected { pr_issues ['short_titles' ]} PRs with very short titles (likely missing context)."
288+ else :
289+ desc_reasoning += " Encourages context for reviewers; lightweight default."
290+
291+ desc_confidence = 0.68
292+ if pr_issues .get ("short_titles" , 0 ) > 0 :
293+ desc_confidence = 0.80
263294
264295 recommendations .append (
265296 RuleRecommendation (
266297 yaml_rule = textwrap .dedent (
267298 """
268299 description: "Ensure PRs include context"
269- enabled: true
300+ enabled: true
270301 severity: low
271- event_types:
272- - pull_request
273- parameters:
302+ event_types:
303+ - pull_request
304+ parameters:
274305 min_description_length: 50
275306 """
276307 ).strip (),
277- confidence = 0.68 if pr_issues . get ( "short_titles" , 0 ) == 0 else 0.80 ,
308+ confidence = desc_confidence ,
278309 reasoning = desc_reasoning ,
279310 strategy_used = "static" ,
280311 )
281312 )
282313
283- # If contributing guidelines require tests, increase confidence
284- if state .contributing_analysis .content is not None and state .contributing_analysis .requires_tests :
285- # Find the test rule and boost its confidence
286- for rec in recommendations :
287- if "tests" in rec .yaml_rule .lower ():
288- rec .confidence = min (0.95 , rec .confidence + 0.1 )
289- rec .reasoning += " Contributing guidelines explicitly require tests."
314+ # Add a rule protecting CI/CD workflow files when the repository has workflows
315+ if state .repository_features .has_workflows :
316+ workflow_rule = textwrap .dedent (
317+ """
318+ description: "Protect CI/CD workflows"
319+ enabled: true
320+ severity: high
321+ event_types:
322+ - pull_request
323+ parameters:
324+ file_patterns:
325+ - ".github/workflows/**"
326+ """
327+ ).strip ()
328+
329+ recommendations .append (
330+ RuleRecommendation (
331+ yaml_rule = workflow_rule ,
332+ confidence = 0.90 ,
333+ reasoning = f"Repository { state .repository_full_name } has { state .repository_features .workflow_count } workflows that should be protected." ,
334+ strategy_used = "static" ,
335+ )
336+ )
290337
338+ logger .info (f"Generated { len (recommendations )} recommendations for { state .repository_full_name } " )
291339 return recommendations
292340
293341
0 commit comments