# Source: AutoPiff/rules/scoring.yaml (splintersfury/AutoPiff, master branch)
# AutoPiff scoring model (v2 comprehensive)
# Goal: rank findings by "worth a human's time" while minimizing noise.
# Key principles:
# - Reachability matters a lot (ioctl > irp > pnp > internal)
# - Conservative semantic rules dominate score
# - Pairing noise penalizes heavily
# - Confidence gates everything (low-confidence findings should not float to the top)
# v2: expanded from 11 rules / 5 categories to 58 rules / 22 categories,
# plus 8 new-attack-surface rules in their own category.

# Version of this scoring model file; the header describes it as "v2".
version: 2

# Global caps and thresholds.
# Per `composition`, these caps/drops are applied after the final score
# has been computed.
gating:
  # If matching confidence is very low, cap total score hard.
  # `min_required` is the confidence threshold; `cap_if_below` is the cap.
  matching_confidence:
    min_required: 0.40
    cap_if_below: 3.0

  # If semantic confidence is low, cap score.
  # Two tiers: a soft minimum (score is capped) and a lower hard minimum.
  semantic_confidence:
    soft_min: 0.60
    cap_if_below_soft_min: 5.0
    hard_min: 0.45
    # NOTE(review): presumably the finding is discarded entirely when
    # semantic confidence is below hard_min — confirm against the ranker.
    drop_if_below_hard_min: true

  # If reachability confidence is low, reduce reachability contribution.
  # `multiplier_if_below` is referenced by name from
  # composition.reachability_confidence_adjust.below_soft_min_multiplier.
  reachability_confidence:
    soft_min: 0.55
    multiplier_if_below: 0.70

# Base scoring contributions
weights:
  # Semantic rule contributions: this is the backbone of ranking.
  # Weights reflect conservative belief about "this looks like a vuln fix".
  # Per the `composition` formula, each weight is multiplied by the rule's
  # confidence, the products are summed, and the sum is scaled by the
  # category multiplier.
  semantic_rule_base:
    # --- Original rules (v1) ---
    added_len_check_before_memcpy: 6.0
    added_struct_size_validation: 4.5
    added_index_bounds_check: 4.0

    null_after_free_added: 5.0
    guard_before_free_added: 4.0

    probe_for_read_or_write_added: 6.0
    previous_mode_gating_added: 5.0
    seh_guard_added_around_user_deref: 3.5

    safe_size_math_helper_added: 4.5
    alloc_size_overflow_check_added: 5.5

    interlocked_refcount_added: 3.0

    # --- Race condition rules ---
    spinlock_acquisition_added: 3.5
    mutex_or_resource_lock_added: 3.5
    double_fetch_to_capture_fix: 5.0
    cancel_safe_irp_queue_added: 3.0

    # --- Type confusion rules ---
    object_type_validation_added: 5.0
    handle_object_type_check_added: 5.5
    wow64_thunk_validation_added: 4.5

    # --- Authorization rules ---
    privilege_check_added: 5.5
    access_mode_enforcement_added: 6.0
    device_acl_hardening: 4.0
    registry_access_mask_hardened: 3.0

    # --- Information disclosure rules ---
    buffer_zeroing_before_copy_added: 4.5
    stack_variable_initialization_added: 3.5
    output_length_truncation_added: 3.5
    kernel_pointer_scrubbing_added: 5.0

    # --- IOCTL hardening rules ---
    method_neither_probe_added: 6.0
    ioctl_input_size_validation_added: 5.0
    ioctl_code_default_case_added: 2.0

    # --- MDL handling rules ---
    mdl_safe_mapping_replacement: 4.0
    mdl_probe_access_mode_fix: 6.0
    mdl_null_check_added: 3.5

    # --- Object management rules ---
    ob_reference_balance_fix: 4.0
    handle_force_access_check_added: 5.0

    # --- String handling rules ---
    safe_string_function_replacement: 3.5
    unicode_string_length_validation_added: 4.0

    # --- Pool hardening rules ---
    pool_type_nx_migration: 3.0
    deprecated_pool_api_replacement: 2.5
    pool_allocation_null_check_added: 2.5

    # --- Crypto hardening rules ---
    secure_zero_memory_added: 3.0
    constant_time_comparison_added: 3.5

    # --- Error path hardening rules ---
    error_path_cleanup_added: 3.0
    goto_cleanup_pattern_added: 2.5
    irp_completion_status_fix: 3.0

    # --- DoS hardening rules ---
    recursion_depth_limit_added: 3.5
    loop_iteration_bound_added: 2.5
    resource_quota_check_added: 3.0

    # --- NDIS hardening rules ---
    oid_request_validation_added: 5.0
    nbl_chain_length_validation_added: 4.0

    # --- Filesystem filter rules ---
    flt_context_reference_leak_fix: 3.5
    flt_create_race_mitigation: 4.5

    # --- PnP/Power rules ---
    surprise_removal_guard_added: 3.0
    power_state_validation_added: 2.5
    io_remove_lock_added: 3.5

    # --- DMA/MMIO rules ---
    mmio_mapping_bounds_validation_added: 5.5
    dma_buffer_bounds_check_added: 4.0

    # --- WDF hardening rules ---
    wdf_request_buffer_size_check_added: 4.5
    wdf_request_completion_guard_added: 3.0

    # --- Attack surface rules (new feature detection) ---
    # NOTE(review): presumably these are the rules that fall under the
    # `new_attack_surface` category and the `new_feature` change type —
    # confirm against the rule definitions.
    new_ioctl_handler: 5.0
    new_pool_operations: 3.5
    new_user_buffer_access: 5.0
    new_mdl_operations: 3.5
    new_object_handle_ops: 3.0
    new_dma_mmio_access: 4.0
    new_memory_copy_operations: 3.5
    new_string_operations: 2.5

  # Category multipliers: small nudge, not the main driver.
  # Per the `composition` formula, the multiplier scales the summed
  # rule contributions: values above 1.00 raise the semantic score,
  # values below 1.00 lower it.
  category_multiplier:
    # Original categories
    bounds_check: 1.05
    user_boundary_check: 1.10
    int_overflow: 1.05
    lifetime_fix: 1.05
    state_hardening: 0.95
    # New categories (v2)
    race_condition: 1.05
    type_confusion: 1.10
    authorization: 1.10
    info_disclosure: 1.00
    ioctl_hardening: 1.10
    mdl_handling: 1.05
    object_management: 1.05
    string_handling: 1.00
    pool_hardening: 0.95
    crypto_hardening: 0.90
    error_path_hardening: 0.90
    dos_hardening: 0.90
    ndis_hardening: 1.05
    filesystem_filter: 1.00
    pnp_power: 0.95
    dma_mmio: 1.10
    wdf_hardening: 1.05
    # Attack surface category
    new_attack_surface: 0.90

  # Change type multipliers: patches vs new features.
  # New features are slightly depressed since they lack a "known vuln" signal.
  # NOTE(review): the formula documented under `composition` does not
  # reference this multiplier — confirm where the ranking stage applies it.
  change_type_multiplier:
    patch: 1.0
    new_feature: 0.85

  # Reachability contributions: large differentiator.
  # This is added as a bonus on top of semantic rule score.
  # Per the `composition` formula, the bonus is first multiplied by the
  # adjusted reachability confidence.
  # Keys are reachability classes, listed from largest to smallest bonus.
  reachability_bonus:
    ioctl: 4.0
    irp: 2.5
    pnp: 2.0
    internal: 0.5
    unknown: 0.0

  # Additional bonuses when there is explicit sink proximity evidence.
  # (Only apply when semantic rule already triggered; do not score sinks alone.)
  # Keys are sink groups. Per the `composition` formula, the bonuses of the
  # matched groups are summed and scaled by min(1.0, semantic_confidence).
  sink_bonus:
    # Original
    memory_copy: 1.5
    pool_alloc: 1.2
    pool_free: 1.0
    user_probe: 1.5
    io_sanitization: 1.0
    exceptions: 0.6
    string_copy: 0.8
    refcounting: 0.4
    # New (v2)
    synchronization: 0.6
    object_management: 0.8
    handle_validation: 1.0
    authorization: 1.2
    mdl_operations: 1.2
    memory_zeroing: 0.6
    ndis_operations: 1.0
    mmio_dma: 1.5
    filesystem_filter: 0.8
    pnp_power: 0.6
    wdf_operations: 0.8
    device_security: 1.0
    irp_cancel: 0.4
    irp_completion: 0.6

  # Penalties for noisy pairing or low-quality comparisons.
  # Per the `composition` formula, the three penalties below are added
  # together and subtracted from the summed positive contributions.
  penalties:
    # Penalty keyed by the pairing decision for the compared binaries.
    pairing_decision:
      accept: 0.0
      quarantine: 2.0
      reject: 999.0  # should never be scored if rejected; safety guard

    # Penalty keyed by the noise-risk level.
    noise_risk:
      low: 0.0
      medium: 1.0
      high: 2.5

    # Penalize when function matching indicates instability.
    # The bucket names match the keys of the top-level
    # `matching_quality_buckets` mapping.
    matching_quality:
      high: 0.0
      medium: 0.8
      low: 1.8

# Score composition rules (authoritative).
# The terms below are defined under `weights` (rule weights, multipliers,
# bonuses, penalties) and `gating` (caps, drops, reachability multiplier).
composition:
  # Final score per finding is built as:
  #
  # semantic_score =
  #   sum(rule_base_weight * rule_confidence) * category_multiplier
  #
  # reachability_score =
  #   reachability_bonus * reachability_confidence_adjusted
  #
  # sink_score =
  #   sum(sink_bonus[group]) * min(1.0, semantic_confidence)
  #
  # penalties =
  #   pairing_decision_penalty + noise_risk_penalty + matching_quality_penalty
  #
  # final_score = (semantic_score + reachability_score + sink_score) - penalties
  #
  # Then apply gating caps/drops from `gating`.
  #
  # NOTE(review): weights.change_type_multiplier does not appear in the
  # formula above — confirm where the ranking stage applies it.

  # NOTE(review): presumably the maximum number of findings emitted per
  # report — confirm against the report stage.
  max_findings_in_report: 10

  # Only one semantic rule is required to create a finding.
  # If multiple rules trigger for the same function, they stack (conservative but useful).
  allow_rule_stacking: true

  # How to adjust reachability contribution when confidence is low.
  # The value is a plain string naming a key path under `gating`; YAML does
  # not resolve it, so the consumer has to look it up (TODO confirm it does).
  reachability_confidence_adjust:
    below_soft_min_multiplier: gating.reachability_confidence.multiplier_if_below

  # Clamp scores to the [min, max] range. The lower bound avoids weird
  # negatives, since penalties are subtracted and can exceed the positive
  # contributions.
  clamp:
    min: 0.0
    max: 15.0

# Explainability requirements: ranking stage MUST output these fields for each finding.
explainability:
  # Field names that every finding must carry.
  required_fields:
    - rule_ids
    - category
    - semantic_confidence
    - matching_confidence
    - reachability_class
    - reachability_confidence
    - sinks
    - penalties_applied
    - score_breakdown

  # NOTE(review): presumably each key below is a section of the
  # `score_breakdown` field and lists the fields of that section's
  # entries — confirm against the ranking stage output.
  score_breakdown_format:
    semantic:
      - rule_id
      - base_weight
      - rule_confidence
      - contribution
    reachability:
      - class
      - bonus
      - confidence
      - contribution
    sinks:
      - sink_group
      - bonus
      - contribution
    penalties:
      - type
      - value
    final:
      - total_before_clamp
      - total_after_clamp
      - gates_triggered

# Conservative defaults for mapping "matching confidence" to quality buckets.
# Each bucket gives its minimum confidence. The bucket names match the keys
# of weights.penalties.matching_quality.
# NOTE(review): presumably a finding gets the highest bucket whose
# min_confidence it meets — confirm against the ranking code.
matching_quality_buckets:
  high:
    min_confidence: 0.80
  medium:
    min_confidence: 0.60
  low:
    min_confidence: 0.00