Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
<h1 style="text-align: center;">ARC - Automated Review Checking with Machine Learning</h1>
by Rudy's Rangers, for TikTok TechJam 2025 *FINALS*
by Rudy's Rangers, for TikTok TechJam 2025

We are proud and grateful to the team at TikTok for awarding us **5th place** out of over 300 teams at the TikTok TechJam 2025 for our achievements and innovation in this project.

<h3 style="text-align: center;">Chosen Problem Statement: Filtering the Noise: ML for Trustworthy Location Reviews</h3>

### Authors
Expand Down
26 changes: 23 additions & 3 deletions frontend/src/app/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,14 @@ interface ReviewData {

interface StageUpdate {
stage: number;
status: 'starting' | 'passed' | 'rejected' | 'error' | 'banned';
status: 'starting' | 'passed' | 'rejected' | 'error' | 'banned' | 'uncertain';
message: string;
scores?: {
ad: number;
irrelevant: number;
rant: number;
unsafe: number;
};
}

interface MapLocation {
Expand Down Expand Up @@ -121,13 +127,27 @@ export default function ReviewAnalyzer() {
const update: StageUpdate = {
stage: data.stage,
status: data.status,
message: data.message
message: data.message,
scores: data.scores
};

setStageUpdates(prev => [...prev, update]);
setCurrentStage(data.stage);

// Enhanced logging for threshold tuning
if (data.scores) {
console.log('Encoder scores:', data.scores);
console.log('=== ENCODER PROBABILITIES FOR THRESHOLD TUNING ===');
console.log('Review:', reviewData.review.slice(0, 100) + '...');
console.log('Probabilities:', data.scores);

const thresholds = { ad: 0.3, irrelevant: 0.25, rant: 0.2, unsafe: 0.4 };
const triggered = Object.entries(data.scores).filter(([key, prob]) =>
prob > thresholds[key as keyof typeof thresholds]
);

console.log('Triggered categories:', triggered.length > 0 ? triggered : 'None');
console.log('Result:', data.status);
console.log('===========================================');
}
} catch (parseError) {
console.error('Error parsing SSE data:', parseError);
Expand Down
42 changes: 32 additions & 10 deletions src/app/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,10 @@ async def get_stage_counters():
"encoder_stage": int(encoder_count.decode("utf-8")) if encoder_count else 0,
}
except Exception as e:
return {"safety_stage": 0, "fasttext_stage": 0, "encoder_stage": 0}
return {
"safety_stage": 0,
"fasttext_stage": 0,
"encoder_stage": 0
}


async def safety_stage(review_data):
Expand Down Expand Up @@ -160,6 +163,22 @@ async def encoder_stage(prompt):
pipeline.redis.incr("encoder_stage")
yield {"stage": 3, "status": "starting", "message": "Running encoder model..."}
await asyncio.sleep(0.1)

if not hasattr(pipeline.encoder, '_lora_amplified'):
amplification_factor = 4.0

for name, module in pipeline.encoder.named_modules():
if hasattr(module, 'scaling') and any(x in name for x in ['q_lin', 'k_lin', 'v_lin']):
original_scaling = module.scaling

if isinstance(original_scaling, dict):
for adapter_name in original_scaling:
original_value = original_scaling[adapter_name]
module.scaling[adapter_name] = original_value * amplification_factor
elif isinstance(original_scaling, (int, float)):
module.scaling = original_scaling * amplification_factor

pipeline.encoder._lora_amplified = True

inputs = pipeline.tokenizer(
prompt,
Expand All @@ -172,11 +191,10 @@ async def encoder_stage(prompt):
with torch.no_grad():
outputs = pipeline.encoder(**inputs)
probs = torch.sigmoid(outputs.logits)
preds = (probs > 0.5).int()
thresholds = torch.tensor([0.2, 0.2, 0.2, 0.4]) # [ad, irrelevant, rant, unsafe]
preds = (probs > thresholds).int()

# Check if any prediction is positive (rejected)
has_positive_pred = torch.any(preds > 0).item()

# Get prediction scores for each bucket for console logging
scores = probs.squeeze().tolist()
bucket_names = ["ad", "irrelevant", "rant", "unsafe"]
Expand All @@ -190,15 +208,19 @@ async def encoder_stage(prompt):
"scores": score_details,
}
else:
# Find which labels triggered rejection
failed_labels = [bucket_names[i] for i in range(len(preds)) if preds[0, i] > 0]
max_prob_idx = probs.argmax().item()
primary_label = bucket_names[max_prob_idx]
# Find which labels triggered rejection with their thresholds
thresholds_list = [0.2, 0.2, 0.2, 0.4] # [ad, irrelevant, rant, unsafe]
failed_labels = []
for i in range(len(preds[0])):
if preds[0, i] > 0:
prob = probs[0, i].item()
threshold = thresholds_list[i]
failed_labels.append(f"{bucket_names[i]}({prob:.3f}>{threshold})")

if len(failed_labels) == 1:
reject_reason = f"'{primary_label}' (probability: {probs.max().item():.3f})"
reject_reason = failed_labels[0]
else:
reject_reason = f"'{primary_label}' and {len(failed_labels)-1} other(s) (max probability: {probs.max().item():.3f})"
reject_reason = f"{len(failed_labels)} categories: {', '.join(failed_labels)}"

yield {
"stage": 3,
Expand Down
2 changes: 0 additions & 2 deletions src/pipelines/inference_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,10 +299,8 @@ def add_banned_ids(self, user_id):
"""
key = str(user_id)

# Increment counter atomically (creates key with value 1 if not exists)
count = self.redis.incr(key)

# If counter reaches 3, set to -1
if count >= 1000:
self.redis.set(key, -1)

Expand Down