diff --git a/README.md b/README.md
index 8f9a134..9bf845d 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,8 @@
ARC - Automated Review Checking with Machine Learning
-by Rudy's Rangers, for TikTok TechJam 2025 *FINALS*
+by Rudy's Rangers, for TikTok TechJam 2025
+
+We are proud and grateful to the team at TikTok for awarding us **5th place** out of over 300 teams at TikTok TechJam 2025, in recognition of our achievements and innovation in this project.
+
Chosen Problem Statment: Filtering the Noise: ML for Trustworthy Location Reviews
### Authors
diff --git a/frontend/src/app/page.tsx b/frontend/src/app/page.tsx
index d812252..6fe432d 100644
--- a/frontend/src/app/page.tsx
+++ b/frontend/src/app/page.tsx
@@ -15,8 +15,14 @@ interface ReviewData {
interface StageUpdate {
stage: number;
- status: 'starting' | 'passed' | 'rejected' | 'error' | 'banned';
+ status: 'starting' | 'passed' | 'rejected' | 'error' | 'banned' | 'uncertain';
message: string;
+ scores?: {
+ ad: number;
+ irrelevant: number;
+ rant: number;
+ unsafe: number;
+ };
}
interface MapLocation {
@@ -121,13 +127,27 @@ export default function ReviewAnalyzer() {
const update: StageUpdate = {
stage: data.stage,
status: data.status,
- message: data.message
+ message: data.message,
+ scores: data.scores
};
setStageUpdates(prev => [...prev, update]);
setCurrentStage(data.stage);
+
+ // Enhanced logging for threshold tuning
if (data.scores) {
- console.log('Encoder scores:', data.scores);
+ console.log('=== ENCODER PROBABILITIES FOR THRESHOLD TUNING ===');
+ console.log('Review:', reviewData.review.slice(0, 100) + '...');
+ console.log('Probabilities:', data.scores);
+
+            const thresholds = { ad: 0.2, irrelevant: 0.2, rant: 0.2, unsafe: 0.4 }; // keep in sync with backend encoder_stage thresholds
+ const triggered = Object.entries(data.scores).filter(([key, prob]) =>
+ prob > thresholds[key as keyof typeof thresholds]
+ );
+
+ console.log('Triggered categories:', triggered.length > 0 ? triggered : 'None');
+ console.log('Result:', data.status);
+ console.log('===========================================');
}
} catch (parseError) {
console.error('Error parsing SSE data:', parseError);
diff --git a/src/app/backend.py b/src/app/backend.py
index 0994d66..b620a46 100644
--- a/src/app/backend.py
+++ b/src/app/backend.py
@@ -76,7 +76,10 @@ async def get_stage_counters():
"encoder_stage": int(encoder_count.decode("utf-8")) if encoder_count else 0,
}
except Exception as e:
- return {"safety_stage": 0, "fasttext_stage": 0, "encoder_stage": 0}
+ return {
+ "safety_stage": 0,
+ "fasttext_stage": 0,
+            "encoder_stage": 0}
async def safety_stage(review_data):
@@ -160,6 +163,22 @@ async def encoder_stage(prompt):
pipeline.redis.incr("encoder_stage")
yield {"stage": 3, "status": "starting", "message": "Running encoder model..."}
await asyncio.sleep(0.1)
+
+ if not hasattr(pipeline.encoder, '_lora_amplified'):
+ amplification_factor = 4.0
+
+ for name, module in pipeline.encoder.named_modules():
+ if hasattr(module, 'scaling') and any(x in name for x in ['q_lin', 'k_lin', 'v_lin']):
+ original_scaling = module.scaling
+
+ if isinstance(original_scaling, dict):
+ for adapter_name in original_scaling:
+ original_value = original_scaling[adapter_name]
+ module.scaling[adapter_name] = original_value * amplification_factor
+ elif isinstance(original_scaling, (int, float)):
+ module.scaling = original_scaling * amplification_factor
+
+ pipeline.encoder._lora_amplified = True
inputs = pipeline.tokenizer(
prompt,
@@ -172,11 +191,10 @@ async def encoder_stage(prompt):
with torch.no_grad():
outputs = pipeline.encoder(**inputs)
probs = torch.sigmoid(outputs.logits)
- preds = (probs > 0.5).int()
+        thresholds = torch.tensor([0.2, 0.2, 0.2, 0.4], device=probs.device) # [ad, irrelevant, rant, unsafe]
+ preds = (probs > thresholds).int()
- # Check if any prediction is positive (rejected)
has_positive_pred = torch.any(preds > 0).item()
-
# Get prediction scores for each bucket for console logging
scores = probs.squeeze().tolist()
bucket_names = ["ad", "irrelevant", "rant", "unsafe"]
@@ -190,15 +208,19 @@ async def encoder_stage(prompt):
"scores": score_details,
}
else:
- # Find which labels triggered rejection
- failed_labels = [bucket_names[i] for i in range(len(preds)) if preds[0, i] > 0]
- max_prob_idx = probs.argmax().item()
- primary_label = bucket_names[max_prob_idx]
+ # Find which labels triggered rejection with their thresholds
+        thresholds_list = [0.2, 0.2, 0.2, 0.4] # [ad, irrelevant, rant, unsafe] — keep in sync with thresholds tensor above
+ failed_labels = []
+ for i in range(len(preds[0])):
+ if preds[0, i] > 0:
+ prob = probs[0, i].item()
+ threshold = thresholds_list[i]
+ failed_labels.append(f"{bucket_names[i]}({prob:.3f}>{threshold})")
if len(failed_labels) == 1:
- reject_reason = f"'{primary_label}' (probability: {probs.max().item():.3f})"
+ reject_reason = failed_labels[0]
else:
- reject_reason = f"'{primary_label}' and {len(failed_labels)-1} other(s) (max probability: {probs.max().item():.3f})"
+ reject_reason = f"{len(failed_labels)} categories: {', '.join(failed_labels)}"
yield {
"stage": 3,
diff --git a/src/pipelines/inference_pipeline.py b/src/pipelines/inference_pipeline.py
index 7a27bbf..06cc14c 100644
--- a/src/pipelines/inference_pipeline.py
+++ b/src/pipelines/inference_pipeline.py
@@ -299,10 +299,8 @@ def add_banned_ids(self, user_id):
"""
key = str(user_id)
- # Increment counter atomically (creates key with value 1 if not exists)
count = self.redis.incr(key)
- # If counter reaches 3, set to -1
if count >= 1000:
self.redis.set(key, -1)