Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
<h1 style="text-align: center;">ARC - Automated Review Checking with Machine Learning</h1>
by Rudy's Rangers, for TikTok TechJam 2025 *FINALS*
by Rudy's Rangers, for TikTok TechJam 2025

We are proud and grateful to the team at TikTok for awarding us **5th place** out of over 300 teams at the TikTok TechJam 2025 for our achievements and innovation in this project.

<h3 style="text-align: center;">Chosen Problem Statement: Filtering the Noise: ML for Trustworthy Location Reviews</h3>

### Authors
Expand Down
26 changes: 23 additions & 3 deletions frontend/src/app/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,14 @@ interface ReviewData {

interface StageUpdate {
stage: number;
status: 'starting' | 'passed' | 'rejected' | 'error' | 'banned';
status: 'starting' | 'passed' | 'rejected' | 'error' | 'banned' | 'uncertain';
message: string;
scores?: {
ad: number;
irrelevant: number;
rant: number;
unsafe: number;
};
}

interface MapLocation {
Expand Down Expand Up @@ -121,13 +127,27 @@ export default function ReviewAnalyzer() {
const update: StageUpdate = {
stage: data.stage,
status: data.status,
message: data.message
message: data.message,
scores: data.scores
};

setStageUpdates(prev => [...prev, update]);
setCurrentStage(data.stage);

// Enhanced logging for threshold tuning
if (data.scores) {
console.log('Encoder scores:', data.scores);
console.log('=== ENCODER PROBABILITIES FOR THRESHOLD TUNING ===');
console.log('Review:', reviewData.review.slice(0, 100) + '...');
console.log('Probabilities:', data.scores);

const thresholds = { ad: 0.3, irrelevant: 0.25, rant: 0.2, unsafe: 0.4 };
const triggered = Object.entries(data.scores).filter(([key, prob]) =>
prob > thresholds[key as keyof typeof thresholds]
);

console.log('Triggered categories:', triggered.length > 0 ? triggered : 'None');
console.log('Result:', data.status);
console.log('===========================================');
}
} catch (parseError) {
console.error('Error parsing SSE data:', parseError);
Expand Down
42 changes: 32 additions & 10 deletions src/app/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,10 @@ async def get_stage_counters():
"encoder_stage": int(encoder_count.decode("utf-8")) if encoder_count else 0,
}
except Exception as e:
return {"safety_stage": 0, "fasttext_stage": 0, "encoder_stage": 0}
return {
"safety_stage": 0,
"fasttext_stage": 0,
"encoder_stage": 0
}


async def safety_stage(review_data):
Expand Down Expand Up @@ -160,6 +163,22 @@ async def encoder_stage(prompt):
pipeline.redis.incr("encoder_stage")
yield {"stage": 3, "status": "starting", "message": "Running encoder model..."}
await asyncio.sleep(0.1)

if not hasattr(pipeline.encoder, '_lora_amplified'):
amplification_factor = 4.0

for name, module in pipeline.encoder.named_modules():
if hasattr(module, 'scaling') and any(x in name for x in ['q_lin', 'k_lin', 'v_lin']):
original_scaling = module.scaling

if isinstance(original_scaling, dict):
for adapter_name in original_scaling:
original_value = original_scaling[adapter_name]
module.scaling[adapter_name] = original_value * amplification_factor
elif isinstance(original_scaling, (int, float)):
module.scaling = original_scaling * amplification_factor

pipeline.encoder._lora_amplified = True

inputs = pipeline.tokenizer(
prompt,
Expand All @@ -172,11 +191,10 @@ async def encoder_stage(prompt):
with torch.no_grad():
outputs = pipeline.encoder(**inputs)
probs = torch.sigmoid(outputs.logits)
preds = (probs > 0.5).int()
thresholds = torch.tensor([0.2, 0.2, 0.2, 0.4]) # [ad, irrelevant, rant, unsafe]
preds = (probs > thresholds).int()

# Check if any prediction is positive (rejected)
has_positive_pred = torch.any(preds > 0).item()

# Get prediction scores for each bucket for console logging
scores = probs.squeeze().tolist()
bucket_names = ["ad", "irrelevant", "rant", "unsafe"]
Expand All @@ -190,15 +208,19 @@ async def encoder_stage(prompt):
"scores": score_details,
}
else:
# Find which labels triggered rejection
failed_labels = [bucket_names[i] for i in range(len(preds)) if preds[0, i] > 0]
max_prob_idx = probs.argmax().item()
primary_label = bucket_names[max_prob_idx]
# Find which labels triggered rejection with their thresholds
thresholds_list = [0.2, 0.2, 0.2, 0.4] # [ad, irrelevant, rant, unsafe]
failed_labels = []
for i in range(len(preds[0])):
if preds[0, i] > 0:
prob = probs[0, i].item()
threshold = thresholds_list[i]
failed_labels.append(f"{bucket_names[i]}({prob:.3f}>{threshold})")

if len(failed_labels) == 1:
reject_reason = f"'{primary_label}' (probability: {probs.max().item():.3f})"
reject_reason = failed_labels[0]
else:
reject_reason = f"'{primary_label}' and {len(failed_labels)-1} other(s) (max probability: {probs.max().item():.3f})"
reject_reason = f"{len(failed_labels)} categories: {', '.join(failed_labels)}"

yield {
"stage": 3,
Expand Down
2 changes: 0 additions & 2 deletions src/pipelines/inference_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,10 +299,8 @@ def add_banned_ids(self, user_id):
"""
key = str(user_id)

# Increment counter atomically (creates key with value 1 if not exists)
count = self.redis.incr(key)

# If counter reaches 3, set to -1
if count >= 1000:
self.redis.set(key, -1)

Expand Down