diff --git a/docs/intern-data/stream-assignments.json b/docs/intern-data/stream-assignments.json new file mode 100644 index 000000000..3412a9f98 --- /dev/null +++ b/docs/intern-data/stream-assignments.json @@ -0,0 +1,74 @@ +{ + "program": "WINNIIO Intern Program May-June 2026", + "streams": { + "A": { + "name": "Boardy Clone", + "repo_owner": "LifeAtlas", + "repo_name": null, + "lead": null, + "customer": "Nicolas", + "interns": [ + { "name": "Sanskriti", "github": "sanskriti", "lpi_level": 4 }, + { "name": "Shubham Kumar", "github": "shubham-kumar", "lpi_level": 4 }, + { "name": "Shourya Solanki", "github": "shourya-solanki", "lpi_level": 4 }, + { "name": "Yash Maheshwari", "github": "yashm0910", "lpi_level": 3 }, + { "name": "Praveen Singh", "github": "praveen-singh", "lpi_level": 3 }, + { "name": "Aryan", "github": "aryan", "lpi_level": 3 }, + { "name": "Ankit Kumar Singh", "github": "ankit-kumar-singh", "lpi_level": 3 }, + { "name": "Jahanvi Gupta", "github": "Jahanvi3005", "lpi_level": 3 }, + { "name": "Vansh Singhal", "github": "vansh_singhal", "lpi_level": 3 }, + { "name": "Adil Islam", "github": "adil_islam", "lpi_level": 3 } + ] + }, + "B": { + "name": "DataPro+ DealFlow", + "repo_owner": "LifeAtlas", + "repo_name": "datacenter-flow", + "backend_repo": "DataCenterBackend", + "lead": null, + "customer": "Josh Young", + "interns": [ + { "name": "Daksh Garg", "github": "daksh-garg", "lpi_level": 4 }, + { "name": "Abhinav Chaudhary", "github": "abhinav-chaudhary", "lpi_level": 4 }, + { "name": "Varshit Pratap Singh Bhadauria", "github": "varshit-pratap-singh-bhadauria", "lpi_level": 4 }, + { "name": "Touqeer Hamdani", "github": "Touqeer-Hamdani", "lpi_level": 3 }, + { "name": "Aditi Mehta", "github": "aditi-mehta", "lpi_level": 3 }, + { "name": "Ananyaa M", "github": "ananyaa-m", "lpi_level": 3 }, + { "name": "Priyanshu Bhardwaj", "github": "Priyanshu-BHardwaj", "lpi_level": 3 }, + { "name": "Rahul Bijarnia", "github": "rahul_bijarnia", "lpi_level": 3 }, + { 
"name": "Khushi Garg", "github": "khushi-garg", "lpi_level": 3 }, + { "name": "Lavanya Parashar", "github": "lavanya-parashar", "lpi_level": 3 }, + { "name": "Saima Afroz", "github": "saima-afroz", "lpi_level": 3 }, + { "name": "Sonal Yadav", "github": "sonal-yadav", "lpi_level": 3 }, + { "name": "V Bharath Raju", "github": "bharath-raju", "lpi_level": 3 } + ] + }, + "C": { + "name": "Industrial Twin Dashboard", + "repo_owner": "LifeAtlas", + "repo_name": "factory-twin-dashboard", + "branch": "main", + "lead": null, + "customer": "Nicolas (internal)", + "interns": [ + { "name": "Aadyant Sood", "github": "aadyant-sood", "lpi_level": 4 }, + { "name": "Jaivardhan Singh", "github": "jaivardhan-singh", "lpi_level": 4 }, + { "name": "Harshit Kumar", "github": "harshit-kumar", "lpi_level": 4 }, + { "name": "Kailash Narayana Prasad", "github": "kailash-narayana-prasad", "lpi_level": 3 }, + { "name": "Sania Gurung", "github": "sania-gurung", "lpi_level": 3 }, + { "name": "Yashika Verma", "github": "yashika-verma", "lpi_level": 3 }, + { "name": "Naman Anand", "github": "naman-anand", "lpi_level": 3 }, + { "name": "Dia Vats", "github": "Dia-vats", "lpi_level": 3 }, + { "name": "Devika Hooda", "github": "devikahooda775", "lpi_level": 3 }, + { "name": "Srishti Gusain", "github": "srishti-gusain", "lpi_level": 3 }, + { "name": "Anupaul Saikia", "github": "anupaul-saikia", "lpi_level": 3 }, + { "name": "Abishek Sharma", "github": "abhishek2006", "lpi_level": 3 } + ] + } + }, + "tracking": { + "pr_deadline": "Friday before demo", + "scoring_cron": "Friday 14:00 IST (08:30 UTC)", + "metrics": ["prs_merged", "lines_changed", "reviews_given", "streak_weeks"] + } +} diff --git a/docs/scores.json b/docs/scores.json index 5eaa78720..28997be32 100644 --- a/docs/scores.json +++ b/docs/scores.json @@ -17,70 +17,86 @@ }, "kailash-narayana-prasad": { "name": "Kailash Narayana Prasad", - "level": 3, + "level": 5, "score": 11, "max": 13, "pct": 85, - "updated": "2026-04-17T13:56:06.147Z" + 
"updated": "2026-05-12T10:00:00.000Z", + "note": "L5 #472: 48/100. Generic and formulaic. | L6 #479: 0/100. PR only modifies challenge spec.", + "l5_score": 48, + "l6_score": 0 }, "sania-gurung": { "name": "Sania Gurung", - "level": 3, + "level": 5, "score": 12, "max": 13, "pct": 92, - "updated": "2026-04-20T18:22:15.222Z" + "updated": "2026-05-09T10:47:15.000Z", + "note": "L5 PR #469 MERGED May 9. L6 not yet submitted. Score pending rubric review." }, "shubham-kumar": { "name": "Shubham Kumar", - "level": 3, - "score": 12, + "level": 6, + "score": 13, "max": 13, - "pct": 92, - "updated": "2026-04-19T21:41:57.944Z" + "pct": 100, + "updated": "2026-05-09T10:30:43.000Z", + "note": "L5 PR #468 MERGED May 9. L6 PR #471 MERGED May 9. First to complete both. Scores pending rubric review." }, "harshit-kumar": { "name": "Harshit Kumar", - "level": 4, + "level": 6, "score": 12, "max": 13, "pct": 92, - "updated": "2026-04-21T12:00:00.000Z", - "note": "Level 4 submission merged. Score adjusted +1 for L4 completion." + "updated": "2026-05-12T10:00:00.000Z", + "note": "L5 #470: 85/100. Full capacity table, root cause analysis. | L6 #473: 90/100. Polished UI, heatmap, SPOF, 6-check self-test.", + "l5_score": 85, + "l6_score": 90 }, "ananyaa-m": { "name": "Ananyaa M", - "level": 3, + "level": 6, "score": 11, "max": 13, "pct": 85, - "updated": "2026-04-18T17:33:19.855Z" + "updated": "2026-05-12T10:00:00.000Z", + "note": "L4 #464: 80%. L5 #465 OPEN. L6 #499: 75/100 (CORRECTED from 0 — full submission with 278-line app, 6 pages, deployed).", + "l6_score": 75, + "l4_score": 80 }, "srishti-gusain": { "name": "Srishti Gusain", - "level": 3, + "level": 5, "score": 9, "max": 13, "pct": 69, - "updated": "2026-04-17T06:00:00.000Z" + "updated": "2026-05-12T10:00:00.000Z", + "l5_score": 42, + "note": "L5 #481: 42/100. Thin. Scrum lead - technical depth needs work." 
}, "varshit-pratap-singh-bhadauria": { "name": "Varshit Pratap Singh Bhadauria", - "level": 4, + "level": 6, "score": 10, "max": 13, "pct": 77, - "updated": "2026-04-21T12:00:00.000Z", - "note": "Level 4 submission merged. Score adjusted +1 for L4 completion." + "updated": "2026-05-12T10:00:00.000Z", + "note": "L5 #467: 45/100. Meets minimums, ChatGPT text in README. | L6 #477: 52/100. All pages, missing imports, 2-check self-test.", + "l5_score": 45, + "l6_score": 52 }, "aadyant-sood": { "name": "Aadyant Sood", - "level": 4, + "level": 6, "score": 13, "max": 13, "pct": 100, - "updated": "2026-04-21T12:00:00.000Z", - "note": "Level 4 submission merged. Score adjusted +1 for L4 completion." + "updated": "2026-05-12T10:00:00.000Z", + "note": "L5 #483: 62/100. Missing Q1, corrupted CSV data. | L6 #494: 68/100. All pages, Neo4j, MERGE. No SPOF flagging.", + "l5_score": 62, + "l6_score": 68 }, "jahanvi-gupta": { "name": "Jahanvi Gupta", @@ -145,7 +161,9 @@ "score": 6, "max": 13, "pct": 46, - "updated": "2026-04-19T19:31:57.552Z" + "updated": "2026-05-12T10:00:00.000Z", + "l4_score": 25, + "note": "L4 #460: 5/20 (25%). Broken, wrong directory, incomplete. Stays L3." }, "peeyush-raj": { "name": "Peeyush Raj", @@ -165,11 +183,13 @@ }, "anupaul-saikia": { "name": "Anupaul Saikia", - "level": 3, + "level": 5, "score": 8, "max": 13, "pct": 62, - "updated": "2026-04-20T08:13:51.567Z" + "updated": "2026-05-12T10:00:00.000Z", + "l5_score": 52, + "note": "L5 #492: 52/100. Inconsistent naming, Q3 no numbers." }, "pranu-chandra": { "name": "Pranu Chandra", @@ -221,20 +241,25 @@ }, "devika-hooda": { "name": "Devika Hooda", - "level": 3, + "level": 6, "score": 10, "max": 13, "pct": 77, - "updated": "2026-04-19T14:00:00.000Z" + "updated": "2026-05-12T10:00:00.000Z", + "l5_score": 60, + "note": "L5 #480: 60/100. Cites actual numbers Q3. | L6 #500: 72/100. 
All pages + self-test, Plotly, deployed.", + "l6_score": 72 }, "shourya-solanki": { "name": "Shourya Solanki", - "level": 4, + "level": 6, "score": 12, "max": 13, "pct": 92, - "updated": "2026-04-21T12:00:00.000Z", - "note": "Level 4 submission merged. Score adjusted +1 for L4 completion." + "updated": "2026-05-12T10:00:00.000Z", + "note": "L5 #478: 72/100. Solid across all 5 questions. | L6 #489: 85/100. Strong, forecast bonus, MERGE + constraints.", + "l5_score": 72, + "l6_score": 85 }, "sanskriti": { "name": "Sanskriti", @@ -247,12 +272,13 @@ }, "naman-anand": { "name": "Naman Anand", - "level": 3, + "level": 6, "score": 10, "max": 13, "pct": 77, - "updated": "2026-04-21T04:45:55.176Z", - "note": "L4 PR open (pending merge)." + "updated": "2026-05-12T10:00:00.000Z", + "note": "L6 #491: 82/100. Excellent, forecast bonus, SPOF flagging.", + "l6_score": 82 }, "vusala-akanksha": { "name": "Vusala Akanksha", @@ -276,7 +302,9 @@ "score": 11, "max": 13, "pct": 85, - "updated": "2026-04-20T11:52:52.810Z" + "updated": "2026-05-12T10:00:00.000Z", + "l6_score": 15, + "note": "L6 #497: 15/100. Code in wrong path. Stays at L3." }, "rahul-bijarnia": { "name": "Rahul Bijarnia", @@ -284,16 +312,19 @@ "score": 11, "max": 13, "pct": 85, - "updated": "2026-04-20T11:48:29.042Z" + "updated": "2026-05-12T10:00:00.000Z", + "l6_score": 5, + "note": "L6 #482: 5/100. Flask hello world. Stays at L3." }, "yash-maheshwari": { "name": "Yash Maheshwari", - "level": 3, + "level": 5, "score": 11, "max": 13, "pct": 85, - "updated": "2026-04-19T15:37:02.790Z", - "note": "L4 PR open (pending merge)." + "updated": "2026-05-12T10:00:00.000Z", + "note": "L5 #484: 70/100. Good WorkPackage model.", + "l5_score": 70 }, "ayush-sharma": { "name": "Ayush Sharma", @@ -321,39 +352,46 @@ }, "abhinav-chaudhary": { "name": "Abhinav Chaudhary", - "level": 4, + "level": 5, "score": 11, "max": 13, "pct": 85, - "updated": "2026-04-21T12:00:00.000Z", - "note": "Level 4 submission merged. 
Score adjusted +1 for L4 completion." +      "updated": "2026-05-12T10:00:00.000Z", +      "note": "L5 #501: 92/100. Exceptional.", +      "l5_score": 92    },    "jaivardhan-singh": {      "name": "Jaivardhan Singh", -      "level": 4, +      "level": 6,      "score": 13,      "max": 13,      "pct": 100, -      "updated": "2026-04-21T12:00:00.000Z", -      "note": "Level 4 submission merged. Score adjusted +1 for L4 completion." +      "updated": "2026-05-12T10:00:00.000Z", +      "note": "L5 #485: 95/100. OUTSTANDING. | L6 #485: 68/100 (was 88, -20 for .env + secrets.toml with real Neo4j creds). Must remove before merge.", +      "l5_score": 95, +      "l6_score": 68    },    "praveen-singh": {      "name": "Praveen Singh", -      "level": 3, +      "level": 6,      "score": 12,      "max": 13,      "pct": 92, -      "updated": "2026-04-21T06:15:10.476Z", -      "note": "L4 PR open (pending merge)." +      "updated": "2026-05-12T10:00:00.000Z", +      "note": "L5 #488: 55/100. Standard but Q3 lacks numbers. | L6 #488: 60/100. All pages, MERGE. env handling issues.", +      "l5_score": 55, +      "l6_score": 60    },    "aryan": {      "name": "Aryan", -      "level": 3, +      "level": 6,      "score": 12,      "max": 13,      "pct": 92, -      "updated": "2026-04-20T16:33:18.045Z", -      "note": "L4 PR open (pending merge)." +      "updated": "2026-05-12T10:00:00.000Z", +      "note": "L5 #487: 57/100. Generic. Similarity flag with #488. | L6 #487: 58/100. All pages. Similar to #488.", +      "l5_score": 57, +      "l6_score": 58    },    "level-2---farhan-ahmed-siddique": {      "name": "level-2 - Farhan Ahmed Siddique", @@ -389,11 +427,12 @@    },    "touqeer-hamdani": {      "name": "Touqeer Hamdani", -      "level": 3, +      "level": 6,      "score": 12,      "max": 13,      "pct": 92, -      "updated": "2026-04-20T13:50:17.938Z" +      "updated": "2026-05-10T10:03:00.000Z", +      "note": "L6 deployed: l6-factory-dashboard-touqeerhamdani.streamlit.app. NO PR submitted \u2014 told to submit via GitHub. Does not count until PR is in."
}, "srishti-gusain-(track-a)": { "name": "Srishti Gusain (Track A)", @@ -418,8 +457,10 @@ "score": 13, "max": 13, "pct": 100, - "updated": "2026-04-21T12:00:00.000Z", - "note": "Manual correction: L1+L2+L3+L4 all merged. All 5 tracks, security audit, threat model, A2A. Auto-scorer missed due to placeholder GitHub username. Most complete submission in cohort." + "updated": "2026-05-12T10:00:00.000Z", + "note": "L5 25/100, L6 15/100. FAIL - fabricated data. Reverted to L4.", + "l5_score": 25, + "l6_score": 15 }, "bharath-raju": { "name": "Bharath Raju", @@ -431,11 +472,13 @@ }, "vansh-singhal": { "name": "Vansh Singhal", - "level": 3, + "level": 5, "score": 10, "max": 13, "pct": 77, - "updated": "2026-04-20T08:22:27.103Z" + "updated": "2026-05-12T10:00:00.000Z", + "l5_score": 58, + "note": "L5 #498: 58/100. Cross-copy suspected with #478." }, "yashika-vemra": { "name": "Yashika Vemra", @@ -447,11 +490,13 @@ }, "yashika-verma": { "name": "Yashika Verma", - "level": 3, + "level": 5, "score": 11, "max": 13, "pct": 85, - "updated": "2026-04-20T12:13:39.775Z" + "updated": "2026-05-12T10:00:00.000Z", + "l5_score": 58, + "note": "L5 #475: 58/100. Rich schema but Q3 lacks numbers." }, "mohammad-zishan-noor": { "name": "Mohammad Zishan Noor", @@ -479,12 +524,14 @@ }, "abhishek-sharma": { "name": "Abhishek Sharma", - "level": 3, + "level": 5, "score": 9, "max": 13, "pct": 69, - "updated": "2026-04-20T19:34:37.503Z", - "note": "Duplicate entry cleaned. Flag: PR #396 linked Jahanvi3005 repo — attribution unclear." + "updated": "2026-05-12T10:00:00.000Z", + "note": "L5 38/100, L6 0/100. 
SECURITY: committed .env with real Neo4j creds.", + "l5_score": 38, + "l6_score": 0 }, "sandeep-gunti": { "name": "Sandeep Gunti", diff --git a/docs/streams/PROGRAM-OVERVIEW.md b/docs/streams/PROGRAM-OVERVIEW.md new file mode 100644 index 000000000..530bb6eca --- /dev/null +++ b/docs/streams/PROGRAM-OVERVIEW.md @@ -0,0 +1,122 @@ +# WINNIIO Intern Program — May-June 2026 + +**35 interns | 3 streams | 8 weeks (May 9 – July 4)** +**Sprint 0 (everyone): L5/L6 due Tuesday May 13** +**Stream work begins: Friday May 16** + +--- + +## Sprint 0: L5/L6 — Universal Foundation (May 6-13) + +ALL interns complete L5 (written) + L6 (build) regardless of stream. + +| What | Where | +|------|-------| +| L5 Brief (Graph Thinking) | `challenges/level5-knowledge-graph-foundations.md` | +| L6 Brief (Build It) | `challenges/level6-build-a-knowledge-graph.md` | +| Scoring Guide | `challenges/scoring-guide-l5l6.md` | +| Data (3 CSVs) | `challenges/data/` | +| Submissions | `submissions//level5/` and `level6/` | + +**Skills taught:** Neo4j, Cypher, Streamlit, Plotly, deployment, hybrid vector+graph thinking. 
+ +--- + +## The 3 Streams + +### Stream A — Boardy AI Superconnector (10 interns) + +| | | +|---|---| +| **Brief** | `docs/streams/stream-a-boardy.md` | +| **Repo** | [LifeAtlas/lifeatlas-boardy](https://github.com/LifeAtlas/lifeatlas-boardy) | +| **Customer** | Nicolas (internal — WINNIIO ecosystem) | +| **What** | AI-powered professional matching using Neo4j + Vector DB + Hybrid RAG | +| **Week 1 demo** | Top 10 intern pairings with SMILE-reasoned explanations | + +**Interns:** Sanskriti, Shubham Kumar, Shourya Solanki, Yash Maheshwari, Praveen Singh, Aryan, Ankit Kumar Singh, Jahanvi Gupta, Vansh Singhal, Adil Islam + +--- + +### Stream B — DataPro+ DealFlow (13 interns) + +| | | +|---|---| +| **Brief** | `docs/streams/stream-b-datapro-dealflow.md` | +| **Repos** | [LifeAtlas/datacenter-flow](https://github.com/LifeAtlas/datacenter-flow) (frontend) + [LifeAtlas/DataCenterBackend](https://github.com/LifeAtlas/DataCenterBackend) (backend) | +| **Customer** | Josh Young (DataProPlus / EnergyJobline, UK) | +| **What** | Gated datacenter marketplace with 3-layer access control, NDA workflow, referral engine | +| **Week 1 demo** | Clone running locally, Supabase schema designed, "Request More Info" workflow planned | + +**Interns:** Daksh Garg, Abhinav Chaudhary, Varshit Pratap Singh Bhadauria, Touqeer Hamdani, Aditi Mehta, Ananyaa M, Priyanshu Bhardwaj, Rahul Bijarnia, Khushi Garg, Lavanya Parashar, Saima Afroz, Sonal Yadav, V Bharath Raju + +--- + +### Stream C — Industrial Twin Dashboard (12 interns) + +| | | +|---|---| +| **Brief** | `docs/streams/stream-c-industrial-twin-dashboard.md` | +| **Repo** | [LifeAtlas/factory-twin-dashboard](https://github.com/LifeAtlas/factory-twin-dashboard) | +| **Customer** | Nicolas (internal — platform play for 160+ Swedish factories) | +| **What** | Generic production planning dashboard replacing Excel, with knowledge graph + 3D visualization | +| **Week 1 demo** | L6 dashboard extended to 6 views, factory floor 3D model started 
| + +**Interns:** Aadyant Sood, Jaivardhan Singh, Harshit Kumar, Kailash Narayana Prasad, Sania Gurung, Yashika Verma, Naman Anand, Dia Vats, Devika Hooda, Srishti Gusain, Anupaul Saikia, Abishek Sharma + +--- + +## Timeline + +| Date | Milestone | +|------|-----------| +| **Tue May 6** | L5/L6 challenges live. Interns start. | +| **Tue May 13** | L5/L6 deadline. Submissions via PR. | +| **Wed May 14** | L5/L6 scored. Stream adjustments if needed. | +| **Fri May 16** | Stream work begins. Week 1 demos. | +| **Fri May 23** | Week 2 demos. All streams productive. | +| **Fri May 30** | Week 3 demos. First external stakeholder feedback. | +| **Fri Jun 6** | Week 4 demos. Integration milestones. | +| **Fri Jun 13** | Week 5 demos. Advanced features. | +| **Fri Jun 20** | Week 6 demos. Testing + client feedback. | +| **Fri Jun 27** | Week 7 demos. Hardening + deployment. | +| **Fri Jul 4** | Final demos. Handoff. Retrospective. | + +--- + +## Scoring & Tracking + +| Metric | How | Cadence | +|--------|-----|---------| +| PRs merged | GitHub API scan across all 3 stream repos | Weekly (Friday 14:00 IST) | +| Lines changed | Same scan | Weekly | +| Reviews given | Same scan | Weekly | +| Streak weeks | Consecutive weeks with merged PR | Rolling | +| L5/L6 scores | Manual scoring by leads | One-time (May 14) | + +LPI leaderboard: `lpi-developer-kit/docs/index.html` + +--- + +## Key Principles + +1. **L5/L6 is Sprint 0** — teaches the universal toolchain (Neo4j + Streamlit + Vector + Deploy) +2. **Impact first, data last** — start with the decision, work backward to the data +3. **MVT (Minimal Viable Twin)** — one real answer from one real query = your first twin +4. **Daily commits** — no silent days +5. **Weekly demos** — every Friday, show what works +6. 
**Staging workflow** — feature branch off staging → PR to staging → never touch main + +--- + +## Management + +``` +Nicolas (CEO) — vision, stakeholder relationships, weekly demo review + └── Danial (Tech Lead) — code review, architecture decisions, GitHub admin + ├── Stream A leads — Boardy coordination + ├── Stream B leads — DataPro+ coordination + └── Stream C leads — Industrial Twin coordination +``` + +**Communication:** WhatsApp for async. Weekly demo Fridays. Danial for technical questions. diff --git a/docs/streams/stream-a-boardy.md b/docs/streams/stream-a-boardy.md new file mode 100644 index 000000000..d810c9248 --- /dev/null +++ b/docs/streams/stream-a-boardy.md @@ -0,0 +1,176 @@ +# Stream A — Boardy AI Superconnector + +**Customer:** Nicolas (internal — WINNIIO ecosystem) +**Repo:** `LifeAtlas/lifeatlas-boardy` +**Team Size:** 10 interns +**Duration:** May 16 – June 27, 2026 +**Reference product:** boardy.ai + +--- + +## Background + +### The Problem + +Professional networking is broken. LinkedIn connects people who are SIMILAR. But the most valuable introductions connect people whose REALITIES COMPLEMENT each other — someone who has what you need, and needs what you have, RIGHT NOW. + +No platform does timing-aware, context-sensitive, explained introductions at scale. + +### The Concept + +**Boardy.ai** is an AI that predicts who you should meet and makes the introduction — via conversational interface, proactively, with reasoning. + +**WINNIIO's version:** Same concept, powered by SMILE methodology. The matching isn't keyword similarity — it's reality-aware, timing-sensitive, and explains its reasoning. + +### Cold Start Solution + +Every matching system dies without data. Our unfair advantage: **we already have a real network.** + +| Data source | What it gives us | Available now? 
| +|-------------|-----------------|:-:| +| 32 intern soul files | Skills, interests, tracks, identity statements | YES | +| Stream assignments | Who's working on what, who needs what help | YES | +| Meeting transcripts | Who spoke, engagement signals | YES | +| GitHub activity | Actual contribution patterns | Week 2+ | +| Daily standups | Blockers, progress, collaboration signals | Week 2+ | + +**Day 1 network = the intern cohort. Day 30 network = WINNIIO's full ecosystem.** + +--- + +## SMILE-Powered Matching vs Normal Matching + +| Normal matching | SMILE matching | +|----------------|---------------| +| Cosine similarity on profile embeddings | What's each person's REALITY right now? | +| Static: "you're both in AI" | Hypothesis: "If A meets B, expected outcome is X" — testable | +| One signal (skills overlap) | Multi-signal: skills + needs + timing + personality + complementarity | +| Profile-based | Context-based: "A just hit a blocker that B solved yesterday" | +| Reactive: user searches | Proactive: "Something changed. This intro is now more relevant." | + +**The SMILE difference:** We don't match people who are SIMILAR — we match people whose REALITIES complement each other RIGHT NOW. 
+ +--- + +## Architecture — Neo4j + Vector DB + Hybrid RAG + +``` +Input Layer: + ├── Soul files (structured profiles) + ├── Conversation agent (learns needs/offers over time) + └── Activity signals (git, standups, meetings) + +Knowledge Layer: + ├── Neo4j Graph: + │ ├── Nodes: Person, Skill, Interest, Project, Track + │ ├── Edges: HAS_SKILL, NEEDS, OFFERS, WORKS_ON, COLLABORATED_WITH + │ └── Graph algorithms: community detection, centrality, structural holes + │ + ├── Vector DB (Qdrant or pgvector): + │ ├── Profile embeddings (identity + skills + interests) + │ ├── Need embeddings / Offer embeddings + │ └── Semantic similarity matching + │ + └── Hybrid RAG Matching: + ├── Graph retrieval: structural matches (network position, communities) + ├── Vector retrieval: semantic matches (meaning similarity) + ├── Fusion: weighted combination of both signals + └── LLM Re-ranker: scores top candidates with SMILE reasoning + +Output Layer: + ├── Introduction message (personalized, contextual) + ├── Match reasoning (transparent: "Graph says X + Semantics say Y → therefore Z") + └── Feedback capture (did it work? 
→ updates weights) +``` + +--- + +## The Hybrid RAG Pipeline + +``` +Step 1 — Graph Retrieval (Neo4j): + MATCH (a:Person)-[:HAS_SKILL]->(s)<-[:NEEDS]-(b:Person) + WHERE a <> b AND NOT (a)-[:CONNECTED_TO]-(b) + → Candidates from structural opportunity + +Step 2 — Vector Retrieval: + embed(A.needs) → nearest neighbors in offers_index + → Candidates from semantic similarity + +Step 3 — Hybrid Fusion: + graph_score + vector_score + timing_score + reciprocity + → Unified ranked list + +Step 4 — LLM Re-rank: + Top 20 → Claude: "Rate match quality, explain WHY using SMILE reasoning" + → Final top 10 with explanations +``` + +--- + +## Progressive Build + +### Phase A: Static Matching (Week 1) +- Input: 32 soul files (already exist) +- Output: Top 10 recommended pairings with SMILE-reasoned explanations +- Validation: Nicolas evaluates — "yes that's smart" or "no that's wrong" +- No LLM needed for scoring. LLM only for generating explanation text. + +### Phase B: Conversational (Week 2-3) +- User messages Boardy → asks questions → builds reality canvas +- Matches recalculate based on new information +- Proactive: "Based on what you just told me, you should meet X" + +### Phase C: Live Network (Week 4-6) +- Activity signals (git, standups) feed into graph automatically +- Feedback loop: "Did this intro help?" → improves scoring +- Multi-vertical config (extend beyond intern network) + +### Phase D: Platform (Week 7-8) +- Polish, hardening, demo-ready +- Evaluation metrics: does hybrid beat graph-only? Beat vector-only? +- Documentation and handoff + +--- + +## What Makes This Not "LinkedIn People You May Know" + +1. **Complementarity over similarity.** Matches people whose gaps fill each other's strengths. +2. **Timing-aware.** Matches change as situations change. +3. **Explains WHY.** Every intro comes with SMILE-reasoned explanation. +4. **Reciprocal.** Won't suggest a match unless BOTH sides benefit. +5. **Domain-aware.** Understands manufacturing vs health vs telecom. 
+6. **Proactive.** Notices changes and surfaces opportunities. + +--- + +## Connection to L5/L6 + +| L5/L6 Component | Stream A Extension | +|-----------------|-------------------| +| Neo4j graph schema (L5 Q1) | Person-Skill-Need-Offer knowledge graph | +| Vector + Graph hybrid (L5 Q4) | The entire matching engine | +| Streamlit dashboard (L6) | Match visualization + network graph | +| Self-test page (L6) | Automated match quality evaluation | + +**L5 Bonus A** is specifically designed for this stream: model 5 intern profiles as a knowledge graph, write a complementary-skills query. + +--- + +## Budget + Constraints + +- Week 1 API cost: ~$2-5. Scoring is pure Python. Only explanations use LLM. +- Week 2+: Conversation agent adds cost. Budget: $10/day cap. +- Token tracking from Day 1. +- No hallucinated profiles. Use only soul file data. +- Prompts versioned in `prompts/`. + +--- + +## Rules + +- The intern network is your test data AND your first product. +- Impact first, data last. "What introduction changes someone's week?" +- MVT: One good match, well-explained. That's your twin. +- Daily commits. Weekly demos. +- Every match must explain WHY — no black-box recommendations. diff --git a/docs/streams/stream-b-datapro-dealflow.md b/docs/streams/stream-b-datapro-dealflow.md new file mode 100644 index 000000000..f24928bd5 --- /dev/null +++ b/docs/streams/stream-b-datapro-dealflow.md @@ -0,0 +1,260 @@ +# Stream B — DataPro+ DealFlow Platform + +**Customer:** Josh Young (DataProPlus / EnergyJobline, UK) +**Repos:** `LifeAtlas/datacenter-flow` (frontend) + `LifeAtlas/DataCenterBackend` (backend) +**Team Size:** 13 interns +**Duration:** May 16 – June 27, 2026 + +--- + +## Background + +### The Problem + +The global datacenter market is exploding — driven by AI, cloud computing, and data sovereignty requirements. 
But the **deal-making process** for datacenter capacity, land, and investment is stuck in the 1990s: + +- Opportunities circulate via email, WhatsApp, and spreadsheets +- There's no central platform where buyers and sellers can discover each other +- Sensitive site data (exact location, power contracts, pricing) must be controlled — you can't just put it on a website +- Brokers like Josh manually coordinate introductions between parties +- Due diligence documents are shared as email attachments with no tracking + +**Nothing like this exists.** The incumbents (CoStar, CBRE) focus on traditional real estate. Nobody has built a purpose-built datacenter dealflow platform with gated access and digital twin visualization. + +### The Opportunity + +Josh Young has been brokering datacenter deals across Europe, Middle East, and Asia for years. He has: +- **Live opportunities ready to list** (sites in UK, Nordics, Middle East) +- **A network of buyers** (hyperscalers, colocation providers, investors) +- **A proven revenue model**: introduction fees of 1-3% per deal (deals range EUR 500K to multi-million) + +One successful deal through the platform pays for everything. + +### What Already Exists + +Daniel W built an initial demo at `datacenter-flow.netlify.app` in late 2025. It shows: +- A map-based interface for browsing datacenter opportunities +- Deal cards with key parameters (MW, location, status) +- Basic investor view + +The codebase is clean: React 19 + Vite 6 + Tailwind, ~6K lines, 60 files. 
**You can read all of it on day 1.** + +### What Actually Exists (Honest Audit — May 6, 2026) + +**Frontend (`datacenter-flow`) — Solid browsing shell, ~70% of UI:** +- Supabase auth fully wired (AuthContext, session management, signOut) +- Real data loading from Supabase (deals + 7 related tables: deal_power, deal_land, deal_permits, deal_funding, deal_offtake, deal_specs, documents) +- Deal CRUD in deal-service.ts (create with all nested parameters) +- HomePage: category/region/stage filtering, search, sort, deal cards +- DealPage: detailed view with all parameter sections +- MapPage: Leaflet map with deal locations +- InvestorPage: investor-facing view +- DealEditForm: admin can add/edit deals +- LockedContent component: blur effect + lock overlay — but **FAKE unlock** (setTimeout, not real access control) +- ShareConfigModal: sharing with visibility controls +- Rich TypeScript types: 15+ interfaces for deal parameters + +**Backend (`DataCenterBackend`) — Almost nothing:** +- Single endpoint: `POST /extract-data` → uploads PDFs to Gemini for structured extraction +- Rich Pydantic schema (17 nested models matching frontend types) +- No CRUD endpoints, no auth, no database connection, no user management +- CORS wide open (`allow_origins=["*"]`) + +**What's NOT built (the 90% that matters for Josh):** +- Real 3-layer access control (LockedContent is cosmetic only) +- NDA workflow (no signing, no tracking, no per-opportunity access) +- Admin panel (no user approval, no engagement tracking) +- Lead capture ("Request More Info" → email to Josh) +- User tracking (IP/session/activity) +- Link sharing prevention +- Referral/introduction layer +- Backend API for any of the above +- Supabase RLS policies +- No Supabase project currently connected (env vars needed) + +**Last pushed: January 9, 2026 — 4 months stale.** + +--- + +## The 3-Layer Information Control Model + +This is the core innovation. 
Every opportunity has three layers of visibility: + +### Layer 1 — Public Teaser +Anyone can see: country/region, approximate MW capacity, power status, planning stage, infrastructure overview. **No exact location. No pricing. No owner identity.** + +Purpose: attract interest, let buyers browse and filter. + +### Layer 2 — After NDA +After a user creates an account and signs a digital NDA, they unlock: detailed site intelligence, media packs, technical documents, financial summaries. + +Purpose: serious buyers can evaluate the opportunity before committing to an introduction. + +### Layer 3 — After Approval +Josh (the platform operator) manually approves a buyer for a specific opportunity. They then get: exact location, direct introduction to the site owner, meeting coordination. + +Purpose: protect the deal. Josh earns his introduction fee at this stage. + +### Why This Matters Architecturally + +This isn't a simple CRUD app. You're building: +- **Role-based access control** (viewer → registered → NDA-signed → approved) +- **Per-opportunity permissions** (approved for Site A doesn't mean approved for Site B) +- **Audit trail** (who viewed what, when, from where) +- **Document management** with access tiers +- **Lead qualification workflow** for the platform operator + +--- + +## Three Opportunity Categories + +| Category | What | Key Fields | +|----------|------|------------| +| **A: Capacity Leasing** | Existing datacenter with MW available | Location (city-level), MW available, power source, PUE, connectivity, timeline | +| **B: Land / Powered Land** | Sites suitable for new builds | Acreage, power availability, planning status, distance to fiber/substation | +| **C: Investment / JV** | Projects seeking capital or partners | Capital required, project stage, expected ROI, partner requirements | + +--- + +## Week 0 — This Week (May 6-13): Study + L5/L6 + +This week you're doing L5/L6 (individual LPI challenges) AND studying up on DataPro+. 
+ +### Required Reading (before May 16) +1. **Clone both repos** and run the frontend locally +2. **Read every file in `src/types/deal.ts`** — understand the data model +3. **Read `src/lib/deal-service.ts`** — understand the Supabase integration +4. **Read `src/components/deals/LockedContent.tsx`** — understand the current (fake) access control +5. **Read the backend `schemas.py`** — understand the Pydantic model +6. **Read this entire document** — understand Josh's vision and the business model + +### Questions to Answer (bring to first standup May 16) +1. What tables does the frontend expect in Supabase? (hint: look at deal-service.ts) +2. What's the gap between LockedContent.tsx and Josh's 3-layer access model? +3. How would you implement "Request More Info" → lead capture → email to Josh? +4. What's missing from the backend to support the frontend's needs? + +--- + +## Your Mission (8 Weeks, starting May 16) + +Transform the existing demo into a **production gated marketplace** that Josh can use to list real opportunities and manage real buyer relationships. + +**What you're inheriting:** A working browsing UI with Supabase auth, deal CRUD, maps, and filtering. The frontend is ~70% done for browsing. What's missing is everything that makes it a CONTROLLED platform — access control, NDA, lead capture, admin panel, and backend APIs. 
+ +### Week 1-2: Real Access Control + Backend +- Set up NEW Supabase project for DataPro+ +- Migrate schema from frontend types → real tables with RLS +- Replace fake LockedContent with real per-user, per-opportunity access tiers +- Build "Request More Info" → account creation → lead capture +- Backend: user CRUD, opportunity CRUD, role management endpoints + +### Week 3-4: NDA + Admin Panel +- Digital NDA signing workflow (in-platform, stored per user + per opportunity) +- Admin panel: approve/reject users, manage opportunities, track engagement +- Email notifications to Josh on new leads +- Per-opportunity approval system (approved for Site A ≠ approved for Site B) + +### Week 5-6: Intelligence + Referral Layer +- Improve Gemini PDF extraction pipeline (backend already has the endpoint) +- Auto-categorize opportunities (A: Capacity / B: Land / C: Investment) +- "Make an Introduction" referral workflow (Josh's May 4 feature request) +- Referral tracking (who referred whom, deal linkage) + +### Week 7-8: Polish + Launch +- Security hardening (IP tracking, session monitoring, link sharing prevention) +- Mobile responsive pass +- Digital twin visualization (3D site viewer — the USP) +- **Goal: Josh lists first real opportunity** + +--- + +## The Future + +DataPro+ DealFlow is the first vertical on the Life Atlas platform. The pattern you're building — **gated marketplace + knowledge graph + digital twin** — applies to: + +- **Energy infrastructure** (solar farms, wind, grid connections) +- **Manufacturing facilities** (factory capacity, equipment, workforce) +- **Real estate development** (commercial, industrial, mixed-use) +- **Any asset class** where deals require controlled access, due diligence, and broker-mediated introductions + +The datacenter market is the beachhead. If this works, the platform scales to every asset class where information asymmetry is the business model. 
+ +### Technology Vision + +What you build here connects to the broader Life Atlas architecture: + +- **Knowledge Graph (Neo4j)** — relationships between sites, buyers, sellers, deals, documents, capabilities. Not just a database of listings — a graph of the entire market. +- **Vector Search (Qdrant)** — "Find sites similar to this one" using embeddings of site descriptions, specs, and documents. Buyer profiles matched to relevant opportunities. +- **Digital Twin** — 3D visualization of datacenter sites (power infrastructure, cooling, connectivity, expansion potential). This is the USP that no competitor has. +- **LPI (Life Programmable Interface)** — every query goes through Life Atlas's sovereign consultation layer. Rate limiting, access control, audit logging, and AI guardrails baked in. + +### Revenue Projections + +| Revenue Stream | Per-Deal / Per-Month | Notes | +|----------------|---------------------|-------| +| Introduction fees | 1-3% of deal value | EUR 500K – multi-million per deal | +| Listing fees | Per opportunity | Site owners pay to list | +| Subscription | $10K/month | For brokers, developers, hyperscalers | +| Data intelligence | Per report | Market analysis, site comparison | + +**The goal is that one deal funds the platform.** Everything after that is margin. + +--- + +## Phase 2: Referral / Introduction Layer (May 4, 2026) + +Josh's latest feature request — a network-powered deal sourcing engine: + +### Concept +Allow trusted individuals to **introduce buyers or opportunities** to the platform. In return, they participate in a share of the success fee if a deal completes. 
+ +### Key Principles +- This is NOT a public referral marketplace or contact database +- It IS controlled, private, curated, managed through Josh & Rich +- Everything still flows through the platform as the introducer +- Positioning: "Introduce relevant parties and participate in opportunities" (NOT "submit contacts and earn money") + +### User Flow + +**Refer a Buyer:** +1. User clicks "Make an Introduction" +2. Form: Name, Email, Company, Type (Buyer/Investor/Hyperscaler), Relationship level (Direct/Indirect), Free text context +3. Submission goes to admin (Josh) — NOT visible publicly +4. Josh reviews, qualifies, and if relevant: asks user to make proper introduction +5. Josh takes over the process, manages all communication + +**Refer an Opportunity:** +Same flow but for Land / Power / Capacity / Investment opportunities. + +### Fee Model +- Referrer receives a % of the deal fee +- Example: $100M deal, 1.5% fee ($1.5M), referrer gets 0.25–0.5% ($250K–$500K) +- Manual payouts initially — just tracking capability needed + +### Inspiration +BountyHunter (bountyhunterworld.com) — Josh wants people to invest in sharing opportunities with their LinkedIn and personal networks in return for a slice of the fee. + +### Why This Matters +This turns the platform from direct-outreach-only into a **network-powered dealflow engine**. Josh's observation: "Everyone I speak to says 'I know people' — this monetises that." + +--- + +## Key Links + +- **Existing demo:** datacenter-flow.netlify.app +- **Frontend repo:** github.com/LifeAtlas/datacenter-flow +- **Backend repo:** github.com/LifeAtlas/DataCenterBackend +- **Backend API:** dc-backend.lifeatlas.online + +--- + +## What Makes This Real + +This is not a school project. 
Josh has: +- Live opportunities waiting to list +- Buyers asking for a platform +- Revenue model validated through years of manual brokering +- 5+ high-level datacenter contacts who confirmed "nothing like this exists" + +Your code will serve real users, process real deals, and generate real revenue. Treat it accordingly. diff --git a/docs/streams/stream-c-industrial-twin-dashboard.md b/docs/streams/stream-c-industrial-twin-dashboard.md new file mode 100644 index 000000000..6c34451c3 --- /dev/null +++ b/docs/streams/stream-c-industrial-twin-dashboard.md @@ -0,0 +1,228 @@ +# Stream C — Industrial Twin Dashboard + +**Customer:** Nicolas (internal — platform play) +**Repo:** `LifeAtlas/factory-twin-dashboard` (to be created) +**Team Size:** 12 interns +**Duration:** May 16 – June 27, 2026 + +--- + +## Background + +### The Problem + +Sweden has ~160 steel and composite fabrication companies (SBI members). Nearly all of them manage production planning in Excel — sometimes 40-50 sheet workbooks maintained by a single person. If that person gets sick, the entire production visibility for a 100M+ SEK company disappears. + +This is not unique to steel. Manufacturing across Europe runs on manual spreadsheets, tribal knowledge, and single-person dependencies. The factory floor has sensors and machines, but the planning layer is still a person with a laptop. + +### The Opportunity + +Life Atlas is building a **reusable industrial digital twin dashboard** that: +- Replaces manual Excel production planning with automated dashboards +- Models the factory as a knowledge graph (projects, stations, workers, products, capacity) +- Visualizes production in 3D (factory floor, station layouts, load heatmaps) +- Predicts bottlenecks before they happen +- Eventually simulates "what-if" scenarios (move 2 welders from station A to B — what happens?) + +The first proof point is a Swedish steel fabrication company (real client, paying engagement). 
Your job is to build the **generic platform** that works for ANY factory — not just one customer. + +### Why This Matters for Sweden + +From today's industry discussions: +- 160+ metal fabrication companies in Sweden alone use Excel for production planning +- Production resilience is a national security concern (defense customers include major contractors) +- Swedish data sovereignty requirements mean cloud solutions must keep data in-country +- Edge-native, local-first architecture is not optional — it's the business model + +### What You're Building On + +**L5/L6 is your foundation.** The factory CSV data you're working with this week (8 projects, 9 stations, 13 workers, 8 weeks) is a simplified version of real production planning data. Your Neo4j graph schema from L5 and your Streamlit dashboard from L6 are literally the starting point for Stream C. + +--- + +## The Data Model + +A typical steel fabrication factory has: + +| Entity | Description | Example | +|--------|-------------|---------| +| **Project** | Construction job with a delivery deadline | "Office Building Gothenburg" | +| **Product Type** | What's being manufactured (7 types) | IQB (beams), IQP (columns), SB (special), etc. 
| +| **Station** | Production area in the factory (9-10) | Welding, Assembly, Casting, Painting | +| **Worker** | Person with station certifications | Certified for stations 011, 013, 016 | +| **Week** | Time period for planning | Week 1-52, planned vs actual hours | +| **Etapp** | Delivery phase within a project | Phase 1 (foundations), Phase 2 (structure) | + +### Key Relationships +- Projects produce Products (with quantities and unit factors) +- Projects are scheduled at Stations (with planned vs actual hours per week) +- Workers work at Stations (primary + can cover others) +- Weeks have Capacity (own staff + hired + overtime vs demand) +- Stations have Bottleneck alerts when actual > planned + +### The 17 Formulas + +Real production planning uses ~17 core calculations: +- Total time per project = sum(quantity × factor) per product type +- Weekly delta = accumulated hours this week minus last week +- Variance = actual station hours vs planned +- Capacity surplus/deficit = (own + hired + overtime) - total demand +- Station load distribution across weeks +- Gjutning (casting)/Painting proportional allocation splits +- Average time per unit (rolling and weekly) + +These are documented. Your job is to implement them in Python, not invent them. + +--- + +## Architecture + +``` +Excel / CSV Import (openpyxl, pandas) + → PostgreSQL / Neo4j (structured + graph) + → Python Engine (17 formulas) + → Streamlit Dashboard (6+ views) + → 3D Visualization (Cesium / Three.js) +``` + +### Stack +- **Python 3.12** + pandas + openpyxl +- **Streamlit** for dashboards (Plotly for charts) +- **Neo4j** for knowledge graph (relationships, traversals, what-if queries) +- **PostgreSQL** for tabular data (time series, capacity tracking) +- **Cesium** or **Three.js** for 3D factory floor visualization + +--- + +## Dashboard Views (6 minimum) + +### 1. 
Capacity Overview +- Weekly staffing: own staff, hired, overtime +- Surplus/deficit per week (color-coded: green/yellow/red) +- Rolling 8-week forecast + +### 2. Project View +- All active projects with plan vs actual hours +- Time series: planned trajectory vs actual trajectory +- Variance percentage per project + +### 3. Production Volume +- Product type × week × station matrix +- Etapp breakdown (which delivery phase) +- Completed units vs remaining + +### 4. Station Load +- 9-10 stations, hours per week, color-coded by utilization +- Highlight overloaded stations (actual > planned by >10%) +- Worker assignment per station + +### 5. Average Times +- Accumulated and weekly average time per product type +- Trend lines showing if efficiency is improving or degrading + +### 6. Worker Coverage +- Matrix: which workers can cover which stations +- Single-point-of-failure alerts (station with only 1 certified worker) +- Certification gaps + +--- + +## Week 0 — This Week (May 6-13): L5/L6 + Study + +### L5/L6 (Individual — LPI Leaderboard) +Everyone does L5 (graph thinking) and L6 (build factory dashboard). This IS your Stream C onboarding: +- L5 Q1: Design the graph schema → this becomes your Stream C data model +- L5 Q5: Write your L6 blueprint → this becomes your Stream C architecture +- L6: Build the Streamlit dashboard → this is literally what you'll extend in Stream C + +### Study Up +1. Read about digital twins in manufacturing (what they are, why they matter) +2. Understand Neo4j graph modeling for production planning +3. Look at Cesium (cesium.com) for 3D geospatial visualization +4. 
Read about Swedish manufacturing (SBI, steel fabrication, production stations) + +--- + +## Your Mission (8 Weeks, starting May 16) + +### Sub-Teams (suggested, 12 interns) + +| Sub-Team | Focus | Size | +|----------|-------|------| +| **Dashboard Core** | Streamlit views, Plotly charts, 17 formula engine | 4 | +| **Data Pipeline** | Excel parser, CSV/DB import, Neo4j graph seeding | 3 | +| **Analytics & Alerts** | Bottleneck detection, capacity forecasting, what-if scenarios | 3 | +| **3D Visualization** | Cesium/Three.js factory floor, station layout, load heatmaps | 2 | + +### Week 1-2: Foundation +- Extend L6 dashboard from 4 views to 6 views +- Build Excel parser (openpyxl) for real-format production spreadsheets +- Set up PostgreSQL + Neo4j for dual storage +- 3D team: first factory floor model (rectangles for stations, positioned in grid) + +### Week 3-4: Formula Engine + Graph Queries +- Implement all 17 production formulas in Python +- Power dashboards from Neo4j queries (not raw CSV) +- Capacity forecasting: "Given current trajectory, which station overloads in week 9?" +- 3D team: color-code stations by load, click station → show projects + +### Week 5-6: Intelligence Layer +- Bottleneck alerts (automated detection when actual > planned by threshold) +- What-if scenarios: "Move 2 workers from station 014 to 013 — what happens?" +- Worker coverage gap analysis +- 3D team: embed visualization in Streamlit dashboard + +### Week 7-8: Polish + Platform +- Make the dashboard configurable for ANY factory (not hardcoded to one dataset) +- Template system: upload your Excel → get your dashboard +- Performance optimization, mobile responsive +- **Goal: Demo to a real factory owner** + +--- + +## The Future + +This dashboard is the **beachhead product** for Life Atlas's industrial digital twin platform. 
+ +### Phase 2: Simulation + Optimization (post-intern) +- What-if scenarios with constraint optimization +- Predictive bottleneck detection using historical patterns +- Copilot queries: "Which projects are at risk this month?" +- Integration with real factory systems (AGDA PS for station hours, Visma Net for cost/revenue) + +### Phase 3: Full Digital Twin +- Real-time sensor data integration +- 3D walkthrough of factory with live production status +- Multi-factory visualization (combine physical sites into unified virtual factory) +- Edge-native deployment (data stays in-country, on-premise option) + +### Scale +- 160 SBI member factories in Sweden alone +- Thousands of similar manufacturers across Europe +- Every factory that runs on Excel is a potential customer +- The platform pattern applies to ANY production environment (not just steel) + +--- + +## Connection to L5/L6 + +| L5/L6 Component | Stream C Extension | +|-----------------|-------------------| +| Neo4j graph schema (L5 Q1) | Production knowledge graph with all 17 formulas | +| SQL vs Cypher comparison (L5 Q2) | Graph-powered dashboard queries | +| Bottleneck analysis (L5 Q3) | Automated alert system | +| Vector + Graph hybrid (L5 Q4) | "Find similar past projects" for new order estimation | +| Streamlit dashboard (L6) | 6-view production planning dashboard | +| Self-test page (L6) | CI/CD quality gates | + +**L5/L6 is not homework. It's your first sprint.** + +--- + +## Key Context + +- This is a PLATFORM play, not a one-off dashboard. Think "Shopify for factory dashboards." +- The synthetic data in `challenges/data/` mirrors real production planning patterns +- Real customers exist and are paying — but interns work on the GENERIC platform, not on customer-specific data +- 3D visualization is the differentiator that no competitor has +- Swedish data sovereignty is a requirement, not a nice-to-have