forked from Mroziu12/DVP
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcalculateBoxplotData.js
More file actions
233 lines (189 loc) · 7.12 KB
/
calculateBoxplotData.js
File metadata and controls
233 lines (189 loc) · 7.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
/**
* Calculate Boxplot Statistics for Skill Level vs Salary
*
* This script processes ClearOffers2.json to calculate boxplot statistics
* (min, Q1, median, Q3, max, whisker bounds, outliers, count) for salary
* distribution across different skill levels.
*
* Usage: node calculateBoxplotData.js
* Output: boxplotData.js (JavaScript file with data embedded)
*/
const fs = require('fs');
const path = require('path');
// Read the offers dataset (ClearOffers2.json, located next to this script)
const offersPath = path.join(__dirname, 'ClearOffers2.json');
const offers = JSON.parse(fs.readFileSync(offersPath, 'utf8'));
// Report the file that was actually read so the log cannot drift from the path above
console.log(`Loaded ${offers.length} offers from ${path.basename(offersPath)}`);
/**
 * Summarise an ascending-sorted numeric sample as boxplot statistics.
 *
 * Values lying more than 1.5 × IQR below Q1 or above Q3 are reported as
 * outliers; the whiskers stop at the furthest values that stay inside those
 * limits (and never move inside the Q1–Q3 box).
 *
 * @param {number[]} sortedArray - Numbers already sorted in ascending order
 * @returns {Object} Object with min, q1, median, q3, max, whiskerMin,
 *   whiskerMax, outliers and count; every statistic is rounded to two
 *   decimals. An empty input yields zeros, no outliers and count 0.
 */
function calculateQuartiles(sortedArray) {
  const sampleSize = sortedArray.length;

  // Nothing to summarise: return an all-zero record
  if (sampleSize === 0) {
    return { min: 0, q1: 0, median: 0, q3: 0, max: 0, whiskerMin: 0, whiskerMax: 0, outliers: [], count: 0 };
  }

  const toTwoDecimals = value => Math.round(value * 100) / 100;

  const q1 = getPercentile(sortedArray, 25);
  const median = getPercentile(sortedArray, 50);
  const q3 = getPercentile(sortedArray, 75);

  // Limits beyond which a value counts as an outlier
  const iqr = q3 - q1;
  const lowerLimit = q1 - 1.5 * iqr;
  const upperLimit = q3 + 1.5 * iqr;

  // Whiskers start at the box edges and stretch to the furthest in-limit values
  let lowWhisker = q1;
  let highWhisker = q3;
  const outlierValues = [];

  for (const sample of sortedArray) {
    if (sample < lowerLimit || sample > upperLimit) {
      outlierValues.push(sample);
      continue;
    }
    if (sample < lowWhisker) {
      lowWhisker = sample;
    }
    if (sample > highWhisker) {
      highWhisker = sample;
    }
  }

  return {
    min: toTwoDecimals(sortedArray[0]),
    q1: toTwoDecimals(q1),
    median: toTwoDecimals(median),
    q3: toTwoDecimals(q3),
    max: toTwoDecimals(sortedArray[sampleSize - 1]),
    whiskerMin: toTwoDecimals(lowWhisker),
    whiskerMax: toTwoDecimals(highWhisker),
    outliers: outlierValues.map(toTwoDecimals),
    count: sampleSize
  };
}
/**
 * Get a percentile value from a sorted array, linearly interpolating between
 * the two closest ranks when the requested position falls between elements.
 *
 * @param {number[]} sortedArray - Array of numbers sorted in ascending order
 * @param {number} percentile - Percentile to calculate (0-100); values outside
 *   that range are clamped, so anything below 0 yields the first element and
 *   anything above 100 yields the last one
 * @returns {number} Percentile value, or NaN when the array is empty
 */
function getPercentile(sortedArray, percentile) {
  const lastIndex = sortedArray.length - 1;

  // An empty sample has no percentiles; return NaN rather than reading index -1
  if (lastIndex < 0) {
    return NaN;
  }

  // Clamp so the fractional rank can never point outside the array
  const boundedPercentile = Math.min(100, Math.max(0, percentile));
  const index = (boundedPercentile / 100) * lastIndex;
  const lower = Math.floor(index);
  const upper = Math.ceil(index);

  // Exact rank: no interpolation required
  if (lower === upper) {
    return sortedArray[lower];
  }

  const weight = index - lower;
  return sortedArray[lower] * (1 - weight) + sortedArray[upper] * weight;
}
/**
 * Process offers for a specific skill and calculate boxplot data.
 *
 * Offers without a positive salary_eur or without a skills array are ignored.
 * The first skill entry whose name matches skillName (case-insensitively)
 * decides which level bucket (1-5) the offer's salary goes into. Malformed
 * skill entries (null, non-string name, level without a bucket) are skipped
 * instead of aborting the whole run.
 *
 * @param {string} skillName - Name of the skill to filter by
 * @param {Object[]} [offerList=offers] - Offers to analyse; defaults to the
 *   offers loaded at module level
 * @returns {Object} Boxplot data keyed by skill level; levels with no salaries
 *   are omitted
 */
function calculateBoxplotDataForSkill(skillName, offerList = offers) {
  console.log(`\nProcessing skill: ${skillName}`);

  // Lower-case the target once instead of once per inspected skill entry
  const wantedName = skillName.toLowerCase();

  // Group salaries by skill level for specific skill
  const salaryByLevel = {
    1: [],
    2: [],
    3: [],
    4: [],
    5: []
  };

  // Process each offer
  offerList.forEach(offer => {
    // Skip offers without salary
    if (!offer.salary_eur || offer.salary_eur <= 0) {
      return;
    }

    // Skip offers that carry no skill list
    if (!Array.isArray(offer.skills)) {
      return;
    }

    // Find the skill in this offer, tolerating null entries and non-string names
    const skill = offer.skills.find(s =>
      s && typeof s.name === 'string' && s.name !== '' && s.name.toLowerCase() === wantedName
    );
    if (!skill || !(skill.level >= 1 && skill.level <= 5)) {
      return;
    }

    // A level such as 2.5 is inside the range but has no bucket; ignore it
    const bucket = salaryByLevel[skill.level];
    if (Array.isArray(bucket)) {
      bucket.push(offer.salary_eur);
    }
  });

  // Calculate statistics for each level
  const boxplotData = {};
  for (let level = 1; level <= 5; level++) {
    let salaries = salaryByLevel[level];

    if (salaries.length > 0) {
      // Sort salaries
      salaries.sort((a, b) => a - b);

      // Filter out extreme outliers (above 95th percentile) to prevent chart distortion
      // Only apply if we have enough data points
      if (salaries.length >= 10) {
        const p95 = getPercentile(salaries, 95);
        salaries = salaries.filter(s => s <= p95);
      }

      // Calculate quartiles on filtered data
      const stats = calculateQuartiles(salaries);
      boxplotData[level] = stats;
      console.log(`Level ${level}: ${stats.count} offers, median: €${stats.median}`);
    } else {
      console.log(`Level ${level}: No data`);
    }
  }

  return boxplotData;
}
/**
 * Build the boxplot data set for every skill mentioned in the loaded offers.
 *
 * Skill names are collected exactly as written, sorted with the default
 * string ordering, and each one is passed to calculateBoxplotDataForSkill.
 * Skills whose result has no level entries are left out.
 *
 * NOTE(review): names are collected case-sensitively while the per-skill
 * lookup lower-cases both sides, so names differing only in case may yield
 * duplicate entries — confirm whether that is intended.
 *
 * @returns {Object} Boxplot data keyed by skill name
 */
function calculateAllBoxplotData() {
  console.log('\n=== Calculating Boxplot Data for All Skills ===\n');

  // Collect every distinct, non-empty skill name
  const uniqueNames = new Set();
  for (const offer of offers) {
    if (!Array.isArray(offer.skills)) {
      continue;
    }
    for (const skill of offer.skills) {
      if (skill.name) {
        uniqueNames.add(skill.name);
      }
    }
  }

  const orderedNames = [...uniqueNames].sort();
  console.log(`Found ${orderedNames.length} unique skills`);

  // Keep only the skills that produced statistics for at least one level
  const resultBySkill = {};
  for (const skillName of orderedNames) {
    const perLevelStats = calculateBoxplotDataForSkill(skillName);
    if (Object.keys(perLevelStats).length === 0) {
      continue;
    }
    resultBySkill[skillName] = perLevelStats;
  }

  console.log(`\nGenerated boxplot data for ${Object.keys(resultBySkill).length} skills`);
  return resultBySkill;
}
// Main execution: compute the statistics for every skill, then persist them
console.log('=== Boxplot Data Calculator ===\n');
const allBoxplotData = calculateAllBoxplotData();

// Write to JavaScript file for browser usage.
// The generated header documents every field emitted per level, including the
// whisker bounds and outliers.
const outputPath = path.join(__dirname, 'boxplotData.js');
const jsContent = `// Auto-generated boxplot data
// Generated on: ${new Date().toISOString()}
// Format: { skillName: { level: { min, q1, median, q3, max, whiskerMin, whiskerMax, outliers, count } } }
const BOXPLOT_DATA = ${JSON.stringify(allBoxplotData, null, 2)};
// Export for use in browser
if (typeof window !== 'undefined') {
window.BOXPLOT_DATA = BOXPLOT_DATA;
}
// Export for Node.js
if (typeof module !== 'undefined' && module.exports) {
module.exports = BOXPLOT_DATA;
}
`;
fs.writeFileSync(outputPath, jsContent, 'utf8');
console.log(`\n✓ Boxplot data written to: ${outputPath}`);
console.log(`✓ Total skills with data: ${Object.keys(allBoxplotData).length}`);

// Print the first three skills so the output can be eyeballed for verification
const sampleSkills = Object.keys(allBoxplotData).slice(0, 3);
console.log('\n=== Sample Data ===');
sampleSkills.forEach(skill => {
  console.log(`\n${skill}:`);
  console.log(JSON.stringify(allBoxplotData[skill], null, 2));
});