-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathConsts.h
More file actions
151 lines (145 loc) · 8.48 KB
/
Consts.h
File metadata and controls
151 lines (145 loc) · 8.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#ifndef CONSTS_H
#define CONSTS_H
// Number of supported languages; every per-language table in this header has this extent.
constexpr int maxN = 20;
// maxN * 1000, i.e. 20000. NOTE(review): presumably an upper bound on a score summed
// over all languages (1000 is the largest entry in the similarity table) - confirm.
constexpr int maxW = 1000 * maxN;
// Bitmask with the low maxN bits set (presumably one bit per language).
constexpr int maxMask = (1 << maxN) - 1;
// Large sentinel value, two billion. It fits in a 32-bit int, but adding two of
// them together would overflow.
constexpr int INF = 2000000000;
// Upper-case display names of the supported languages, indexed by LANGUAGE
// enumerator value (langs[ARABIC] is "ARABIC", ..., langs[VIETNAMESE] is "VIETNAMESE").
// The elements are pointers to const char: string literals are arrays of const char,
// and C++11 removed the implicit conversion from a literal to plain char*.
constexpr const char* langs[maxN] = {
    "ARABIC",     "CHINESE",   "DUTCH",     "ENGLISH",
    "FINNISH",    "FRENCH",    "GERMAN",    "GREEK",
    "HEBREW",     "ITALIAN",   "JAPANESE",  "KOREAN",
    "NORWEGIAN",  "POLISH",    "PORTUGUESE", "RUSSIAN",
    "SPANISH",    "SWEDISH",   "THAI",      "VIETNAMESE"
};
// Identifiers for the supported languages. The values run consecutively from 0
// to 19 in alphabetical order and serve as indices into the per-language tables.
enum LANGUAGE {
    ARABIC = 0,   // anchor; every following enumerator is the previous one plus 1
    CHINESE,      //  1
    DUTCH,        //  2
    ENGLISH,      //  3
    FINNISH,      //  4
    FRENCH,       //  5
    GERMAN,       //  6
    GREEK,        //  7
    HEBREW,       //  8
    ITALIAN,      //  9
    JAPANESE,     // 10
    KOREAN,       // 11
    NORWEGIAN,    // 12
    POLISH,       // 13
    PORTUGUESE,   // 14
    RUSSIAN,      // 15
    SPANISH,      // 16
    SWEDISH,      // 17
    THAI,         // 18
    VIETNAMESE    // 19
};
// Pairwise language similarity scores: similarity[a][b] for LANGUAGE values a and b.
// Entries in this table lie between 0 and 1000; every diagonal entry is 1000 and the
// table is symmetric (similarity[a][b] == similarity[b][a]).
// Rows and columns both follow LANGUAGE enumerator order.
// NOTE(review): the source and derivation of the scores are not stated in this file.
constexpr int similarity[maxN][maxN] = {
{ 1000, 56, 172, 145, 83, 211, 237, 175, 721, 258, 193, 214, 169, 181, 215, 192, 234, 171, 144, 156 }, // ARABIC
{ 56, 1000, 128, 115, 115, 159, 128, 42, 28, 106, 174, 182, 130, 114, 114, 140, 114, 139, 162, 79 }, // CHINESE
{ 172, 128, 1000, 782, 149, 418, 865, 396, 144, 494, 95, 111, 662, 404, 388, 420, 416, 793, 213, 69 }, // DUTCH
{ 145, 115, 782, 1000, 144, 531, 687, 279, 86, 475, 128, 108, 717, 410, 435, 475, 407, 690, 104, 67 }, // ENGLISH
{ 83, 115, 149, 144, 1000, 135, 175, 143, 79, 135, 184, 200, 129, 231, 125, 207, 98, 154, 28, 79 }, // FINNISH
{ 211, 159, 418, 531, 135, 1000, 443, 391, 202, 798, 132, 174, 561, 495, 729, 495, 701, 515, 112, 66 }, // FRENCH
{ 237, 128, 865, 687, 175, 443, 1000, 386, 207, 524, 160, 75, 719, 469, 412, 498, 432, 819, 229, 103 }, // GERMAN
{ 175, 42, 396, 279, 143, 391, 386, 1000, 193, 344, 160, 86, 377, 215, 322, 243, 294, 387, 274, 198 }, // GREEK
{ 721, 28, 144, 86, 79, 202, 207, 193, 1000, 225, 121, 158, 195, 202, 243, 180, 206, 197, 104, 128 }, // HEBREW
{ 258, 106, 494, 475, 135, 798, 524, 344, 225, 1000, 100, 163, 502, 587, 817, 567, 860, 610, 136, 46 }, // ITALIAN
{ 193, 174, 95, 128, 184, 132, 160, 160, 121, 100, 1000, 157, 116, 67, 100, 67, 67, 104, 57, 123 }, // JAPANESE
{ 214, 182, 111, 108, 200, 174, 75, 86, 158, 163, 157, 1000, 51, 177, 169, 130, 141, 83, 176, 98 }, // KOREAN
{ 169, 130, 662, 717, 129, 561, 719, 377, 195, 502, 116, 51, 1000, 443, 489, 462, 468, 861, 151, 97 }, // NORWEGIAN
{ 181, 114, 404, 410, 231, 495, 469, 215, 202, 587, 67, 177, 443, 1000, 500, 916, 521, 508, 126, 0 }, // POLISH
{ 215, 114, 388, 435, 125, 729, 412, 322, 243, 817, 100, 169, 489, 500, 1000, 498, 833, 525, 65, 45 }, // PORTUGUESE
{ 192, 140, 420, 475, 207, 495, 498, 243, 180, 567, 67, 130, 462, 916, 498, 1000, 471, 493, 134, 31 }, // RUSSIAN
{ 234, 114, 416, 407, 98, 701, 432, 294, 206, 860, 67, 141, 468, 521, 833, 471, 1000, 535, 91, 45 }, // SPANISH
{ 171, 139, 793, 690, 154, 515, 819, 387, 197, 610, 104, 83, 861, 508, 525, 493, 535, 1000, 124, 69 }, // SWEDISH
{ 144, 162, 213, 104, 28, 112, 229, 274, 104, 136, 57, 176, 151, 126, 65, 134, 91, 124, 1000, 190 }, // THAI
{ 156, 79, 69, 67, 79, 66, 103, 198, 128, 46, 123, 98, 97, 0, 45, 31, 45, 69, 190, 1000 }, // VIETNAMESE
};
// Per-language popularity score, indexed by LANGUAGE enumerator value.
// The trailing comments quote approximate total speaker counts. The stored scores
// are not those counts (e.g. DUTCH is 140 against ~24–30 million speakers), and the
// scale is not defined in this file.
// NOTE(review): confirm how the scores were derived before relying on their ratios.
constexpr int popularity[maxN] = {
320, // 0: ARABIC ~335–422 million total (varies by MSA vs. dialects)
950, // 1: CHINESE ~1,100–1,184 million total (mostly Mandarin; huge native base)
140, // 2: DUTCH ~24–30 million total (very high per-capita online/cultural presence but small overall)
950, // 3: ENGLISH ~1.45–1.52 billion total (by far the most used second language)
80, // 4: FINNISH ~5–6 million total (very small global footprint)
300, // 5: FRENCH ~300–312 million total (strong in Africa, diplomacy, culture)
200, // 6: GERMAN ~130–140 million total (strong economy/science but limited L2 spread)
60, // 7: GREEK ~13–15 million total (mostly native, diaspora)
90, // 8: HEBREW ~9–15 million total (revived, but small speaker base)
280, // 9: ITALIAN ~65–85 million total (cultural prestige, some L2)
130, // 10: JAPANESE ~125–130 million total (almost all native, huge cultural export)
100, // 11: KOREAN ~80–85 million total (growing via K-pop/drama, still mostly native)
60, // 12: NORWEGIAN ~5–6 million total (very small)
110, // 13: POLISH ~40–45 million total (large diaspora)
260, // 14: PORTUGUESE ~260–280 million total (Brazil + Portugal + Africa)
260, // 15: RUSSIAN ~255–260 million total (strong in post-Soviet space)
550, // 16: SPANISH ~550–560 million total (huge in Americas, growing L2)
100, // 17: SWEDISH ~13–15 million total (small but high development)
70, // 18: THAI ~60–70 million total (mostly native in Thailand)
95 // 19: VIETNAMESE ~90–100 million total (mostly native, growing diaspora)
};
// Per-language global-presence score, indexed by LANGUAGE enumerator value.
// The largest entry is 1000 (ENGLISH) and the smallest is 90 (FINNISH). The trailing
// comments give the speaker counts and the qualitative reasoning behind each score.
// NOTE(review): the scale and weighting are not defined in this file - confirm
// before comparing these scores with those in the popularity table.
constexpr int presence[maxN] = {
380, // 0: ARABIC ~335–422M total; strong regional + religious/media presence, but dialects limit unity/online share (~0.6%)
850, // 1: CHINESE ~1.1–1.2B total (mostly native); massive population base, growing international business, but limited L2 fluency & online dominance
220, // 2: DUTCH ~24–30M total; high per-capita online/economic presence (Netherlands + Belgium), but small absolute reach
1000, // 3: ENGLISH ~1.45–1.52B total; undisputed #1 in global business, internet (52%+ of content), science, media, aviation, diplomacy
90, // 4: FINNISH ~5–6M total; very high development/online per capita, but tiny global footprint
420, // 5: FRENCH ~300–312M total; strong in Africa/diplomacy/culture/fashion; good L2 spread & international orgs
320, // 6: GERMAN ~130–140M total; major economy/science/engineering; solid European presence, moderate L2
110, // 7: GREEK ~13–15M total; cultural/historical weight, but mostly native + diaspora
140, // 8: HEBREW ~9–15M total; revived + tech/startup presence (Israel), but small numbers
340, // 9: ITALIAN ~65–85M total; huge cultural/art/food/fashion prestige; moderate L2 learners
380, // 10: JAPANESE ~125–130M total; almost all native; enormous global cultural export (anime, games, tech, J-pop)
320, // 11: KOREAN ~80–85M total; rapid rise via K-pop, K-drama, tech (Samsung); growing L2 interest
100, // 12: NORWEGIAN ~5–6M total; very small; high development but minimal global reach
180, // 13: POLISH ~40–45M total; large diaspora (US/UK); solid in Europe, limited beyond
380, // 14: PORTUGUESE ~260–280M total; Brazil drives numbers + growing African presence; moderate online/cultural
340, // 15: RUSSIAN ~255–260M total; strong post-Soviet + energy/diplomacy; significant media/online in region
680, // 16: SPANISH ~550–560M total; huge in Americas + US growth; #2–3 online, very teachable & widespread
160, // 17: SWEDISH ~13–15M total; high innovation/online per capita (Spotify etc.), but small scale
140, // 18: THAI ~60–70M total; mostly native in Thailand + tourism; limited global spread
190 // 19: VIETNAMESE ~90–100M total; growing diaspora + economic rise; mostly native, emerging presence
};
// Tone level per language, indexed by LANGUAGE enumerator value.
// The stored levels are 1, 2 and 3 (no entry is 0):
//   1 = no lexical tone; the pitch-accent languages (JAPANESE, KOREAN) are also rated 1
//   2 = tonal (CHINESE)
//   3 = complex tone system (THAI, VIETNAMESE)
constexpr int toneLevel[maxN] = {
1, // ARABIC
2, // CHINESE (Mandarin tones)
1, // DUTCH
1, // ENGLISH
1, // FINNISH
1, // FRENCH
1, // GERMAN
1, // GREEK
1, // HEBREW
1, // ITALIAN
1, // JAPANESE (pitch accent)
1, // KOREAN (pitch accent)
1, // NORWEGIAN
1, // POLISH
1, // PORTUGUESE
1, // RUSSIAN
1, // SPANISH
1, // SWEDISH
3, // THAI (5 tones, complex)
3 // VIETNAMESE (6 tones)
};
// Dominant word order per language, indexed by LANGUAGE enumerator value.
// Codes: 1 = SOV, 2 = SVO, 3 = VSO (approximate).
// No entry uses 3: languages annotated as mixed (VSO/SVO) or verb-second (V2)
// are recorded as 2. Only JAPANESE and KOREAN are SOV; every language annotated
// as SVO, including CHINESE and RUSSIAN, carries code 2.
constexpr int wordOrder[maxN] = {
    2, // ARABIC      VSO/SVO
    2, // CHINESE     SVO but topic-prominent
    2, // DUTCH       SVO/V2
    2, // ENGLISH     SVO
    2, // FINNISH     SVO flexible
    2, // FRENCH      SVO
    2, // GERMAN      V2
    2, // GREEK       SVO
    2, // HEBREW      SVO/VSO
    2, // ITALIAN     SVO
    1, // JAPANESE    SOV
    1, // KOREAN      SOV
    2, // NORWEGIAN   SVO/V2
    2, // POLISH      SVO flexible
    2, // PORTUGUESE  SVO
    2, // RUSSIAN     SVO flexible
    2, // SPANISH     SVO
    2, // SWEDISH     SVO/V2
    2, // THAI        SVO
    2  // VIETNAMESE  SVO
};
// Writing-system code per language, indexed by LANGUAGE enumerator value.
// Codes: 0 = Latin, 1 = Cyrillic, 2 = Greek, 3 = Han/Kana/Hangul, 4 = Thai,
//        5 = Arabic and Hebrew (the two scripts share one code).
// NOTE(review): confirm that Arabic and Hebrew are meant to be indistinguishable here.
constexpr int script[maxN] = {
    5, // ARABIC
    3, // CHINESE
    0, // DUTCH
    0, // ENGLISH
    0, // FINNISH
    0, // FRENCH
    0, // GERMAN
    2, // GREEK
    5, // HEBREW
    0, // ITALIAN
    3, // JAPANESE
    3, // KOREAN
    0, // NORWEGIAN
    0, // POLISH
    0, // PORTUGUESE
    1, // RUSSIAN
    0, // SPANISH
    0, // SWEDISH
    4, // THAI
    0  // VIETNAMESE
};
// Morphological complexity per language, indexed by LANGUAGE enumerator value.
// Codes: 0 = low (isolating like English), 1 = medium,
//        2 = high (agglutinative like Finnish).
// NOTE(review): THAI and VIETNAMESE are rated 1 although they are usually
// described as isolating - confirm whether 0 was intended.
constexpr int morphComplex[maxN] = {
    1, // ARABIC
    0, // CHINESE
    1, // DUTCH
    0, // ENGLISH
    2, // FINNISH
    1, // FRENCH
    1, // GERMAN
    1, // GREEK
    1, // HEBREW
    1, // ITALIAN
    2, // JAPANESE
    2, // KOREAN
    1, // NORWEGIAN
    1, // POLISH
    1, // PORTUGUESE
    1, // RUSSIAN
    1, // SPANISH
    1, // SWEDISH
    1, // THAI
    1  // VIETNAMESE
};
#endif