llm-code-benchmark/ExtendedEval.jsonl at main · AuthEceSoftEng/llm-code-benchmark · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
{"task_id": "ExtendedEval/0", "prompt": "from typing import List\nimport math\n\ndef has_close_elements(numbers: List[float], threshold: float) -> bool:\n    \"\"\"\n    Return True if any two distinct elements in `numbers` differ by less than or equal to `threshold`.\n\n    The function must handle:\n    - Very small differences (floating-point precision).\n    - Lists with negative, zero, and positive numbers.\n    - Edge cases like empty lists or one element.\n    - Duplicated numbers, including infinities.\n    - NaN values (NaN != NaN, so NaN pairs return False unless identical objects).\n\n    Examples:\n    >>> has_close_elements([1.0, 2.0, 3.0], 0.5)\n    False\n    >>> has_close_elements([1e-9, 1e-9 + 1e-12], 1e-9)\n    True\n    >>> has_close_elements([], 0.1)\n    False\n    >>> has_close_elements([float('inf'), float('inf')], 0.0)\n    True\n    \n    Please provide only the Python function code with no explanations.\n    \"\"\"\n", "entry_point": "has_close_elements", "canonical_solution": "    n = len(numbers)\n    for i in range(n):\n        for j in range(i + 1, n):\n            x, y = numbers[i], numbers[j]\n            # Handle NaN: NaN != NaN, but identical NaN objects are equal\n            if math.isnan(x) and math.isnan(y):\n                continue  # NaN pairs are not considered close\n            if x == y or (not (math.isnan(x) or math.isnan(y)) and abs(x - y) <= threshold):\n                return True\n    return False", "test": "METADATA = {\n    'author': 'taniakaragiannidi',\n    'dataset': 'ExtendedEval'\n}\n\ndef check(candidate):\n    assert candidate([1.0, 2.0, 3.9, 4.0, 5.0, 2.2], 0.3) == True\n    assert candidate([1.0, 2.0, 3.9, 4.0, 5.0, 2.2], 0.05) == False\n    assert candidate([1.0, 2.0, 5.9, 4.0, 5.0], 0.95) == True\n    assert candidate([-1000.0, -1000.1, -1000.2], 0.15) == True\n    assert candidate([0.00000001, 0.000000011], 1e-9) == True\n    assert candidate([float('inf'), float('inf')], 0.0) == True\n    assert 
candidate([float('-inf'), float('inf')], 1e10) == False\n    assert candidate([], 1.0) == False\n    assert candidate([1.0], 1.0) == False\n    assert candidate([1.0, 1.0], 0.0) == True\n    # NaN handling\n    assert candidate([float('nan'), float('nan')], 0.0) == False\n    assert candidate([5.0, 5.0 + 1e-12], 1e-11) == True"}
{"task_id": "ExtendedEval/0_clustering", "prompt": "from typing import List\n\ndef has_close_cluster(numbers: List[float], threshold: float, min_cluster_size: int = 3, precision: int = 6) -> bool:\n    \"\"\"\n    Return True if there exists a cluster of at least min_cluster_size numbers\n    where all pairs within the cluster are within threshold distance of each other.\n    \n    Comparison is done after rounding all numbers to the specified precision.\n    A cluster is a group of numbers where every pair in the group has distance <= threshold.\n    \n    Args:\n    - numbers: A list of floating-point numbers.\n    - threshold: Maximum allowed distance between any two numbers in a cluster.\n    - min_cluster_size: Minimum number of elements required to form a cluster.\n    - precision: Number of decimal places to round to before comparison.\n    \n    Returns:\n    - True if a valid cluster exists, False otherwise.\n    \n    Examples:\n    >>> has_close_cluster([1.0, 1.1, 1.2, 5.0], 0.15, min_cluster_size=3)\n    True  # 1.0, 1.1, 1.2 form a cluster\n    >>> has_close_cluster([1.0, 2.0, 3.0], 0.5, min_cluster_size=3)\n    False  # no cluster of size 3 exists\n    >>> has_close_cluster([1.0, 1.05, 2.0, 2.05], 0.1, min_cluster_size=2)\n    True  # multiple clusters of size 2\n    \n    Please provide only the Python function code with no explanations.\n    \"\"\"\n", "entry_point": "has_close_cluster", "canonical_solution": "    if len(numbers) < min_cluster_size:\n        return False\n    \n    # Round all numbers to specified precision\n    rounded_numbers = [round(num, precision) for num in numbers]\n    n = len(rounded_numbers)\n    \n    # Check all possible subsets of size min_cluster_size or larger\n    from itertools import combinations\n    \n    for size in range(min_cluster_size, n + 1):\n        for cluster_indices in combinations(range(n), size):\n            cluster = [rounded_numbers[i] for i in cluster_indices]\n            \n            # Check 
if all pairs in this cluster are within threshold\n            is_valid_cluster = True\n            for i in range(len(cluster)):\n                for j in range(i + 1, len(cluster)):\n                    if abs(cluster[i] - cluster[j]) > threshold:\n                        is_valid_cluster = False\n                        break\n                if not is_valid_cluster:\n                    break\n            \n            if is_valid_cluster:\n                return True\n    \n    return False", "test": "\nMETADATA = {\n    'author': 'taniakaragiannidi',\n    'dataset': 'test'\n}\n\ndef check(candidate):\n    # Basic clustering tests\n    assert candidate([1.0, 1.1, 1.2, 5.0], 0.15, min_cluster_size=3) == True\n    assert candidate([1.0, 2.0, 3.0], 0.5, min_cluster_size=3) == False\n    assert candidate([1.0, 1.05, 2.0, 2.05], 0.1, min_cluster_size=2) == True\n    \n    # Edge cases\n    assert candidate([], 0.1, min_cluster_size=3) == False\n    assert candidate([1.0], 0.1, min_cluster_size=3) == False\n    assert candidate([1.0, 2.0], 0.1, min_cluster_size=3) == False\n    \n    # Single element cluster\n    assert candidate([1.0], 0.1, min_cluster_size=1) == True\n    assert candidate([1.0, 5.0], 0.1, min_cluster_size=1) == True\n    \n    # Precision tests\n    assert candidate([1.000001, 1.000002, 1.000003], 0.00001, min_cluster_size=3, precision=6) == True\n    assert candidate([1.000001, 1.000002, 1.000003], 0.00001, min_cluster_size=3, precision=4) == False\n    \n    # Multiple possible clusters\n    assert candidate([1.0, 1.1, 1.2, 3.0, 3.1, 3.2], 0.15, min_cluster_size=3) == True\n    assert candidate([1.0, 1.1, 3.0, 3.1, 5.0, 5.1], 0.15, min_cluster_size=3) == False\n    \n    # Large cluster\n    assert candidate([1.0, 1.01, 1.02, 1.03, 1.04], 0.05, min_cluster_size=5) == True\n    assert candidate([1.0, 1.01, 1.02, 1.03, 1.06], 0.05, min_cluster_size=5) == False\n    \n    # Identical elements\n    assert candidate([2.0, 2.0, 2.0], 0.0, 
min_cluster_size=3) == True\n    assert candidate([2.0, 2.0, 2.1], 0.05, min_cluster_size=3) == False\n    \n    # Negative numbers\n    assert candidate([-1.0, -1.1, -1.2, 2.0], 0.15, min_cluster_size=3) == True\n    assert candidate([-1.0, -0.5, 0.0, 0.5, 1.0], 0.6, min_cluster_size=3) == True"}
{"task_id": "ExtendedEval/1", "prompt": "from typing import List\n\ndef separate_paren_groups(paren_string: str) -> List[str]:\n    \"\"\"\n    Given a string consisting of multiple balanced groups of parentheses (e.g., '() ((())) ()'),\n    return a list where each element is a complete and balanced group of parentheses.\n\n    Rules:\n    - Ignore any whitespace.\n    - Groups are not nested within each other (but can be nested internally).\n    - The function should raise a ValueError if the parentheses are unbalanced.\n\n    Examples:\n    >>> separate_paren_groups('() ((())) ()')\n    ['()', '((()))', '()']\n    >>> separate_paren_groups('(()()) ((())())')\n    ['(()())', '((())())']\n    >>> separate_paren_groups('(((())))')\n    ['(((())))']\n    >>> separate_paren_groups('')\n    []\n    >>> separate_paren_groups('((())')\n    ValueError\n    \n    Please provide only the Python function code with no explanations.\n    \"\"\"\n", "entry_point": "separate_paren_groups", "canonical_solution": "    # Remove all whitespace\n    paren_string = ''.join(paren_string.split())\n    \n    if not paren_string:\n        return []\n    \n    groups = []\n    current_group = \"\"\n    depth = 0\n    \n    for char in paren_string:\n        if char == '(':\n            current_group += char\n            depth += 1\n        elif char == ')':\n            current_group += char\n            depth -= 1\n            if depth == 0:\n                groups.append(current_group)\n                current_group = \"\"\n            elif depth < 0:\n                raise ValueError(\"Unbalanced parentheses\")\n        else:\n            raise ValueError(\"Invalid character in parentheses string\")\n    \n    if depth != 0:\n        raise ValueError(\"Unbalanced parentheses\")\n    \n    return groups", "test": "\nMETADATA = {\n    'author': 'taniakaragiannidi',\n    'dataset': 'ExtendedEval'\n}\n\ndef check(candidate):\n    assert candidate('(()()) ((())) () ((())()())') == 
['(()())', '((()))', '()', '((())()())']\n    assert candidate('() (()) ((())) (((())))') == ['()', '(())', '((()))', '(((())))']\n    assert candidate('(()(())((())))') == ['(()(())((())))']\n    assert candidate('()') == ['()']\n    assert candidate('') == []\n    try:\n        candidate('((())')\n        assert False, 'Expected ValueError'\n    except ValueError:\n        pass\n    try:\n        candidate('())(()')\n        assert False, 'Expected ValueError'\n    except ValueError:\n        pass"}
{"task_id": "ExtendedEval/1", "prompt": "from typing import List\n\n\ndef separate_paren_groups(paren_string: str, keep_inner_content: bool = True) -> List[str]:\n    \"\"\"\n    Given a string containing multiple balanced groups of parentheses, \n    possibly with characters or whitespace in between or inside them, \n    return each group as a separate string.\n\n    If `keep_inner_content` is True, include all characters inside each group.\n    If False, strip any characters inside and return only the parentheses structure.\n\n    Groups are top-level balanced (not nested in each other).\n\n    Spaces outside parentheses are ignored.\n\n    Examples:\n    >>> separate_paren_groups('(abc) ((d)e(f)) (x(y))', keep_inner_content=True)\n    ['(abc)', '((d)e(f))', '(x(y))']\n    >>> separate_paren_groups('(abc) ((d)e(f)) (x(y))', keep_inner_content=False)\n    ['()', '(()())', '(())']\n    \n    Please provide only the Python function code with no explanations.\n    \"\"\"\n", "entry_point": "separate_paren_groups", "canonical_solution": "    result = []\n    current_string = []\n    current_depth = 0\n    paren_string = paren_string.replace(\" \", \"\")\n\n    for c in paren_string:\n        if c == '(':\n            if current_depth == 0:\n                current_string = []\n            current_depth += 1\n            current_string.append(c if keep_inner_content else '(')\n        elif c == ')':\n            current_depth -= 1\n            current_string.append(c if keep_inner_content else ')')\n            if current_depth == 0:\n                result.append(''.join(current_string))\n        elif current_depth > 0 and keep_inner_content:\n            current_string.append(c)\n\n    return result\n", "test": "\nMETADATA = {\n    'author': 'taniakaragiannidi',\n    'dataset': 'test'\n}\n\ndef check(candidate):\n    assert candidate('(()()) ((())) () ((())()())') == ['(()())', '((()))', '()', '((())()())']\n    assert candidate('() (()) ((())) (((())))') == ['()', 
'(())', '((()))', '(((())))']\n    assert candidate('(()(())((())))') == ['(()(())((())))']\n    assert candidate('( ) (( )) (( )( ))') == ['()', '(())', '(()())']\n    assert candidate('(a)', True) == ['(a)']\n    assert candidate('(a)(b)', True) == ['(a)', '(b)']\n    assert candidate('(a(b)c)', True) == ['(a(b)c)']\n    assert candidate('((a)b)(c)', True) == ['((a)b)', '(c)']\n    assert candidate('(a) ((b)) (c(d)e)', True) == ['(a)', '((b))', '(c(d)e)']\n    assert candidate('(a)', False) == ['()']\n    assert candidate('(abc)(def)', False) == ['()', '()']\n    assert candidate('(a(b)c)', False) == ['(())']\n    assert candidate('((a)b)(c)', False) == ['(())', '()']\n"}
{"task_id": "ExtendedEval/2", "prompt": "def truncate_number(number: float, precision: int = None, signed: bool = False) -> float:\n    \"\"\"\n    Given a floating point number, return only the decimal part (i.e. the part after the decimal point).\n    \n    If `signed` is True, return the decimal with the same sign as the original number.\n    If False, always return a positive fractional part.\n\n    If `precision` is provided, round the result to that number of decimal places.\n\n    Special values like NaN or infinity should raise a ValueError.\n\n    Examples:\n    >>> truncate_number(3.5)\n    0.5\n    >>> truncate_number(-3.5)\n    0.5\n    >>> truncate_number(-3.5, signed=True)\n    -0.5\n    >>> truncate_number(1.999, precision=2)\n    1.0\n    >>> truncate_number(123.456789, precision=3)\n    0.457\n    \n    Please provide only the Python function code with no explanations.\n    \"\"\"\n", "entry_point": "truncate_number", "canonical_solution": "    if number != number or number in (float('inf'), float('-inf')):\n        raise ValueError(\"Input must be a finite number\")\n\n    frac = abs(number) % 1.0\n    if signed and number < 0:\n        frac = -frac\n\n    if precision is not None:\n        frac = round(frac, precision)\n\n    return frac\n", "test": "\nMETADATA = {\n    'author': 'taniakaragiannidi',\n    'dataset': 'test'\n}\n\ndef check(candidate):\n    assert candidate(3.5) == 0.5\n    assert abs(candidate(1.33) - 0.33) < 1e-6\n    assert abs(candidate(123.456) - 0.456) < 1e-6\n\n    # Negative numbers\n    assert abs(candidate(-3.5) - 0.5) < 1e-6\n    assert abs(candidate(-3.5, signed=True) + 0.5) < 1e-6\n\n    # Precision tests - CORRECTED\n    assert candidate(1.999, precision=2) == 1.0\n    assert candidate(1.991, precision=1) == 1.0\n    assert candidate(123.456789, precision=3) == 0.457\n    assert candidate(-0.123456, precision=4, signed=True) == -0.1235\n\n    # Edge cases\n    try:\n        candidate(float('inf'))\n        assert False\n   
 except ValueError:\n        assert True\n\n    try:\n        candidate(float('nan'))\n        assert False\n    except ValueError:\n        assert True\n"}
{"task_id": "ExtendedEval/2", "prompt": "def truncate_number(number: float) -> float:\n    \"\"\"\n    Given a positive floating-point number, return only the decimal part of the number\n    (i.e., everything after the decimal point). The integer part is discarded.\n\n    Examples:\n    >>> truncate_number(3.5)\n    0.5\n    >>> truncate_number(1.33)\n    0.33\n    >>> truncate_number(123.456)\n    0.456\n    >>> truncate_number(1e-9)\n    1e-9\n    >>> truncate_number(1e9 + 0.0001)\n    0.0001\n    \n    Please provide only the Python function code with no explanations.\n    \"\"\"\n", "test": "\nMETADATA = {\n    'author': 'taniakaragiannidi',\n    'dataset': 'ExtendedEval'\n}\n\ndef check(candidate):\n    assert abs(candidate(3.5) - 0.5) < 1e-6\n    assert abs(candidate(1.33) - 0.33) < 1e-6\n    assert abs(candidate(123.456) - 0.456) < 1e-6\n    assert abs(candidate(1e-9) - 1e-9) < 1e-12\n    assert abs(candidate(1e9 + 0.0001) - 0.0001) < 1e-9\n    assert abs(candidate(99999.000001) - 0.000001) < 1e-9\n", "entry_point": "truncate_number"}
{"task_id": "ExtendedEval/3", "prompt": "from typing import List\n\n\ndef below_zero(operations: List[int]) -> bool:\n    \"\"\" You're given a list of deposit and withdrawal operations on a bank account that starts with\n    zero balance. Your task is to detect if at any point the balance of the account falls below zero, and\n    at that point the function should return True. Otherwise it should return False.\n    >>> below_zero([1, 2, 3])\n    False\n    >>> below_zero([1, 2, -4, 5])\n    True\n    \n    Please provide only the Python function code with no explanations.\n    \"\"\"\n", "entry_point": "below_zero", "canonical_solution": "    balance = 0\n\n    for op in operations:\n        balance += op\n        if balance < 0:\n            return True\n\n    return False\n", "test": "\n\nMETADATA = {\n    'author': 'taniakaragiannidi',\n    'dataset': 'test'\n}\n\n\ndef check(candidate):\n    assert candidate([]) == False\n    assert candidate([1, 2, -3, 1, 2, -3]) == False\n    assert candidate([1, 2, -4, 5, 6]) == True\n    assert candidate([1, -1, 2, -2, 5, -5, 4, -4]) == False\n    assert candidate([1, -1, 2, -2, 5, -5, 4, -5]) == True\n    assert candidate([1, -2, 2, -2, 5, -5, 4, -4]) == True\n"}
{"task_id": "ExtendedEval/4", "prompt": "from typing import List, Literal, Optional\n\ndef mean_absolute_deviation(\n    numbers: List[float],\n    center: Literal[\"mean\", \"median\"] = \"mean\",\n    weights: Optional[List[float]] = None\n) -> float:\n    \"\"\"\n    Computes the Mean Absolute Deviation (MAD) of a dataset.\n    \n    You can choose the center point as:\n    - 'mean': use the arithmetic mean\n    - 'median': use the median\n\n    You can also provide weights. If weights are provided, a weighted MAD is calculated.\n\n    Args:\n    - numbers: List of numeric values.\n    - center: The center point to compute absolute deviations from ('mean' or 'median').\n    - weights: Optional list of non-negative weights of same length as numbers.\n\n    Returns:\n    - The (weighted) mean absolute deviation.\n\n    Raises:\n    - ValueError if numbers is empty or weights are invalid.\n\n    Examples:\n    >>> mean_absolute_deviation([1, 2, 3, 4])\n    1.0\n    >>> mean_absolute_deviation([1, 2, 3, 4], center='median')\n    1.0\n    >>> mean_absolute_deviation([1, 2, 3, 4], weights=[1, 1, 2, 2])\n    1.0\n    \n    Please provide only the Python function code with no explanations.\n    \"\"\"\n", "entry_point": "mean_absolute_deviation", "canonical_solution": "import statistics\nfrom typing import List, Literal, Optional\n\ndef mean_absolute_deviation(\n    numbers: List[float],\n    center: Literal[\"mean\", \"median\"] = \"mean\",\n    weights: Optional[List[float]] = None\n) -> float:\n    if not numbers:\n        raise ValueError(\"Input list is empty.\")\n\n    if weights is not None:\n        if len(weights) != len(numbers) or any(w < 0 for w in weights):\n            raise ValueError(\"Weights must be non-negative and match the length of the data.\")\n\n    if center == \"mean\":\n        center_value = sum(numbers) / len(numbers)\n    elif center == \"median\":\n        center_value = statistics.median(numbers)\n    else:\n        raise 
ValueError(\"Invalid center. Use 'mean' or 'median'.\")\n\n    if weights is None:\n        return sum(abs(x - center_value) for x in numbers) / len(numbers)\n    else:\n        total_weight = sum(weights)\n        return sum(w * abs(x - center_value) for x, w in zip(numbers, weights)) / total_weight\n", "test": "\nMETADATA = {\n    'author': 'taniakaragiannidi',\n    'dataset': 'test'\n}\n\ndef check(candidate):\n    assert abs(candidate([1.0, 2.0, 3.0]) - 2.0/3.0) < 1e-6\n    assert abs(candidate([1.0, 2.0, 3.0, 4.0]) - 1.0) < 1e-6\n    assert abs(candidate([1.0, 2.0, 3.0, 4.0, 5.0]) - 6.0/5.0) < 1e-6\n    assert abs(candidate([1.0, 2.0, 3.0, 4.0], center='median') - 1.0) < 1e-6\n    assert abs(candidate([1.0, 1.0, 10.0], center='median') - 3.0) < 1e-6\n    assert abs(candidate([1.0, 2.0, 3.0, 4.0], weights=[1, 1, 2, 2]) - 1.0) < 1e-6\n    assert abs(candidate([1.0, 2.0, 3.0, 4.0], center='median', weights=[1, 1, 2, 2]) - 1.0) < 1e-6\n    try:\n        candidate([])\n        assert False\n    except ValueError:\n        assert True\n    try:\n        candidate([1, 2, 3], weights=[1, -1, 1])\n        assert False\n    except ValueError:\n        assert True\n    try:\n        candidate([1, 2], center=\"mode\")\n        assert False\n    except ValueError:\n        assert True\n"}
{"task_id": "ExtendedEval/4", "prompt": "from typing import List\n\n\ndef mean_absolute_deviation(numbers: List[float]) -> float:\n    \"\"\"\n    Given a list of floating-point numbers, calculate the Mean Absolute Deviation (MAD) around the mean.\n    MAD is defined as the average absolute difference between each element and the mean of the list.\n\n    The function should handle:\n      • empty lists (return 0.0)\n      • single-element lists (return 0.0)\n      • large lists efficiently\n      • floating-point precision correctly\n    Examples:\n    >>> mean_absolute_deviation([])\n    0.0\n    >>> mean_absolute_deviation([5.0])\n    0.0\n    >>> mean_absolute_deviation([1.0, 2.0, 3.0, 4.0])\n    1.0\n    \n    Please provide only the Python function code with no explanations.\n    \"\"\"\n", "entry_point": "mean_absolute_deviation", "canonical_solution": "    if len(numbers) <= 1:\n        return 0.0\n    \n    mean = sum(numbers) / len(numbers)\n    return sum(abs(x - mean) for x in numbers) / len(numbers)", "test": "\nMETADATA = {\n    'author': 'taniakaragiannidi',\n    'dataset': 'ExtendedEval'\n}\n\ndef check(candidate):\n    # empty and single-element cases\n    assert candidate([]) == 0.0\n    assert candidate([7.42]) == 0.0\n\n    # small lists\n    assert abs(candidate([1.0, 2.0, 3.0]) - (2.0/3.0)) < 1e-6\n    assert abs(candidate([1.0, 2.0, 3.0, 4.0]) - 1.0) < 1e-6\n\n    # asymmetric distribution\n    assert abs(candidate([1.0, 2.0, 10.0]) - ((10.0/3.0 + 7.0/3.0 + 17.0/3.0)/3.0)) < 1e-6\n\n    # negative and mixed values\n    nums = [-5.0, 0.0, 5.0, 10.0]\n    mean = sum(nums) / len(nums)\n    expected = sum(abs(x - mean) for x in nums) / len(nums)\n    assert abs(candidate(nums) - expected) < 1e-6\n\n    # float precision check - corrected calculation\n    arr = [1e-9, -1e-9, 2e-9]\n    mean_val = sum(arr) / len(arr)  # = 2e-9/3\n    expected_mad = sum(abs(x - mean_val) for x in arr) / len(arr)\n    assert abs(candidate(arr) - expected_mad) < 1e-12\n\n    # large 
list performance check\n    large = [float(i) for i in range(1000)]\n    result = candidate(large)\n    assert isinstance(result, float)\n    # Verify correctness for large list\n    expected_large = sum(abs(i - 499.5) for i in range(1000)) / 1000\n    assert abs(result - expected_large) < 1e-6"}
{"task_id": "ExtendedEval/5_pattern", "prompt": "from typing import List\n\ndef intersperse_pattern(numbers: List[int], pattern: List[int]) -> List[int]:\n    \"\"\"\n    Insert a repeating pattern between every pair of consecutive elements in the input list.\n    The pattern cycles through its elements for each insertion point.\n    \n    Args:\n    - numbers: List of integers to intersperse\n    - pattern: List of integers to insert between consecutive elements (cycles if needed)\n    \n    Returns:\n    - New list with pattern elements inserted between consecutive pairs\n    \n    Examples:\n    >>> intersperse_pattern([1, 2, 3], [9])\n    [1, 9, 2, 9, 3]\n    >>> intersperse_pattern([1, 2, 3, 4], [9, 8])\n    [1, 9, 8, 2, 9, 8, 3, 9, 8, 4]\n    >>> intersperse_pattern([5, 6, 7], [1, 2, 3])\n    [5, 1, 2, 3, 6, 1, 2, 3, 7]\n    >>> intersperse_pattern([1], [9, 8])\n    [1]\n    >>> intersperse_pattern([], [9, 8])\n    []\n    \n    Please provide only the Python function code with no explanations.\n    \"\"\"\n", "entry_point": "intersperse_pattern", "canonical_solution": "    if len(numbers) <= 1 or not pattern:\n        return numbers[:]\n    \n    result = []\n    \n    for i in range(len(numbers)):\n        result.append(numbers[i])\n        \n        # Add pattern between elements (but not after the last element)\n        if i < len(numbers) - 1:\n            result.extend(pattern)\n    \n    return result", "test": "\nMETADATA = {\n    'author': 'taniakaragiannidi',\n    'dataset': 'test'\n}\n\ndef check(candidate):\n    # Basic functionality\n    assert candidate([1, 2, 3], [9]) == [1, 9, 2, 9, 3]\n    assert candidate([1, 2, 3, 4], [9, 8]) == [1, 9, 8, 2, 9, 8, 3, 9, 8, 4]\n    assert candidate([5, 6, 7], [1, 2, 3]) == [5, 1, 2, 3, 6, 1, 2, 3, 7]\n    \n    # Edge cases\n    assert candidate([1], [9, 8]) == [1]\n    assert candidate([], [9, 8]) == []\n    assert candidate([1, 2], []) == [1, 2]\n    \n    # Single element pattern\n    assert candidate([1, 
2, 3, 4, 5], [0]) == [1, 0, 2, 0, 3, 0, 4, 0, 5]\n    \n    # Two elements\n    assert candidate([10, 20], [1, 2, 3]) == [10, 1, 2, 3, 20]\n    \n    # Negative numbers\n    assert candidate([-1, 0, 1], [-5, -6]) == [-1, -5, -6, 0, -5, -6, 1]\n    \n    # Large pattern\n    assert candidate([1, 2], [7, 8, 9, 10]) == [1, 7, 8, 9, 10, 2]\n    \n    # Same numbers in list and pattern\n    assert candidate([1, 1, 1], [1, 2]) == [1, 1, 2, 1, 1, 2, 1]\n    \n    # Longer sequence\n    nums = [1, 2, 3, 4, 5, 6]\n    pattern = [99, 88]\n    expected = [1, 99, 88, 2, 99, 88, 3, 99, 88, 4, 99, 88, 5, 99, 88, 6]\n    assert candidate(nums, pattern) == expected\n"}
{"task_id": "ExtendedEval/5", "prompt": "from typing import List\n\n\ndef intersperse(numbers: List[int], delimiter: int) -> List[int]:\n    \"\"\"\n    Insert the value `delimiter` between every pair of consecutive elements in `numbers`.\n\n    Examples:\n    >>> intersperse([], 4)\n    []\n    >>> intersperse([1], 4)\n    [1]\n    >>> intersperse([1, 2, 3], 4)\n    [1, 4, 2, 4, 3]\n    \n    Please provide only the Python function code with no explanations.\n    \"\"\"\n", "test": "\nMETADATA = {\n    'author': 'taniakaragiannidi',\n    'dataset': 'ExtendedEval'\n}\n\ndef check(candidate):\n    # Empty and single-element lists\n    assert candidate([], 42) == []\n    assert candidate([99], 1) == [99]\n\n    # Basic cases\n    assert candidate([1, 2], 0) == [1, 0, 2]\n    assert candidate([5, 6, 3, 2], 8) == [5, 8, 6, 8, 3, 8, 2]\n    assert candidate([2, 2, 2], 2) == [2, 2, 2, 2, 2]\n\n    # With negative numbers\n    assert candidate([-1, -2, -3], 0) == [-1, 0, -2, 0, -3]\n    assert candidate([-10, 0, 10], -5) == [-10, -5, 0, -5, 10]\n\n    # Large list\n    long_list = list(range(100))\n    result = candidate(long_list, -1)\n    assert result[1::2] == [-1] * (len(long_list) - 1)\n    assert result[0::2] == long_list\n", "entry_point": "intersperse"}
{"task_id": "ExtendedEval/6", "prompt": "from typing import List\n\ndef parse_nested_parens(paren_string: str, strict: bool = True) -> List[int]:\n    \"\"\"\n    Given a string with space-separated groups of nested parentheses, return a list\n    with the maximum nesting depth of each group.\n\n    If `strict` is True, the function raises an error for unbalanced or invalid characters.\n    If False, it ignores invalid groups and returns 0 for them.\n\n    Non-parenthesis characters are ignored inside groups, unless strict=True.\n\n    Examples:\n    >>> parse_nested_parens('(()()) ((())) () ((())()())')\n    [2, 3, 1, 3]\n    >>> parse_nested_parens('(a(b)c)', strict=False)\n    [2]\n    >>> parse_nested_parens('((())(())) (()', strict=False)\n    [3, 0]\n    \n    Please provide only the Python function code with no explanations.\n    \"\"\"\n", "entry_point": "parse_nested_parens", "canonical_solution": "def parse_nested_parens(paren_string: str, strict: bool = True) -> List[int]:\n    def parse_group(s: str) -> int:\n        depth = 0\n        max_depth = 0\n        for c in s:\n            if c == '(':\n                depth += 1\n                max_depth = max(max_depth, depth)\n            elif c == ')':\n                depth -= 1\n                if depth < 0:\n                    if strict:\n                        raise ValueError(f\"Unbalanced group: {s}\")\n                    else:\n                        return 0\n            elif strict and c not in '()':\n                raise ValueError(f\"Invalid character '{c}' in group: {s}\")\n        if depth != 0:\n            if strict:\n                raise ValueError(f\"Unbalanced group: {s}\")\n            else:\n                return 0\n        return max_depth\n\n    return [parse_group(group) for group in paren_string.split() if group]\n", "test": "\nMETADATA = {\n    'author': 'taniakaragiannidi',\n    'dataset': 'test'\n}\n\ndef check(candidate):\n    assert candidate('(()()) ((())) () 
((())()())') == [2, 3, 1, 3]\n    assert candidate('() (()) ((())) (((())))') == [1, 2, 3, 4]\n    assert candidate('(()(())((())))') == [4]\n    try:\n        candidate('((()) (()')\n        assert False\n    except ValueError:\n        assert True\n    try:\n        candidate('(a(b)c)')\n        assert False\n    except ValueError:\n        assert True\n    assert candidate('((())) (()', strict=False) == [3, 0]\n    assert candidate('(a(b)c)', strict=False) == [2]\n    assert candidate(')(', strict=False) == [0]\n"}
{"task_id": "ExtendedEval/6_balanced", "prompt": "from typing import List, Tuple\n\ndef analyze_bracket_balance(bracket_string: str) -> List[Tuple[str, bool, int]]:\n    \"\"\"\n    Analyze multiple bracket groups and return their type, balance status, and max depth.\n    Supports (), [], {}, and <> bracket types.\n    \n    Args:\n    - bracket_string: String with space-separated bracket groups\n    \n    Returns:\n    - List of tuples: (group_string, is_balanced, max_depth)\n    \n    Examples:\n    >>> analyze_bracket_balance('() [{}] ((()))')\n    [('()', True, 1), ('[{}]', True, 2), ('((()))', True, 3)]\n    >>> analyze_bracket_balance('([)] {{}}')\n    [('([)]', False, 2), ('{{}}', True, 2)]\n    >>> analyze_bracket_balance('< > <<>>')\n    [('<', False, 1), ('>', False, 0), ('<<>>', True, 2)]\n    \n    Please provide only the Python function code with no explanations.\n    \"\"\"\n", "entry_point": "analyze_bracket_balance", "canonical_solution": "    def analyze_group(group: str) -> Tuple[str, bool, int]:\n        stack = []\n        max_depth = 0\n        pairs = {'(': ')', '[': ']', '{': '}', '<': '>'}\n        \n        for char in group:\n            if char in pairs:  # Opening bracket\n                stack.append(char)\n                max_depth = max(max_depth, len(stack))\n            elif char in pairs.values():  # Closing bracket\n                if not stack:\n                    return (group, False, max_depth)\n                expected_opener = None\n                for opener, closer in pairs.items():\n                    if closer == char:\n                        expected_opener = opener\n                        break\n                if stack[-1] != expected_opener:\n                    return (group, False, max_depth)\n                stack.pop()\n        \n        is_balanced = len(stack) == 0\n        return (group, is_balanced, max_depth)\n    \n    if not bracket_string.strip():\n        return []\n    \n    groups = 
bracket_string.split()\n    return [analyze_group(group) for group in groups if group]", "test": "\nMETADATA = {\n    'author': 'taniakaragiannidi',\n    'dataset': 'test'\n}\n\ndef check(candidate):\n    # Basic balanced cases\n    assert candidate('() [{}] ((()))') == [('()', True, 1), ('[{}]', True, 2), ('((()))', True, 3)]\n    assert candidate('[] {} <>') == [('[]', True, 1), ('{}', True, 1), ('<>', True, 1)]\n    \n    # Unbalanced cases\n    assert candidate('([)] {{}}') == [('([)]', False, 2), ('{{}}', True, 2)]\n    assert candidate('( ) )') == [('(', False, 1), (')', False, 0), (')', False, 0)]\n    \n    # Mixed bracket types\n    assert candidate('([{}]) <()>') == [('([{}])', True, 3), ('<()>', True, 2)]\n    assert candidate('([)] {[}]') == [('([)]', False, 2), ('{[}]', False, 2)]\n    \n    # Edge cases\n    assert candidate('') == []\n    assert candidate('   ') == []\n    assert candidate('(((())))') == [('(((())))', True, 4)]\n    \n    # Single brackets\n    assert candidate('( [ { <') == [('(', False, 1), ('[', False, 1), ('{', False, 1), ('<', False, 1)]\n    assert candidate(') ] } >') == [(')', False, 0), (']', False, 0), ('}', False, 0), ('>', False, 0)]\n    \n    # Complex nesting\n    assert candidate('({[<>]}) ([)]') == [('({[<>]})', True, 4), ('([)]', False, 2)]\n    \n    # Empty brackets\n    assert candidate('() [] {} <>') == [('()', True, 1), ('[]', True, 1), ('{}', True, 1), ('<>', True, 1)]\n"}
{"task_id": "ExtendedEval/7", "prompt": "from typing import List, Literal\n\ndef filter_by_substring(\n    strings: List[str], \n    substring: str,\n    match_type: Literal['contains', 'startswith', 'endswith'] = 'contains',\n    ignore_case: bool = False\n) -> List[str]:\n    \"\"\"\n    Filters a list of strings, keeping only those that match a given substring condition.\n\n    Parameters:\n    - strings: list of input strings\n    - substring: the string pattern to search for\n    - match_type: one of 'contains', 'startswith', or 'endswith'\n    - ignore_case: if True, match is case-insensitive\n\n    Returns:\n    - List of strings from input that satisfy the matching rule.\n\n    Examples:\n    >>> filter_by_substring(['abc', 'BAC', 'Array'], 'a', ignore_case=True)\n    ['abc', 'BAC', 'Array']\n    >>> filter_by_substring(['abc', 'def', 'xyz'], 'a', match_type='startswith')\n    ['abc']\n    >>> filter_by_substring(['hello', 'world'], 'LD', match_type='endswith', ignore_case=True)\n    ['world']\n    \n    Please provide only the Python function code with no explanations.\n    \"\"\"\n", "entry_point": "filter_by_substring", "canonical_solution": "from typing import List, Literal\n\ndef filter_by_substring(\n    strings: List[str], \n    substring: str,\n    match_type: Literal['contains', 'startswith', 'endswith'] = 'contains',\n    ignore_case: bool = False\n) -> List[str]:\n    def match(s: str) -> bool:\n        a, b = (s, substring) if not ignore_case else (s.lower(), substring.lower())\n        if match_type == 'contains':\n            return b in a\n        elif match_type == 'startswith':\n            return a.startswith(b)\n        elif match_type == 'endswith':\n            return a.endswith(b)\n        else:\n            raise ValueError(f\"Invalid match_type: {match_type}\")\n\n    return [s for s in strings if match(s)]\n", "test": "\nMETADATA = {\n    'author': 'taniakaragiannidi',\n    'dataset': 'test'\n}\n\ndef check(candidate):\n    assert 
candidate([], 'john') == []\n    assert candidate(['xxx', 'asd', 'xxy', 'john doe', 'xxxAAA', 'xxx'], 'xxx') == ['xxx', 'xxxAAA', 'xxx']\n    assert candidate(['grunt', 'trumpet', 'prune', 'gruesome'], 'run') == ['grunt', 'prune']\n    assert candidate(['abc', 'BAC', 'Array'], 'a', ignore_case=True) == ['abc', 'BAC', 'Array']\n    assert candidate(['abc', 'BAC', 'Array'], 'a', ignore_case=False) == ['abc', 'Array']\n    assert candidate(['abc', 'aBcde', 'bcda', 'def'], 'ab', match_type='startswith', ignore_case=True) == ['abc', 'aBcde']\n    assert candidate(['abc', 'aBcde', 'bcda', 'def'], 'bc', match_type='startswith') == ['bcda']\n    assert candidate(['hello', 'world', 'eld'], 'ld', match_type='endswith', ignore_case=True) == ['world', 'eld']\n    assert candidate(['x.txt', 'y.TXT', 'doc.md'], '.txt', match_type='endswith', ignore_case=True) == ['x.txt', 'y.TXT']\n    assert candidate(['file1.TXT', 'file2.txt'], '.TXT', match_type='endswith', ignore_case=False) == ['file1.TXT']\n    try:\n        candidate(['abc'], 'a', match_type='regex')\n        assert False\n    except ValueError:\n        assert True\n"}
{"task_id": "ExtendedEval/7", "prompt": "from typing import List\n\n\ndef filter_by_substring(strings: List[str], substring: str, case_sensitive: bool = False, exclude: bool = False) -> List[str]:\n    \"\"\"\n    Filters a list of strings based on whether they include (or exclude) a substring.\n\n    - Matching is case-insensitive by default.\n    - If `case_sensitive=True`, then matching is done in a case-sensitive way.\n    - If `exclude=True`, returns strings that do NOT contain the substring.\n\n    >>> filter_by_substring(['abc', 'BAC', 'xyz', 'aBcD'], 'ab')\n    ['abc', 'aBcD']\n    >>> filter_by_substring(['abc', 'BAC', 'xyz', 'aBcD'], 'ab', case_sensitive=True)\n    ['abc']\n    >>> filter_by_substring(['abc', 'BAC', 'xyz', 'aBcD'], 'ab', exclude=True)\n    ['BAC', 'xyz']\n    >>> filter_by_substring([], 'any')\n    []\n    \n    Please provide only the Python function code with no explanations.\n    \"\"\"\n", "entry_point": "filter_by_substring", "canonical_solution": "    def match(s: str, sub: str, case: bool):\n        return sub in s if case else sub.lower() in s.lower()\n    return [s for s in strings if match(s, substring, case_sensitive) != exclude]", "test": "\n\nMETADATA = {\n    'author': 'taniakaragiannidi',\n    'dataset': 'hard_tests'\n}\n\ndef check(candidate):\n    assert candidate([], 'abc') == []\n    assert candidate(['Alpha', 'beta', 'Gamma'], 'a') == ['Alpha', 'beta', 'Gamma']\n    assert candidate(['Alpha', 'beta', 'Gamma'], 'a', case_sensitive=True) == ['Alpha', 'beta', 'Gamma']\n    assert candidate(['Alpha', 'beta', 'Gamma'], 'a', exclude=True) == []\n    assert candidate(['aaa', 'aba', 'bbb', 'aAa'], 'aa') == ['aaa', 'aAa']\n    assert candidate(['aaa', 'aba', 'bbb', 'aAa'], 'aa', case_sensitive=True) == ['aaa']\n    assert candidate(['abc', 'bac', 'cab', 'xyz'], 'AB', case_sensitive=False) == ['abc', 'cab']\n    assert candidate(['abc', 'bac', 'cab', 'xyz'], 'AB', case_sensitive=True) == []\n    assert candidate(['áβć', 'βγδ', 'alpha'], 'β') == 
['áβć', 'βγδ']\n    assert candidate(['áβć', 'βγδ', 'alpha'], 'β', case_sensitive=True) == ['áβć', 'βγδ']\n    assert candidate(['áβć', 'ΒΓΔ', 'alpha'], 'β', case_sensitive=True) == ['áβć']\n    assert candidate(['abc', 'def', 'ghi'], 'j') == []\n"}
{"task_id": "ExtendedEval/7_regex", "prompt": "from typing import List\nimport re\n\ndef filter_by_pattern(strings: List[str], pattern: str, use_regex: bool = False, ignore_case: bool = False, invert: bool = False) -> List[str]:\n    \"\"\"\n    Filter strings based on pattern matching with optional regex support.\n    \n    Args:\n    - strings: List of strings to filter\n    - pattern: Pattern to match (literal string or regex if use_regex=True)\n    - use_regex: If True, treat pattern as regular expression\n    - ignore_case: If True, perform case-insensitive matching\n    - invert: If True, return strings that DON'T match the pattern\n    \n    Returns:\n    - List of strings matching the criteria\n    \n    Examples:\n    >>> filter_by_pattern(['abc', 'def', 'ghi'], 'a')\n    ['abc']\n    >>> filter_by_pattern(['test123', 'abc', 'test456'], r'test\\\\d+', use_regex=True)\n    ['test123', 'test456']\n    >>> filter_by_pattern(['Apple', 'banana', 'CHERRY'], 'a', ignore_case=True)\n    ['Apple', 'banana']\n    >>> filter_by_pattern(['hello', 'world', 'help'], 'hel', invert=True)\n    ['world']\n    \n    Please provide only the Python function code with no explanations.\n    \"\"\"\n", "entry_point": "filter_by_pattern", "canonical_solution": "    if use_regex:\n        flags = re.IGNORECASE if ignore_case else 0\n        try:\n            compiled_pattern = re.compile(pattern, flags)\n            def matches(s: str) -> bool:\n                return bool(compiled_pattern.search(s))\n        except re.error:\n            # If regex is invalid, treat as literal string\n            search_pattern = pattern.lower() if ignore_case else pattern\n            def matches(s: str) -> bool:\n                search_string = s.lower() if ignore_case else s\n                return search_pattern in search_string\n    else:\n        search_pattern = pattern.lower() if ignore_case else pattern\n        def matches(s: str) -> bool:\n            search_string = s.lower() if 
ignore_case else s\n            return search_pattern in search_string\n    \n    result = []\n    for s in strings:\n        match_found = matches(s)\n        if (match_found and not invert) or (not match_found and invert):\n            result.append(s)\n    \n    return result", "test": "\nMETADATA = {\n    'author': 'taniakaragiannidi',\n    'dataset': 'test'\n}\n\ndef check(candidate):\n    # Basic literal matching\n    assert candidate(['abc', 'def', 'ghi'], 'a') == ['abc']\n    assert candidate(['hello', 'world', 'help'], 'hel') == ['hello', 'help']\n    \n    # Regex matching\n    assert candidate(['test123', 'abc', 'test456'], r'test\\d+', use_regex=True) == ['test123', 'test456']\n    assert candidate(['file.txt', 'image.jpg', 'doc.pdf'], r'\\.(txt|pdf)$', use_regex=True) == ['file.txt', 'doc.pdf']\n    \n    # Case insensitive\n    assert candidate(['Apple', 'banana', 'CHERRY'], 'a', ignore_case=True) == ['Apple', 'banana']\n    assert candidate(['Test', 'TEST', 'test'], 'test', ignore_case=True) == ['Test', 'TEST', 'test']\n    \n    # Invert matching\n    assert candidate(['hello', 'world', 'help'], 'hel', invert=True) == ['world']\n    assert candidate(['abc', 'def', 'xyz'], 'x', invert=True) == ['abc', 'def']\n    \n    # Combined options\n    assert candidate(['Hello', 'WORLD', 'help'], 'HEL', ignore_case=True, invert=True) == ['WORLD']\n    \n    # Edge cases\n    assert candidate([], 'test') == []\n    assert candidate(['abc'], '') == ['abc']\n    \n    # Invalid regex fallback\n    assert candidate(['test[abc', 'normal'], 'test[', use_regex=True) == ['test[abc']\n"}
{"task_id": "ExtendedEval/8", "prompt": "from typing import List, Tuple, Optional\n\n\ndef sum_product(\n    numbers: List[float],\n    absolute_product: bool = False,\n    modulus: Optional[int] = None\n) -> Tuple[float, float]:\n    \"\"\"\n    For a given list of numbers, return a tuple (sum, product).\n\n    Features:\n    - If absolute_product=True, the product is computed with absolute values of numbers.\n    - If modulus is provided, the product is computed modulo `modulus`.\n    - Empty sum is 0, empty product is 1.\n\n    Examples:\n    >>> sum_product([])\n    (0, 1)\n    >>> sum_product([1, 2, 3, 4])\n    (10, 24)\n    >>> sum_product([-1, 2, -3], absolute_product=True)\n    (-2, 6)\n    >>> sum_product([2, 3, 4], modulus=5)\n    (9, 4)\n    \n    Please provide only the Python function code with no explanations.\n    \"\"\"\n", "entry_point": "sum_product", "canonical_solution": "from typing import List, Tuple, Optional\n\ndef sum_product(\n    numbers: List[float],\n    absolute_product: bool = False,\n    modulus: Optional[int] = None\n) -> Tuple[float, float]:\n    sum_value = 0\n    prod_value = 1\n\n    for n in numbers:\n        sum_value += n\n        val = abs(n) if absolute_product else n\n        prod_value *= val\n        if modulus is not None:\n            prod_value %= modulus\n\n    return sum_value, prod_value\n", "test": "\nMETADATA = {\n    'author': 'taniakaragiannidi',\n    'dataset': 'test'\n}\n\ndef check(candidate):\n    assert candidate([]) == (0, 1)\n    assert candidate([1, 1, 1]) == (3, 1)\n    assert candidate([100, 0]) == (100, 0)\n    assert candidate([3, 5, 7]) == (15, 105)\n    assert candidate([10]) == (10, 10)\n    assert candidate([-1, -2, -3]) == (-6, -6)\n    assert candidate([-1, -2, -3], absolute_product=True) == (-6, 6)\n    assert candidate([1.5, 2.5]) == (4.0, 3.75)\n    assert candidate([2, 3, 4], modulus=5) == (9, 4)\n    assert candidate([2, 5, 6], modulus=5) == (13, 0)\n    assert candidate([-2, 3], 
modulus=5, absolute_product=True) == (1, 1)\n"}
{"task_id": "ExtendedEval/8", "prompt": "from typing import List, Tuple\n\n\ndef sum_product(numbers: List[int]) -> Tuple[int, int]:\n    \"\"\" For a given list of integers, return a tuple consisting of a sum and a product of all the integers in a list.\n    Empty sum should be equal to 0 and empty product should be equal to 1.\n    >>> sum_product([])\n    (0, 1)\n    >>> sum_product([1, 2, 3, 4])\n    (10, 24)\n    \n    Please provide only the Python function code with no explanations.\n    \"\"\"\n", "entry_point": "sum_product", "canonical_solution": "    sum_value = 0\n    prod_value = 1\n    for n in numbers:\n        sum_value += n\n        prod_value *= n\n    return sum_value, prod_value", "test": "\nMETADATA = {\n    'author': 'taniakaragiannidi',\n    'dataset': 'custom_difficult'\n}\n\ndef check(candidate):\n    assert candidate([]) == (0, 1)\n    assert candidate([1, 1, 1]) == (3, 1)\n    assert candidate([100, 0]) == (100, 0)\n    assert candidate([3, 5, 7]) == (15, 105)\n    assert candidate([10]) == (10, 10)\n    assert candidate([-1, -2, -3]) == (-6, -6)\n    assert candidate([0, 0, 0]) == (0, 0)\n    assert candidate([0, 1, 2, 3]) == (6, 0)\n    assert candidate([-1, 2, -3, 4]) == (2, 24)\n    assert candidate([9999, -9999]) == (0, -99980001)\n    assert candidate([1]*50) == (50, 1)\n    assert candidate([2]*20) == (40, 1048576)\n    assert candidate([1000000, 1000000]) == (2000000, 1000000000000)\n    assert candidate(list(range(1, 11))) == (55, 3628800)\n    assert candidate(list(range(-5, 6))) == (0, 0)\n"}
{"task_id": "ExtendedEval/9", "prompt": "from typing import List, Literal\n\ndef rolling_aggregate(\n    numbers: List[int], \n    mode: Literal[\"max\", \"min\", \"sum\"] = \"max\",\n    reverse: bool = False\n) -> List[int]:\n    \"\"\"\n    For a given list of integers, return a list with rolling aggregation \n    (max, min, or cumulative sum), calculated from left-to-right (or right-to-left if reverse=True).\n\n    Parameters:\n    - numbers: List of integers\n    - mode: Aggregation function to apply (\"max\", \"min\", or \"sum\")\n    - reverse: If True, apply aggregation from the end of the list\n\n    Returns:\n    - List of same length with rolling values\n\n    Examples:\n    >>> rolling_aggregate([1, 2, 3, 2, 3, 4, 2])\n    [1, 2, 3, 3, 3, 4, 4]\n    >>> rolling_aggregate([1, 2, 3, 2, 3, 4, 2], reverse=True)\n    [4, 4, 4, 4, 4, 4, 2]\n    >>> rolling_aggregate([1, 2, 3], mode='sum')\n    [1, 3, 6]\n    >>> rolling_aggregate([3, 2, 1], mode='min')\n    [3, 2, 1]\n    \n    Please provide only the Python function code with no explanations.\n    \"\"\"\n", "entry_point": "rolling_aggregate", "canonical_solution": "from typing import List, Literal\n\ndef rolling_aggregate(\n    numbers: List[int], \n    mode: Literal[\"max\", \"min\", \"sum\"] = \"max\",\n    reverse: bool = False\n) -> List[int]:\n    if not numbers:\n        return []\n\n    if mode not in {\"max\", \"min\", \"sum\"}:\n        raise ValueError(f\"Invalid mode: {mode}\")\n\n    result = []\n    agg = None\n\n    iterable = reversed(numbers) if reverse else numbers\n\n    for n in iterable:\n        if agg is None:\n            agg = n\n        else:\n            if mode == \"max\":\n                agg = max(agg, n)\n            elif mode == \"min\":\n                agg = min(agg, n)\n            elif mode == \"sum\":\n                agg = agg + n\n        result.append(agg)\n\n    return list(reversed(result)) if reverse else result\n", "test": "\nMETADATA = {\n    'author': 
'taniakaragiannidi',\n    'dataset': 'test'\n}\n\ndef check(candidate):\n    assert candidate([]) == []\n    assert candidate([1, 2, 3, 4]) == [1, 2, 3, 4]\n    assert candidate([4, 3, 2, 1]) == [4, 4, 4, 4]\n    assert candidate([3, 2, 3, 100, 3]) == [3, 3, 3, 100, 100]\n    assert candidate([1, 2, 3, 2, 3, 4, 2], reverse=True) == [4, 4, 4, 4, 4, 4, 2]\n    assert candidate([5, 4, 3, 2], reverse=True, mode='min') == [2, 2, 2, 2]\n    assert candidate([1, 2, 3], mode='sum') == [1, 3, 6]\n    assert candidate([3, 2, 1], mode='sum') == [3, 5, 6]\n    assert candidate([3, 2, 1], mode='sum', reverse=True) == [6, 3, 1]\n    assert candidate([3, 2, 4, 1], mode='min') == [3, 2, 2, 1]\n    assert candidate([10, 9, 8], mode='min', reverse=True) == [8, 8, 8]\n    try:\n        candidate([1, 2, 3], mode='avg')\n        assert False\n    except ValueError:\n        assert True\n"}
{"task_id": "ExtendedEval/9", "prompt": "from typing import List\n\n\ndef rolling_max(numbers: List[float]) -> List[float]:\n    \"\"\"\n    Given a list of numbers (ints or floats), return a list where each element is the maximum\n    of all elements seen so far.\n\n    Supports empty input, negative numbers, and float values.\n\n    >>> rolling_max([1, 2, 3, 2, 3, 4, 2])\n    [1, 2, 3, 3, 3, 4, 4]\n    >>> rolling_max([-1.0, -0.5, -2.0])\n    [-1.0, -0.5, -0.5]\n    >>> rolling_max([1.1, 1.5, 0.5])\n    [1.1, 1.5, 1.5]\n    \n    Please provide only the Python function code with no explanations.\n    \"\"\"\n", "entry_point": "rolling_max", "canonical_solution": "    if not numbers:\n        return []\n    \n    result = []\n    running_max = numbers[0]\n    \n    for n in numbers:\n        running_max = max(running_max, n)\n        result.append(running_max)\n    \n    return result", "test": "\nMETADATA = {\n    'author': 'taniakaragiannidi',\n    'dataset': 'hard_tests'\n}\n\ndef check(candidate):\n    assert candidate([]) == []\n    assert candidate([1, 2, 3, 4]) == [1, 2, 3, 4]\n    assert candidate([4, 3, 2, 1]) == [4, 4, 4, 4]\n    assert candidate([3, 2, 3, 100, 3]) == [3, 3, 3, 100, 100]\n    assert candidate([-1.0, -2.0, -3.0]) == [-1.0, -1.0, -1.0]\n    assert candidate([-5, 0, -1, 10]) == [-5, 0, 0, 10]\n    assert candidate([0.1, 0.2, 0.15]) == [0.1, 0.2, 0.2]\n    # Corrected infinity test\n    assert candidate([float('-inf'), -1000, 0, 1000]) == [float('-inf'), -1000, 0, 1000]\n    assert candidate([1000, 1000, 1000]) == [1000, 1000, 1000]\n    assert candidate([10, 20, 10, 5, 25, 1, 26]) == [10, 20, 20, 20, 25, 25, 26]"}
{"task_id": "ExtendedEval/10_analyze", "prompt": "def is_palindrome(s: str) -> bool:\n    \"\"\"Checks whether a given string is a palindrome.\"\"\"\n    return s == s[::-1]\n\ndef analyze_palindrome_structure(string: str) -> dict:\n    \"\"\"\n    Analyze the palindromic properties of a string and return detailed information.\n    \n    Returns a dictionary with:\n    - 'is_palindrome': boolean indicating if the string is already a palindrome\n    - 'longest_palindromic_prefix': longest palindromic prefix of the string\n    - 'longest_palindromic_suffix': longest palindromic suffix of the string\n    - 'shortest_palindrome': shortest palindrome that starts with the string\n    - 'chars_to_add': number of characters needed to make it a palindrome\n    \n    Examples:\n    >>> analyze_palindrome_structure('abc')\n    {'is_palindrome': False, 'longest_palindromic_prefix': 'a', 'longest_palindromic_suffix': 'c', 'shortest_palindrome': 'abcba', 'chars_to_add': 2}\n    >>> analyze_palindrome_structure('racecar')\n    {'is_palindrome': True, 'longest_palindromic_prefix': 'racecar', 'longest_palindromic_suffix': 'racecar', 'shortest_palindrome': 'racecar', 'chars_to_add': 0}\n    \n    Please provide only the Python function code with no explanations.\n    \"\"\"\n", "entry_point": "analyze_palindrome_structure", "canonical_solution": "    if not string:\n        return {\n            'is_palindrome': True,\n            'longest_palindromic_prefix': '',\n            'longest_palindromic_suffix': '',\n            'shortest_palindrome': '',\n            'chars_to_add': 0\n        }\n    \n    # Check if already palindrome\n    already_palindrome = is_palindrome(string)\n    \n    # Find longest palindromic prefix\n    longest_prefix = ''\n    for i in range(1, len(string) + 1):\n        if is_palindrome(string[:i]):\n            longest_prefix = string[:i]\n    \n    # Find longest palindromic suffix\n    longest_suffix = ''\n    for i in range(len(string)):\n        if 
is_palindrome(string[i:]):\n            longest_suffix = string[i:]\n            break\n    \n    # Find shortest palindrome that starts with string\n    if already_palindrome:\n        shortest_palindrome = string\n        chars_to_add = 0\n    else:\n        # Find longest suffix that is palindrome, append reverse of remaining prefix\n        for i in range(len(string)):\n            if is_palindrome(string[i:]):\n                shortest_palindrome = string + string[:i][::-1]\n                chars_to_add = i\n                break\n    \n    return {\n        'is_palindrome': already_palindrome,\n        'longest_palindromic_prefix': longest_prefix,\n        'longest_palindromic_suffix': longest_suffix,\n        'shortest_palindrome': shortest_palindrome,\n        'chars_to_add': chars_to_add\n    }", "test": "\nMETADATA = {\n    'author': 'taniakaragiannidi',\n    'dataset': 'test'\n}\n\ndef check(candidate):\n    # Basic cases\n    result = candidate('abc')\n    assert result['is_palindrome'] == False\n    assert result['longest_palindromic_prefix'] == 'a'\n    assert result['longest_palindromic_suffix'] == 'c'\n    assert result['shortest_palindrome'] == 'abcba'\n    assert result['chars_to_add'] == 2\n    \n    # Already palindrome\n    result = candidate('racecar')\n    assert result['is_palindrome'] == True\n    assert result['longest_palindromic_prefix'] == 'racecar'\n    assert result['longest_palindromic_suffix'] == 'racecar'\n    assert result['shortest_palindrome'] == 'racecar'\n    assert result['chars_to_add'] == 0\n    \n    # Single character\n    result = candidate('a')\n    assert result['is_palindrome'] == True\n    assert result['longest_palindromic_prefix'] == 'a'\n    assert result['longest_palindromic_suffix'] == 'a'\n    assert result['shortest_palindrome'] == 'a'\n    assert result['chars_to_add'] == 0\n    \n    # Empty string\n    result = candidate('')\n    assert result['is_palindrome'] == True\n    assert 
result['longest_palindromic_prefix'] == ''\n    assert result['longest_palindromic_suffix'] == ''\n    assert result['shortest_palindrome'] == ''\n    assert result['chars_to_add'] == 0\n    \n    # More complex case\n    result = candidate('abcd')\n    assert result['is_palindrome'] == False\n    assert result['longest_palindromic_prefix'] == 'a'\n    assert result['longest_palindromic_suffix'] == 'd'\n    assert result['shortest_palindrome'] == 'abcdcba'\n    assert result['chars_to_add'] == 3\n    \n    # Partial palindrome\n    result = candidate('aba')\n    assert result['is_palindrome'] == True\n    assert result['longest_palindromic_prefix'] == 'aba'\n    assert result['longest_palindromic_suffix'] == 'aba'\n    assert result['shortest_palindrome'] == 'aba'\n    assert result['chars_to_add'] == 0\n"}
{"task_id": "ExtendedEval/10", "prompt": "def is_palindrome(s: str) -> bool:\n    \"\"\"Checks whether a given string is a palindrome.\"\"\"\n    return s == s[::-1]\n\ndef make_palindrome(string: str) -> str:\n    \"\"\"\n    Given a string, find the shortest palindrome that starts with it.\n    - The palindrome must include the original string as a prefix.\n    - You must append the minimal reversed prefix to achieve it.\n\n    >>> make_palindrome('abc')\n    'abcba'\n    >>> make_palindrome('level')\n    'level'\n    >>> make_palindrome('race')\n    'racecar'\n    >>> make_palindrome('ΑΒΓ')\n    'ΑΒΓΒΑ'\n    >>> make_palindrome('abc#')\n    'abc#cba'\n    \n    Please provide only the Python function code with no explanations.\n    \"\"\"\n", "entry_point": "make_palindrome", "canonical_solution": "    if not string:\n        return ''\n    \n    # Find the longest suffix that is already a palindrome\n    for i in range(len(string)):\n        if is_palindrome(string[i:]):\n            # Append the reverse of the prefix that's not part of the palindromic suffix\n            return string + string[:i][::-1]\n    \n    # This should never be reached since string[-1:] is always a palindrome\n    return string + string[:-1][::-1]", "test": "\nMETADATA = {\n    'author': 'taniakaragiannidi',\n    'dataset': 'hard_tests'\n}\n\ndef check(candidate):\n    assert candidate('') == ''\n    assert candidate('a') == 'a'\n    assert candidate('race') == 'racecar'\n    assert candidate('abc') == 'abcba'\n    assert candidate('abc#') == 'abc#cba'\n    assert candidate('jerry') == 'jerryrrej'\n    assert candidate('madam') == 'madam'\n    assert candidate('noonx') == 'noonxnoon'\n    assert candidate('ΑΒΓ') == 'ΑΒΓΒΑ'\n    assert candidate('a' * 1000) == 'a' * 1000\n    long_input = 'a' * 999 + 'b'\n    expected = long_input + 'a' * 999\n    assert candidate(long_input) == expected"}
{"task_id": "ExtendedEval/11", "prompt": "from typing import List\n\n\ndef string_xor(a: str, b: str) -> str:\n    \"\"\" Input are two strings a and b consisting only of 1s and 0s.\n    Perform binary XOR on these inputs and return result also as a string.\n    >>> string_xor('010', '110')\n    '100'\n    \n    Please provide only the Python function code with no explanations.\n    \"\"\"\n", "entry_point": "string_xor", "canonical_solution": "    def xor(i, j):\n        if i == j:\n            return '0'\n        else:\n            return '1'\n\n    return ''.join(xor(x, y) for x, y in zip(a, b))\n", "test": "\n\nMETADATA = {\n    'author': 'taniakaragiannidi',\n    'dataset': 'test'\n}\n\n\ndef check(candidate):\n    assert candidate('111000', '101010') == '010010'\n    assert candidate('1', '1') == '0'\n    assert candidate('0101', '0000') == '0101'\n"}
{"task_id": "ExtendedEval/11", "prompt": "def string_xor(a: str, b: str) -> str:\n    \"\"\"Perform bitwise XOR on two binary strings of equal length.\n    Raise ValueError if inputs are not same length or contain invalid characters.\n\n    >>> string_xor('010', '110')\n    '100'\n    >>> string_xor('111', '000')\n    '111'\n    >>> string_xor('1'*1000, '0'*1000) == '1'*1000\n    True\n    \n    Please provide only the Python function code with no explanations.\n    \"\"\"\n", "entry_point": "string_xor", "canonical_solution": "    if len(a) != len(b): raise ValueError('Input strings must be of equal length')\n    if not set(a + b).issubset({'0', '1'}): raise ValueError('Inputs must be binary strings')\n    return ''.join('1' if x != y else '0' for x, y in zip(a, b))", "test": "\n\nMETADATA = {\n    'author': 'taniakaragiannidi',\n    'dataset': 'hard_binary'\n}\n\ndef check(candidate):\n    assert candidate('0', '0') == '0'\n    assert candidate('1', '0') == '1'\n    assert candidate('1010', '0101') == '1111'\n    assert candidate('0000', '0000') == '0000'\n    assert candidate('111000', '101010') == '010010'\n    assert candidate('1'*1000, '0'*1000) == '1'*1000\n    assert candidate('1'*1000, '1'*1000) == '0'*1000\n    try:\n        candidate('101', '10')\n        assert False, 'Expected ValueError for unequal lengths'\n    except ValueError:\n        pass\n    try:\n        candidate('102', '110')\n        assert False, 'Expected ValueError for invalid characters'\n    except ValueError:\n        pass\n"}
{"task_id": "ExtendedEval/12", "prompt": "from typing import List, Optional\n\ndef longest(\n    strings: List[str],\n    prefer_last: bool = False,\n    min_length: int = 0\n) -> Optional[str]:\n    \"\"\"\n    Returns the longest string from the list.\n\n    - If multiple strings have the same length, return the first (or last if prefer_last=True).\n    - If min_length > 0, strings shorter than that are ignored.\n    - If no valid strings are found (e.g., list is empty or all filtered), return None.\n\n    Examples:\n    >>> longest(['a', 'b', 'c'])\n    'a'\n    >>> longest(['a', 'bb', 'ccc'])\n    'ccc'\n    >>> longest(['a', 'bb', 'ccc'], prefer_last=True)\n    'ccc'\n    >>> longest(['short', 'longer', 'longest', 'equal', 'otherlongest'], prefer_last=False)\n    'otherlongest'\n    >>> longest(['short', 'longer', 'longest', 'equal', 'otherlongest'], prefer_last=True)\n    'otherlongest'\n    >>> longest(['a', 'ab', 'abc'], min_length=2)\n    'abc'\n    >>> longest(['a', 'ab', 'abc'], min_length=4)\n    None\n    \n    Please provide only the Python function code with no explanations.\n    \"\"\"\n", "entry_point": "longest", "canonical_solution": "from typing import List, Optional\n\ndef longest(\n    strings: List[str],\n    prefer_last: bool = False,\n    min_length: int = 0\n) -> Optional[str]:\n    filtered = [s for s in strings if len(s) >= min_length]\n    if not filtered:\n        return None\n\n    maxlen = max(len(s) for s in filtered)\n\n    if prefer_last:\n        for s in reversed(filtered):\n            if len(s) == maxlen:\n                return s\n    else:\n        for s in filtered:\n            if len(s) == maxlen:\n                return s\n", "test": "\nMETADATA = {\n    'author': 'modification',\n    'dataset': 'test'\n}\n\ndef check(candidate):\n    assert candidate([]) is None\n    assert candidate(['x', 'y', 'z']) == 'x'\n    assert candidate(['x', 'yyy', 'zzzz', 'www', 'kkkk', 'abc']) == 'zzzz'\n    assert candidate(['a', 'bb', 'ccc', 
'abc', 'ccc'], prefer_last=False) == 'ccc'\n    assert candidate(['a', 'bb', 'ccc', 'abc', 'ccc'], prefer_last=True) == 'ccc'\n    assert candidate(['', 'a', 'ab', 'abc'], min_length=2) == 'abc'\n    assert candidate(['short', 'longer', 'longest', 'equal', 'otherlongest'], prefer_last=False) == 'otherlongest'\n    assert candidate(['short', 'longer', 'longest', 'equal', 'otherlongest'], prefer_last=True) == 'otherlongest'\n    assert candidate(['tiny', 'smol'], min_length=6) is None\n    assert candidate(['a', 'ab'], min_length=3) is None\n    assert candidate([], min_length=1) is None\n"}
{"task_id": "ExtendedEval/13", "prompt": "from typing import Optional\n\n\ndef greatest_common_divisor(*numbers: int, strict: bool = False) -> Optional[int]:\n    \"\"\"\n    Returns the greatest common divisor (GCD) of two or more integers.\n\n    - If no numbers are given, return None.\n    - If `strict=True`, any occurrence of 0 raises ValueError.\n    - Handles negative integers.\n    - Ignores zeros if `strict=False`, unless all inputs are zero.\n\n    Examples:\n    >>> greatest_common_divisor(3, 5)\n    1\n    >>> greatest_common_divisor(25, 15)\n    5\n    >>> greatest_common_divisor(48, -18)\n    6\n    >>> greatest_common_divisor(0, 0)\n    0\n    >>> greatest_common_divisor(0, 5)\n    5\n    >>> greatest_common_divisor(0, 5, strict=True)\n    ValueError\n    >>> greatest_common_divisor()\n    None\n    \n    Please provide only the Python function code with no explanations.\n    \"\"\"\n", "entry_point": "greatest_common_divisor", "canonical_solution": "from math import gcd\nfrom typing import Optional\n\ndef greatest_common_divisor(*numbers: int, strict: bool = False) -> Optional[int]:\n    if not numbers:\n        return None\n\n    if strict and any(n == 0 for n in numbers):\n        raise ValueError(\"Zero not allowed in strict mode\")\n\n    filtered = [abs(n) for n in numbers if n != 0] if not strict else [abs(n) for n in numbers]\n\n    if not filtered:\n        return 0  # all were zero and not strict\n\n    result = filtered[0]\n    for n in filtered[1:]:\n        result = gcd(result, n)\n\n    return result\n", "test": "\nMETADATA = {\n    'author': 'modification',\n    'dataset': 'test'\n}\n\ndef check(candidate):\n    assert candidate(3, 7) == 1\n    assert candidate(10, 15) == 5\n    assert candidate(49, 14) == 7\n    assert candidate(144, 60) == 12\n    assert candidate(48, -18) == 6\n    assert candidate(-36, -60) == 12\n    assert candidate(8, 16, 24) == 8\n    assert candidate(27, 36, 45) == 9\n    assert candidate(0, 5) == 5\n    assert 
candidate(0, 0) == 0\n    assert candidate(0, 18, 0) == 18\n    try:\n        candidate(0, 5, strict=True)\n        assert False\n    except ValueError:\n        assert True\n    try:\n        candidate(0, 0, strict=True)\n        assert False\n    except ValueError:\n        assert True\n    assert candidate() is None\n"}
{"task_id": "ExtendedEval/13", "prompt": "def greatest_common_divisor(a: int, b: int) -> int:\n    \"\"\"Return the greatest common divisor (GCD) of two integers a and b.\n    The GCD is always a non-negative integer.\n    Supports negative numbers and handles cases where a or b is zero.\n\n    >>> greatest_common_divisor(3, 5)\n    1\n    >>> greatest_common_divisor(25, 15)\n    5\n    >>> greatest_common_divisor(-25, -10)\n    5\n    >>> greatest_common_divisor(0, 0)\n    0\n    \n    Please provide only the Python function code with no explanations.\n    \"\"\"\n", "entry_point": "greatest_common_divisor", "canonical_solution": "    a, b = abs(a), abs(b)\n    if a == 0: return b\n    if b == 0: return a\n    while b: a, b = b, a % b\n    return a", "test": "\nMETADATA = {\n    'author': 'taniakaragiannidi',\n    'dataset': 'custom_test'\n}\n\ndef check(candidate):\n    assert candidate(3, 7) == 1\n    assert candidate(10, 15) == 5\n    assert candidate(49, 14) == 7\n    assert candidate(144, 60) == 12\n    assert candidate(-50, 20) == 10\n    assert candidate(0, 7) == 7\n    assert candidate(7, 0) == 7\n    assert candidate(0, 0) == 0\n    assert candidate(-8, -12) == 4\n    assert candidate(270, 192) == 6\n"}
{"task_id": "ExtendedEval/14", "prompt": "from typing import List\n\n\ndef all_prefixes(\n    string: str,\n    min_length: int = 1,\n    max_length: int = -1,\n    step: int = 1,\n    reverse: bool = False\n) -> List[str]:\n    \"\"\"\n    Returns a list of all prefixes of the input string, optionally constrained by:\n    - min_length: minimum prefix length (default=1)\n    - max_length: maximum prefix length (default=-1 means full length)\n    - step: include every `step`-th prefix only\n    - reverse: if True, return from longest to shortest\n\n    Examples:\n    >>> all_prefixes('abc')\n    ['a', 'ab', 'abc']\n    >>> all_prefixes('abcdef', min_length=2, step=2)\n    ['ab', 'abcd', 'abcdef']\n    >>> all_prefixes('abcde', reverse=True)\n    ['abcde', 'abcd', 'abc', 'ab', 'a']\n    \n    Please provide only the Python function code with no explanations.\n    \"\"\"\n", "entry_point": "all_prefixes", "canonical_solution": "from typing import List\n\ndef all_prefixes(\n    string: str,\n    min_length: int = 1,\n    max_length: int = -1,\n    step: int = 1,\n    reverse: bool = False\n) -> List[str]:\n    n = len(string)\n    if max_length == -1 or max_length > n:\n        max_length = n\n\n    if min_length > max_length or step <= 0:\n        return []\n\n    indices = list(range(min_length, max_length + 1, step))\n    result = [string[:i] for i in indices]\n\n    if reverse:\n        result.reverse()\n\n    return result\n", "test": "\nMETADATA = {\n    'author': 'taniakaragiannidi',\n    'dataset': 'test'\n}\n\ndef check(candidate):\n    assert candidate('') == []\n    assert candidate('abc') == ['a', 'ab', 'abc']\n    assert candidate('asdfgh') == ['a', 'as', 'asd', 'asdf', 'asdfg', 'asdfgh']\n    assert candidate('WWW') == ['W', 'WW', 'WWW']\n    assert candidate('abcdef', min_length=2, step=2) == ['ab', 'abcd', 'abcdef']\n    assert candidate('abcdefg', min_length=3, step=3) == ['abc', 'abcdef']\n    assert candidate('abcdef', min_length=2, max_length=4) == ['ab', 'abc', 'abcd']\n    
assert candidate('abcdef', min_length=3, max_length=3) == ['abc']\n    assert candidate('abcdef', min_length=5, max_length=3) == []\n    assert candidate('abc', reverse=True) == ['abc', 'ab', 'a']\n    assert candidate('abcd', min_length=2, reverse=True) == ['abcd', 'abc', 'ab']\n    assert candidate('abc', step=0) == []\n    assert candidate('abc', step=-1) == []\n    assert candidate('abc', min_length=3, max_length=3) == ['abc']\n"}
{"task_id": "ExtendedEval/15", "prompt": "def string_sequence(\n    end: int,\n    start: int = 0,\n    step: int = 1\n) -> str:\n    \"\"\"\n    Returns a space-delimited string of numbers from start to end (inclusive), using the given step.\n\n    If start > end and step > 0, the result is empty.\n    If start < end and step < 0, the result is also empty.\n\n    Raises ValueError if step is 0.\n\n    Examples:\n    >>> string_sequence(5)\n    '0 1 2 3 4 5'\n    >>> string_sequence(5, start=2)\n    '2 3 4 5'\n    >>> string_sequence(5, start=0, step=2)\n    '0 2 4'\n    >>> string_sequence(-3, step=-1)\n    '0 -1 -2 -3'\n    >>> string_sequence(0, start=3, step=-1)\n    '3 2 1 0'\n    >>> string_sequence(5, start=10, step=-2)\n    '10 8 6'\n    >>> string_sequence(0, 0, 0)\n    ValueError\n    \n    Please provide only the Python function code with no explanations.\n    \"\"\"\n", "entry_point": "string_sequence", "canonical_solution": "def string_sequence(end: int, start: int = 0, step: int = 1) -> str:\n    if step == 0:\n        raise ValueError(\"Step cannot be zero.\")\n\n    if (start > end and step > 0) or (start < end and step < 0):\n        return \"\"\n\n    return \" \".join(str(x) for x in range(start, end + (1 if step > 0 else -1), step))\n", "test": "\nMETADATA = {\n    'author': 'taniakaragiannidi',\n    'dataset': 'test'\n}\n\ndef check(candidate):\n    assert candidate(0) == \"0\"\n    assert candidate(3) == \"0 1 2 3\"\n    assert candidate(10) == \"0 1 2 3 4 5 6 7 8 9 10\"\n    assert candidate(5, start=2) == \"2 3 4 5\"\n    assert candidate(5, start=0, step=2) == \"0 2 4\"\n    assert candidate(-3, step=-1) == \"0 -1 -2 -3\"\n    assert candidate(0, start=3, step=-1) == \"3 2 1 0\"\n    assert candidate(5, start=10, step=-2) == \"10 8 6\"\n    assert candidate(3, start=5) == \"\"\n    assert candidate(-5, start=-2) == \"\"\n    try:\n        candidate(0, 0, 0)\n        assert False\n    except ValueError:\n        assert True\n"}
{"task_id": "ExtendedEval/15", "prompt": "def string_sequence(n: int, step: int = 1, reverse: bool = False, sep: str = ' ') -> str:\n    \"\"\"\n    Return a string containing numbers from 0 to n (inclusive), space-delimited by default.\n    - `step` determines the interval between numbers.\n    - `reverse` if True, returns the sequence in descending order.\n    - `sep` is the delimiter string (default is space).\n\n    >>> string_sequence(5)\n    '0 1 2 3 4 5'\n    >>> string_sequence(5, step=2)\n    '0 2 4'\n    >>> string_sequence(5, reverse=True)\n    '5 4 3 2 1 0'\n    >>> string_sequence(5, step=2, reverse=True)\n    '4 2 0'\n    >>> string_sequence(3, sep='-')\n    '0-1-2-3'\n    \n    Please provide only the Python function code with no explanations.\n    \"\"\"\n", "entry_point": "string_sequence", "canonical_solution": "    rng = range(0, n + 1, step)\n    if reverse:\n        rng = reversed(list(rng))\n    return sep.join(str(i) for i in rng)", "test": "\nMETADATA = {\n    'author': 'openai-advanced',\n    'dataset': 'custom_test'\n}\n\ndef check(candidate):\n    assert candidate(0) == '0'\n    assert candidate(3) == '0 1 2 3'\n    assert candidate(10) == '0 1 2 3 4 5 6 7 8 9 10'\n    assert candidate(5, step=2) == '0 2 4'\n    assert candidate(5, step=2, reverse=True) == '4 2 0'\n    assert candidate(3, sep='-') == '0-1-2-3'\n    assert candidate(4, reverse=True, sep='|') == '4|3|2|1|0'\n    assert candidate(1, step=3) == '0'\n"}
{"task_id": "ExtendedEval/16", "prompt": "import string as pystring\nfrom typing import Optional\n\ndef count_distinct_characters(\n    text: str,\n    ignore_case: bool = True,\n    letters_only: bool = False\n) -> int:\n    \"\"\"\n    Counts how many distinct characters are in the input string.\n\n    - If `ignore_case` is True, comparison is case-insensitive (uses Unicode-aware `.casefold()`).\n    - If `letters_only` is True, filters out non-letter characters (e.g., digits, punctuation, emojis).\n      Uses str.isalpha() for filtering.\n    - An empty string returns 0.\n\n    Examples:\n    >>> count_distinct_characters('xyzXYZ')\n    3\n    >>> count_distinct_characters('Jerry')\n    4\n    >>> count_distinct_characters('123!!!', letters_only=True)\n    0\n    >>> count_distinct_characters('aAaA', ignore_case=False)\n    2\n    >>> count_distinct_characters('Jęrry JĘRRY', ignore_case=True, letters_only=True)\n    4\n    \n    Please provide only the Python function code with no explanations.\n    \"\"\"\n", "entry_point": "count_distinct_characters", "canonical_solution": "def count_distinct_characters(\n    text: str,\n    ignore_case: bool = True,\n    letters_only: bool = False\n) -> int:\n    if ignore_case:\n        text = text.casefold()\n    if letters_only:\n        text = ''.join(c for c in text if c.isalpha())\n    return len(set(text))\n", "test": "\nMETADATA = {\n    'author': 'modification',\n    'dataset': 'test'\n}\n\ndef check(candidate):\n    assert candidate('') == 0\n    assert candidate('abcde') == 5\n    assert candidate('abcde' + 'CADE' + 'cade') == 5\n    assert candidate('aaaaAAAAaaaa') == 1\n    assert candidate('Jerry jERRY JeRRRY') == 5\n    assert candidate('aAaA', ignore_case=False) == 2\n    assert candidate('aAaA', ignore_case=True) == 1\n    assert candidate('abc123', letters_only=True) == 3\n    assert candidate('123!!!', letters_only=True) == 0\n    assert candidate('Jęrry JĘRRY', ignore_case=True, letters_only=True) == 4\n    assert 
candidate('éÉeE', ignore_case=True, letters_only=True) == 2\n    assert candidate('a😊b😊c', letters_only=True) == 3\n    assert candidate('😊😊😊', letters_only=True) == 0\n    assert candidate('😊😊😊') == 1\n    assert candidate('Aa!!', ignore_case=False, letters_only=False) == 3\n"}
{"task_id": "ExtendedEval/16", "prompt": "def count_distinct_characters(string: str, case_sensitive: bool = False, ignore_whitespace: bool = True, ignore_digits: bool = False) -> int:\n    \"\"\"\n    Count how many distinct characters are in the input string.\n    - If `case_sensitive` is False, treat uppercase and lowercase as equal.\n    - If `ignore_whitespace` is True, ignore all whitespace.\n    - If `ignore_digits` is True, ignore digits (0-9).\n\n    >>> count_distinct_characters('xyzXYZ')\n    3\n    >>> count_distinct_characters('Jerry 123', ignore_digits=True)\n    4\n    >>> count_distinct_characters('aA', case_sensitive=True)\n    2\n    >>> count_distinct_characters('a A', case_sensitive=False, ignore_whitespace=False)\n    2\n    \n    Please provide only the Python function code with no explanations.\n    \"\"\"\n", "entry_point": "count_distinct_characters", "canonical_solution": "    processed_string = string\n    \n    if not case_sensitive:\n        processed_string = processed_string.lower()\n    \n    if ignore_whitespace:\n        processed_string = ''.join(c for c in processed_string if not c.isspace())\n    \n    if ignore_digits:\n        processed_string = ''.join(c for c in processed_string if not c.isdigit())\n    \n    return len(set(processed_string))", "test": "\nMETADATA = {\n    'author': 'taniakaragiannidi',\n    'dataset': 'custom_test'\n}\n\ndef check(candidate):\n    assert candidate('') == 0\n    assert candidate('abcABC') == 3\n    assert candidate('abcABC', case_sensitive=True) == 6\n    assert candidate('a a a a') == 1\n    assert candidate('a a a a', ignore_whitespace=False) == 2\n    assert candidate('123abc', ignore_digits=True) == 3\n    assert candidate('abc123', ignore_digits=False) == 6\n    assert candidate('   ') == 0\n    assert candidate('   ', ignore_whitespace=False) == 1\n    assert candidate('Hello WORLD', case_sensitive=True, ignore_whitespace=True) == 9\n    # Additional edge cases\n    assert 
candidate('!@#$%', case_sensitive=False) == 5\n    assert candidate('AaAa', case_sensitive=False) == 1\n    assert candidate('123 456', ignore_digits=True, ignore_whitespace=True) == 0"}
{"task_id": "ExtendedEval/17", "prompt": "from typing import List, Optional\n\n\ndef parse_music(music_string: str, tempo: Optional[int] = 1) -> List[int]:\n    \"\"\"\n    Parses a music string with the following legend:\n    - 'o'   : whole note (4 beats)\n    - 'o|'  : half note (2 beats)\n    - '.|'  : quarter note (1 beat)\n    - '-'   : rest/silence (0 beats)\n\n    Notes can be separated by any whitespace. Invalid symbols are ignored.\n    Optional parameter `tempo` scales the durations (e.g. tempo=2 doubles durations).\n\n    >>> parse_music('o o| .| o| - .|', tempo=1)\n    [4, 2, 1, 2, 0, 1]\n    >>> parse_music('o o| .| o|', tempo=2)\n    [8, 4, 2, 4]\n    >>> parse_music('o o| wrong .| o| - invalid', tempo=1)\n    [4, 2, 1, 2, 0]\n    \n    Please provide only the Python function code with no explanations.\n    \"\"\"\n", "entry_point": "parse_music", "canonical_solution": "    note_map = {'o': 4, 'o|': 2, '.|': 1, '-': 0}\n    return [note_map[token] * tempo for token in music_string.split() if token in note_map]", "test": "\nMETADATA = {\n    'author': 'taniakaragiannidi',\n    'dataset': 'custom_test'\n}\n\ndef check(candidate):\n    assert candidate('') == []\n    assert candidate('   o o|   .|') == [4, 2, 1]\n    assert candidate('o o o o', tempo=2) == [8, 8, 8, 8]\n    assert candidate('o| .| o| .| o o| o o|', tempo=1) == [2, 1, 2, 1, 4, 2, 4, 2]\n    assert candidate('o .| - .|') == [4, 1, 0, 1]\n    assert candidate('invalid_symbol .|') == [1]\n    assert candidate('\\n o\\t.| o|  -', tempo=2) == [8, 2, 4, 0]\n"}
{"task_id": "ExtendedEval/18", "prompt": "def how_many_times(string: str, substring: str, overlapping: bool = True, case_sensitive: bool = True) -> int:\n    \"\"\"\n    Count how many times a substring appears in a string.\n    - If `overlapping=True`, count overlapping matches.\n    - If `case_sensitive=False`, the match is case-insensitive.\n\n    >>> how_many_times('aaaa', 'aa')\n    3\n    >>> how_many_times('aaaa', 'aa', overlapping=False)\n    2\n    >>> how_many_times('AbcabcABC', 'abc', case_sensitive=False)\n    3\n    \"\"\"\n", "entry_point": "how_many_times", "canonical_solution": "    if not substring:\n        return 0\n    \n    search_string = string\n    search_substring = substring\n    \n    if not case_sensitive:\n        search_string = string.lower()\n        search_substring = substring.lower()\n    \n    count = 0\n    start = 0\n    \n    while start <= len(search_string) - len(search_substring):\n        pos = search_string.find(search_substring, start)\n        if pos == -1:\n            break\n        count += 1\n        start = pos + (1 if overlapping else len(search_substring))\n    \n    return count", "test": "\nMETADATA = {\n    'author': 'taniakaragiannidi',\n    'dataset': 'custom_test'\n}\n\ndef check(candidate):\n    assert candidate('', 'a') == 0\n    assert candidate('abc', '') == 0\n    assert candidate('aaaa', 'aa') == 3\n    assert candidate('aaaa', 'aa', overlapping=False) == 2\n    assert candidate('AbcabcABC', 'abc', case_sensitive=False) == 3\n    assert candidate('AbcabcABC', 'abc', case_sensitive=True) == 1\n    assert candidate('mississippi', 'issi') == 2\n    assert candidate('ababababa', 'aba') == 4\n    assert candidate('ababababa', 'aba', overlapping=False) == 2\n    assert candidate('AAAA', 'aa', case_sensitive=False) == 3\n    assert candidate('Hello World', 'l', case_sensitive=False) == 3\n    assert candidate('Hello World', 'l', case_sensitive=True) == 3"}
{"task_id": "ExtendedEval/18", "prompt": "def how_many_times(string: str, substring: str, overlapping: bool = True, case_sensitive: bool = True) -> int:\n    \"\"\"\n    Count how many times a substring appears in a string.\n    - If `overlapping=True`, count overlapping matches.\n    - If `case_sensitive=False`, the match is case-insensitive.\n\n    >>> how_many_times('aaaa', 'aa')\n    3\n    >>> how_many_times('aaaa', 'aa', overlapping=False)\n    2\n    >>> how_many_times('AbcabcABC', 'abc', case_sensitive=False)\n    3\n    \"\"\"\n", "entry_point": "how_many_times", "canonical_solution": "    if not substring:\n        return 0\n    \n    search_string = string\n    search_substring = substring\n    \n    if not case_sensitive:\n        search_string = string.lower()\n        search_substring = substring.lower()\n    \n    count = 0\n    start = 0\n    \n    while start <= len(search_string) - len(search_substring):\n        pos = search_string.find(search_substring, start)\n        if pos == -1:\n            break\n        count += 1\n        start = pos + (1 if overlapping else len(search_substring))\n    \n    return count", "test": "\nMETADATA = {\n    'author': 'taniakaragiannidi',\n    'dataset': 'custom_test'\n}\n\ndef check(candidate):\n    assert candidate('', 'a') == 0\n    assert candidate('abc', '') == 0\n    assert candidate('aaaa', 'aa') == 3\n    assert candidate('aaaa', 'aa', overlapping=False) == 2\n    assert candidate('AbcabcABC', 'abc', case_sensitive=False) == 3\n    assert candidate('AbcabcABC', 'abc', case_sensitive=True) == 1\n    assert candidate('mississippi', 'issi') == 2\n    assert candidate('ababababa', 'aba') == 4\n    assert candidate('ababababa', 'aba', overlapping=False) == 2\n    assert candidate('AAAA', 'aa', case_sensitive=False) == 3\n    assert candidate('Hello World', 'l', case_sensitive=False) == 3\n    assert candidate('Hello World', 'l', case_sensitive=True) == 3"}
{"task_id": "ExtendedEval/19", "prompt": "from typing import List\n\ndef sort_numbers(numbers: str, reverse: bool = False, case_sensitive: bool = False) -> str:\n    \"\"\"\n    Sorts a space-delimited string of number words ('zero' to 'nine') in ascending or descending order.\n    - Ignores invalid number words.\n    - If case_sensitive is False, matching is case-insensitive but output uses lowercase.\n    - If reverse is True, sort in descending order.\n\n    >>> sort_numbers('Three ONE five')\n    'one three five'\n    >>> sort_numbers('Three ONE five', reverse=True)\n    'five three one'\n    >>> sort_numbers('Three ELEVEN five')\n    'three five'\n    \"\"\"\n", "entry_point": "sort_numbers", "canonical_solution": "    value_map = {'zero': 0,'one': 1,'two': 2,'three': 3,'four': 4,'five': 5,'six': 6,'seven': 7,'eight': 8,'nine': 9}\n    \n    if not numbers.strip():\n        return ''\n    \n    tokens = numbers.split()\n    valid_tokens = []\n    \n    for token in tokens:\n        if case_sensitive:\n            if token in value_map:\n                valid_tokens.append(token)\n        else:\n            if token.lower() in value_map:\n                valid_tokens.append(token.lower())\n    \n    # Sort by numerical value\n    valid_tokens.sort(key=lambda x: value_map[x], reverse=reverse)\n    \n    return ' '.join(valid_tokens)", "test": "\nMETADATA = {\n  'author': 'openai-extended',\n  'dataset': 'custom_test'\n}\n\ndef check(candidate):\n    assert candidate('') == ''\n    assert candidate('   ') == ''\n    assert candidate('Three ONE five') == 'one three five'\n    assert candidate('Three ONE five', reverse=True) == 'five three one'\n    assert candidate('three five nine', reverse=True) == 'nine five three'\n    assert candidate('six FIVE four Three TWO one ZERO', case_sensitive=False) == 'zero one two three four five six'\n    assert candidate('eleven twelve five six') == 'five six'\n    assert candidate('   zero  one   two ') == 'zero one two'\n    
assert candidate('ZerO ONE Nine', reverse=True, case_sensitive=False) == 'nine one zero'\n    # Case sensitive test\n    assert candidate('Zero one TWO', case_sensitive=True) == 'one'\n    assert candidate('zero one two', case_sensitive=True) == 'zero one two'"}
{"task_id": "ExtendedEval/20", "prompt": "from typing import List, Tuple, Optional\nimport math\n\ndef find_closest_elements(numbers: List[float], include_equal: bool = True, target_distance: Optional[float] = None) -> Optional[Tuple[float, float]]:\n    \"\"\"\n    Finds the pair of elements with the smallest distance (or equal to target_distance if given).\n    - Returns pair as (smaller, larger).\n    - If include_equal is False, skips identical values.\n    - Ignores invalid floats (inf, -inf, NaN).\n\n    >>> find_closest_elements([1.0, 2.0, 3.9, 4.0, 5.0, 2.2])\n    (3.9, 4.0)\n    >>> find_closest_elements([1.0, 2.0, 2.0, 2.5], include_equal=False)\n    (2.0, 2.5)\n    >>> find_closest_elements([1.0, 2.0, 2.2, 4.0], target_distance=0.2)\n    (2.0, 2.2)\n    \"\"\"\n", "entry_point": "find_closest_elements", "canonical_solution": "    # Filter out invalid floats\n    filtered = [x for x in numbers if not (math.isinf(x) or math.isnan(x))]\n    \n    if len(filtered) < 2:\n        return None\n    \n    best_pair = None\n    best_distance = None\n    \n    for i in range(len(filtered)):\n        for j in range(i + 1, len(filtered)):\n            a, b = filtered[i], filtered[j]\n            \n            if not include_equal and a == b:\n                continue\n                \n            distance = abs(a - b)\n            \n            if target_distance is not None:\n                # Use small epsilon for floating point comparison\n                if abs(distance - target_distance) < 1e-9:\n                    return tuple(sorted([a, b]))\n            else:\n                if best_distance is None or distance < best_distance:\n                    best_distance = distance\n                    best_pair = tuple(sorted([a, b]))\n    \n    return best_pair", "test": "\nMETADATA = {\n  'author': 'taniakaragiannidi',\n  'dataset': 'custom_test'\n}\n\ndef check(candidate):\n    assert candidate([1.0, 2.0, 2.0, 3.0]) == (2.0, 2.0)\n    assert candidate([1.0, 
2.0, 2.0, 2.5], include_equal=False) == (2.0, 2.5)\n    assert candidate([1.0, 5.0, 3.0, 7.0, 6.9]) == (6.9, 7.0)\n    assert candidate([1.0, float('inf'), 2.0, float('-inf'), 2.1]) == (2.0, 2.1)\n    assert candidate([1.0, 2.2, 4.0, 2.0], target_distance=0.2) == (2.0, 2.2)\n    assert candidate([float('nan'), 1.0, 1.1]) == (1.0, 1.1)\n    assert candidate([1.0, 3.0, 2.0], target_distance=1.0) == (1.0, 2.0)\n    # Edge cases\n    assert candidate([]) == None\n    assert candidate([1.0]) == None\n    assert candidate([float('inf'), float('-inf')]) == None\n    # Test exact target distance matching\n    assert candidate([1.0, 1.5, 2.0, 3.0], target_distance=0.5) == (1.0, 1.5)"}
{"task_id": "ExtendedEval/21", "prompt": "from typing import List\n\n\ndef rescale_to_unit(numbers: List[float]) -> List[float]:\n    \"\"\"\n    Given a list of numbers (of at least two elements), apply a linear transform to rescale\n    all values such that the minimum becomes 0.0 and the maximum becomes 1.0.\n\n    If all numbers are the same, return a list of 0.0s.\n    Raise ValueError if the list contains fewer than two elements.\n\n    Examples:\n    >>> rescale_to_unit([1.0, 2.0, 3.0, 4.0, 5.0])\n    [0.0, 0.25, 0.5, 0.75, 1.0]\n    >>> rescale_to_unit([3.0, 3.0, 3.0])\n    [0.0, 0.0, 0.0]\n    >>> rescale_to_unit([2.0, 49.9])\n    [0.0, 1.0]\n    \"\"\"\n", "entry_point": "rescale_to_unit", "canonical_solution": "def rescale_to_unit(numbers: List[float]) -> List[float]:\n    if len(numbers) < 2:\n        raise ValueError(\"List must contain at least two elements\")\n    min_number = min(numbers)\n    max_number = max(numbers)\n    if min_number == max_number:\n        return [0.0 for _ in numbers]\n    return [(x - min_number) / (max_number - min_number) for x in numbers]", "test": "\nMETADATA = {\n  'author': 'taniakaragiannidi',\n  'dataset': 'test'\n}\n\ndef check(candidate):\n    assert candidate([2.0, 49.9]) == [0.0, 1.0]\n    assert candidate([100.0, 49.9]) == [1.0, 0.0]\n    assert candidate([1.0, 2.0, 3.0, 4.0, 5.0]) == [0.0, 0.25, 0.5, 0.75, 1.0]\n    assert candidate([2.0, 1.0, 5.0, 3.0, 4.0]) == [0.25, 0.0, 1.0, 0.5, 0.75]\n    assert candidate([12.0, 11.0, 15.0, 13.0, 14.0]) == [0.25, 0.0, 1.0, 0.5, 0.75]\n    # All same\n    assert candidate([3.0, 3.0, 3.0]) == [0.0, 0.0, 0.0]\n    assert candidate([7.7, 7.7]) == [0.0, 0.0]\n    # Error case: too short\n    try:\n        candidate([1.0])\n        assert False\n    except ValueError:\n        assert True\n    try:\n        candidate([])\n        assert False\n    except ValueError:\n        assert True\n"}
{"task_id": "ExtendedEval/21", "prompt": "from typing import List, Optional\nimport math\n\ndef rescale_to_unit(numbers: List[Optional[float]], unique_only: bool = False) -> List[Optional[float]]:\n    \"\"\"\n    Rescale numeric values to [0,1].\n    - Ignores None and NaN entries (preserves them in output).\n    - If unique_only=True, duplicates are ignored in scaling (but returned scaled normally).\n    - If all valid numbers are equal, return 0.5 for those entries (centered).\n    \"\"\"\n", "entry_point": "rescale_to_unit", "canonical_solution": "    valid = [x for x in numbers if x is not None and not math.isnan(x)]\n    if not valid:\n        return [None if x is None or math.isnan(x) else 0.5 for x in numbers]\n    values = list(set(valid)) if unique_only else valid\n    min_val, max_val = min(values), max(values)\n    if math.isclose(min_val, max_val):\n        return [None if x is None or math.isnan(x) else 0.5 for x in numbers]\n    return [None if x is None or math.isnan(x) else (x - min_val) / (max_val - min_val) for x in numbers]", "test": "\nMETADATA = {\n  'author': 'openai-extended',\n  'dataset': 'custom_test'\n}\n\ndef check(candidate):\n    assert candidate([2.0, 4.0, 6.0]) == [0.0, 0.5, 1.0]\n    assert candidate([5.0, 5.0, 5.0]) == [0.5, 0.5, 0.5]\n    assert candidate([1.0, None, 3.0, float('nan')]) == [0.0, None, 1.0, None]\n    assert candidate([7.0, 3.0, 3.0, 9.0], unique_only=True) == [0.6666666666666666, 0.0, 0.0, 1.0]\n    assert candidate([float('nan'), float('inf'), -float('inf')]) == [None, None, None]\n    assert candidate([None, None]) == [None, None]\n"}
{"task_id": "ExtendedEval/22", "prompt": "from typing import List, Any\n\n\ndef filter_integers(values: List[Any], strict: bool = False) -> List[int]:\n    \"\"\"\n    Filters the given list, returning only integer values.\n\n    If `strict=True`, excludes boolean values (which are technically integers in Python).\n\n    Examples:\n    >>> filter_integers(['a', 3.14, 5])\n    [5]\n    >>> filter_integers([1, 2, 3, 'abc', {}, []])\n    [1, 2, 3]\n    >>> filter_integers([True, 2, False, 3], strict=True)\n    [2, 3]\n    \"\"\"\n", "entry_point": "filter_integers", "canonical_solution": "def filter_integers(values: List[Any], strict: bool = False) -> List[int]:\n    if strict:\n        return [x for x in values if isinstance(x, int) and not isinstance(x, bool)]\n    return [x for x in values if isinstance(x, int)]", "test": "\nMETADATA = {\n  'author': 'taniakaragiannidi',\n  'dataset': 'test'\n}\n\ndef check(candidate):\n    assert candidate([]) == []\n    assert candidate([4, {}, [], 23.2, 9, 'adasd']) == [4, 9]\n    assert candidate([3, 'c', 3, 3, 'a', 'b']) == [3, 3, 3]\n    assert candidate([True, 5, False]) == [True, 5, False]\n    assert candidate([True, 5, False], strict=True) == [5]\n    assert candidate(['x', 1.1, {'a': 1}, -3], strict=True) == [-3]\n"}
{"task_id": "ExtendedEval/23", "prompt": "import unicodedata\n\ndef strlen(string: str, *, mode: str = 'all', ignore_whitespace: bool = False, normalize_unicode: bool = False) -> int:\n    \"\"\"\n    Return length of the given string with options:\n    - mode: 'all' (default), 'alpha', 'numeric', 'alnum' to count specific character types.\n    - ignore_whitespace: if True, spaces/tabs/newlines are excluded.\n    - normalize_unicode: if True, apply Unicode NFC normalization.\n\n    >>> strlen('abc 123')\n    7\n    >>> strlen('abc 123', mode='alpha')\n    3\n    >>> strlen('abc 123', mode='numeric')\n    3\n    >>> strlen('abc 123', mode='alnum', ignore_whitespace=True)\n    6\n    >>> strlen(' a\\u0301 ', normalize_unicode=True)  # accented 'á'\n    3\n    \"\"\"\n", "entry_point": "strlen", "canonical_solution": "    if normalize_unicode:\n        string = unicodedata.normalize('NFC', string)\n    if ignore_whitespace:\n        string = ''.join(c for c in string if not c.isspace())\n    if mode == 'alpha':\n        string = ''.join(c for c in string if c.isalpha())\n    elif mode == 'numeric':\n        string = ''.join(c for c in string if c.isdigit())\n    elif mode == 'alnum':\n        string = ''.join(c for c in string if c.isalnum())\n    return len(string)", "test": "\nMETADATA = {\n  'author': 'taniakaragiannidi',\n  'dataset': 'custom_test'\n}\n\ndef check(candidate):\n    assert candidate('') == 0\n    assert candidate('abc') == 3\n    assert candidate('abc 123') == 7\n    assert candidate('abc 123', ignore_whitespace=True) == 6\n    assert candidate('abc 123', mode='alpha') == 3\n    assert candidate('abc 123', mode='numeric') == 3\n    assert candidate('abc 123', mode='alnum', ignore_whitespace=True) == 6\n    assert candidate(' a\\u0301 ', normalize_unicode=True) == 3  # á with combining acute accent\n    assert candidate(' a\\u0301 ', normalize_unicode=False) == 4\n"}
{"task_id": "ExtendedEval/28", "prompt": "from typing import List\n\n\ndef concatenate(strings: List[str]) -> str:\n    \"\"\" Concatenate list of strings into a single string\n    >>> concatenate([])\n    ''\n    >>> concatenate(['a', 'b', 'c'])\n    'abc'\n    \"\"\"\n", "entry_point": "concatenate", "canonical_solution": "    return ''.join(strings)\n", "test": "\n\nMETADATA = {\n    'author': 'taniakaragiannidi',\n    'dataset': 'test'\n}\n\n\ndef check(candidate):\n    assert candidate([]) == ''\n    assert candidate(['x', 'y', 'z']) == 'xyz'\n    assert candidate(['x', 'y', 'z', 'w', 'k']) == 'xyzwk'\n"}
{"task_id": "ExtendedEval/29", "prompt": "from typing import List\n\ndef filter_by_prefix(strings: List[str], prefix: str, case_sensitive: bool = True) -> List[str]:\n    \"\"\"\n    Filter an input list of strings only for those that start with a given prefix.\n    Case-sensitivity can be toggled.\n\n    >>> filter_by_prefix(['abc', 'Abc', 'array'], 'a')\n    ['abc', 'array']\n    >>> filter_by_prefix(['abc', 'Abc', 'array'], 'a', case_sensitive=True)\n    ['abc', 'array']\n    >>> filter_by_prefix(['abc', 'Abc', 'array'], 'a', case_sensitive=False)\n    ['abc', 'Abc', 'array']\n    \"\"\"\n", "entry_point": "filter_by_prefix", "canonical_solution": "    if not case_sensitive:\n        prefix = prefix.lower()\n        return [s for s in strings if s.lower().startswith(prefix)]\n    return [s for s in strings if s.startswith(prefix)]", "test": "\nMETADATA = {\n  'author': 'taniakaragiannidi',\n  'dataset': 'custom_test'\n}\n\ndef check(candidate):\n    assert candidate([], 'john') == []\n    assert candidate(['xxx', 'asd', 'xxy', 'john doe', 'xxxAAA', 'xxx'], 'xxx') == ['xxx', 'xxxAAA', 'xxx']\n    assert candidate(['abc', 'Abc', 'array'], 'a', case_sensitive=True) == ['abc', 'array']\n    assert candidate(['abc', 'Abc', 'array'], 'a', case_sensitive=False) == ['abc', 'Abc', 'array']\n    assert candidate(['Αλφα', 'αλχημεία', 'Βήτα'], 'αλ', case_sensitive=False) == ['Αλφα', 'αλχημεία']  # Unicode\n    assert candidate(['😀test', '😃happy', '😂lol'], '😀') == ['😀test']\n    assert candidate(['  abc', 'abc', ' abc'], 'abc') == ['abc']\n    assert candidate(['prefix', 'preX', 'pre'], 'pre') == ['prefix', 'preX', 'pre']\n    assert candidate(['pre', 'prefix', 'prefixextra'], 'prefix') == ['prefix', 'prefixextra']\n    long_list = ['a' * i for i in range(100)] + ['prefixExample']\n    assert candidate(long_list, 'prefix') == ['prefixExample']"}
{"task_id": "ExtendedEval/21", "prompt": "from typing import List\n\ndef rescale_to_unit(numbers: List[float]) -> List[float]:\n    \"\"\"\n    Given a list of numbers (of at least two elements), apply a linear transform to rescale\n    all values such that the minimum becomes 0.0 and the maximum becomes 1.0.\n\n    If all numbers are the same, return a list of 0.0s.\n    Raise ValueError if the list contains fewer than two elements.\n\n    Provide only the Python function code with no explanations.\n\n    Examples:\n    >>> rescale_to_unit([1.0, 2.0, 3.0, 4.0, 5.0])\n    [0.0, 0.25, 0.5, 0.75, 1.0]\n    >>> rescale_to_unit([3.0, 3.0, 3.0])\n    [0.0, 0.0, 0.0]\n    >>> rescale_to_unit([2.0, 49.9])\n    [0.0, 1.0]\n    \"\"\"\n", "entry_point": "rescale_to_unit", "canonical_solution": "def rescale_to_unit(numbers: List[float]) -> List[float]:\n    if len(numbers) < 2:\n        raise ValueError(\"List must contain at least two elements\")\n    min_number = min(numbers)\n    max_number = max(numbers)\n    if min_number == max_number:\n        return [0.0 for _ in numbers]\n    return [(x - min_number) / (max_number - min_number) for x in numbers]", "test": "\nMETADATA = {\n  'author': 'taniakaragiannidi',\n  'dataset': 'test'\n}\n\ndef check(candidate):\n    assert candidate([2.0, 49.9]) == [0.0, 1.0]\n    assert candidate([100.0, 49.9]) == [1.0, 0.0]\n    assert candidate([1.0, 2.0, 3.0, 4.0, 5.0]) == [0.0, 0.25, 0.5, 0.75, 1.0]\n    assert candidate([2.0, 1.0, 5.0, 3.0, 4.0]) == [0.25, 0.0, 1.0, 0.5, 0.75]\n    assert candidate([12.0, 11.0, 15.0, 13.0, 14.0]) == [0.25, 0.0, 1.0, 0.5, 0.75]\n    # All same\n    assert candidate([3.0, 3.0, 3.0]) == [0.0, 0.0, 0.0]\n    assert candidate([7.7, 7.7]) == [0.0, 0.0]\n    # Error case: too short\n    try:\n        candidate([1.0])\n        assert False\n    except ValueError:\n        assert True\n    try:\n        candidate([])\n        assert False\n    except ValueError:\n        assert True"}
{"task_id": "ExtendedEval/24", "prompt": "\ndef largest_divisor(n: int, k: int = 1) -> int:\n    \"\"\"\n    Return the k-th largest proper divisor of n (i.e., less than n).\n    If k=1, return the largest proper divisor.\n    Raise ValueError if k is larger than number of available divisors.\n\n    Provide only the Python function code with no explanations.\n\n    >>> largest_divisor(15)\n    5\n    >>> largest_divisor(15, k=2)\n    3\n    >>> largest_divisor(15, k=3)\n    1\n    \"\"\"\n", "entry_point": "largest_divisor", "canonical_solution": "def largest_divisor(n: int, k: int = 1) -> int:\n    if n <= 1:\n        raise ValueError(\"n must be > 1\")\n\n    divisors = [i for i in range(1, n) if n % i == 0]\n    divisors.sort(reverse=True)\n    \n    if k > len(divisors):\n        raise ValueError(\"Not enough divisors\")\n\n    return divisors[k - 1]", "test": "\nMETADATA = {\n  'author': 'taniakaragiannidi',\n  'dataset': 'test'\n}\n\ndef check(candidate):\n    assert candidate(15) == 5\n    assert candidate(15, 2) == 3\n    assert candidate(15, 3) == 1\n    assert candidate(10) == 5\n    assert candidate(10, 2) == 2\n    assert candidate(10, 3) == 1\n    assert candidate(100) == 50\n    assert candidate(49, 1) == 7\n    assert candidate(49, 2) == 1\n    try:\n        candidate(7, 3)\n        assert False\n    except ValueError:\n        assert True\n    try:\n        candidate(1)\n        assert False\n    except ValueError:\n        assert True"}
{"task_id": "ExtendedEval/30", "prompt": "from typing import List, Union, Literal\n\ndef get_positive(\n    l: List[Union[int, float]],\n    type_filter: Literal['all', 'int', 'float'] = 'all',\n    strict: bool = False\n) -> List[Union[int, float]]:\n    \"\"\"\n    Return only positive numbers in the list.\n\n    - If type_filter is 'int', include only positive integers.\n    - If type_filter is 'float', include only positive floats.\n    - If type_filter is 'all', include both.\n\n    - If strict=True, zero is not considered positive.\n\n    Provide only the Python function code with no explanations.\n\n    >>> get_positive([-1, 2, -4.5, 5.0, 6], type_filter='int')\n    [2, 6]\n    >>> get_positive([0.0, -1.2, 3.5], type_filter='float', strict=True)\n    [3.5]\n    >>> get_positive([-2, 0, 4, 5.5], strict=True)\n    [4, 5.5]\n    \"\"\"\n", "entry_point": "get_positive", "canonical_solution": "from typing import List, Union, Literal\n\ndef get_positive(\n    l: List[Union[int, float]],\n    type_filter: Literal['all', 'int', 'float'] = 'all',\n    strict: bool = False\n) -> List[Union[int, float]]:\n    result = []\n    for x in l:\n        if type_filter == 'int' and not isinstance(x, int):\n            continue\n        if type_filter == 'float' and not isinstance(x, float):\n            continue\n        if strict:\n            if x > 0:\n                result.append(x)\n        else:\n            if x >= 0:\n                result.append(x)\n    return result", "test": "\nMETADATA = {\n  'author': 'taniakaragiannidi',\n  'dataset': 'test'\n}\n\ndef check(candidate):\n    assert candidate([-1, 2, -4, 5, 6]) == [2, 5, 6]\n    assert candidate([5, 3, -5, 2, -3, 3, 9, 0, 123, 1, -10]) == [5, 3, 2, 3, 9, 0, 123, 1]\n    assert candidate([-1, -2]) == []\n    assert candidate([]) == []\n    # type_filter='int'\n    assert candidate([1.5, -2, 3, 0, 4.5], type_filter='int') == [3, 0]\n    # type_filter='float'\n    assert candidate([1.5, -2, 3, 0.0, 4.5], 
type_filter='float') == [1.5, 0.0, 4.5]\n    # strict=True (exclude zero)\n    assert candidate([0, -1, 1, 2.0], strict=True) == [1, 2.0]\n    # strict + type_filter\n    assert candidate([0, 1, 2.2, -3], strict=True, type_filter='int') == [1]\n    assert candidate([0.0, 1.1, -2.0], strict=True, type_filter='float') == [1.1]"}
{"task_id": "ExtendedEval/31", "prompt": "import math\n\ndef is_prime(n: int, method: str = 'trial') -> bool:\n    \"\"\"\n    Return True if a given number is prime, False otherwise.\n    - Supports method='trial' (basic check) and method='6k' (6k ± 1 optimization).\n    - Raises ValueError for non-positive or non-integer input.\n\n    Provide only the Python function code with no explanations.\n\n    >>> is_prime(6)\n    False\n    >>> is_prime(101)\n    True\n    >>> is_prime(13441, method='6k')\n    True\n    \"\"\"\n", "entry_point": "is_prime", "canonical_solution": "import math\n\ndef is_prime(n: int, method: str = 'trial') -> bool:\n    if not isinstance(n, int) or n < 2:\n        return False\n\n    if method == 'trial':\n        for i in range(2, int(math.isqrt(n)) + 1):\n            if n % i == 0:\n                return False\n        return True\n\n    elif method == '6k':\n        if n <= 3:\n            return n > 1\n        if n % 2 == 0 or n % 3 == 0:\n            return False\n        i = 5\n        while i * i <= n:\n            if n % i == 0 or n % (i + 2) == 0:\n                return False\n            i += 6\n        return True\n\n    else:\n        raise ValueError(\"Unsupported method\")", "test": "\nMETADATA = {\n  'author': 'taniakaragiannidi',\n  'dataset': 'test'\n}\n\ndef check(candidate):\n    assert candidate(6) == False\n    assert candidate(101) == True\n    assert candidate(11) == True\n    assert candidate(13441) == True\n    assert candidate(61) == True\n    assert candidate(4) == False\n    assert candidate(1) == False\n    assert candidate(5) == True\n    assert candidate(11) == True\n    assert candidate(17) == True\n    assert candidate(5 * 17) == False\n    assert candidate(11 * 7) == False\n    assert candidate(13441 * 19) == False\n    # method='6k'\n    assert candidate(97, method='6k') == True\n    assert candidate(49, method='6k') == False\n    # edge cases\n    assert candidate(2) == True\n    assert candidate(3, 
method='6k') == True\n    assert candidate(0) == False\n    assert candidate(-7) == False"}
{"task_id": "ExtendedEval/32", "prompt": "import math\n\ndef poly(xs: list, x: float):\n    \"\"\"\n    Evaluates polynomial with coefficients xs at point x.\n    return xs[0] + xs[1] * x + xs[1] * x^2 + .... xs[n] * x^n\n    \"\"\"\n    return sum([coeff * math.pow(x, i) for i, coeff in enumerate(xs)])\n\ndef find_zero(xs: list):\n    \"\"\" xs are coefficients of a polynomial.\n    find_zero find x such that poly(x) = 0.\n    find_zero returns only only zero point, even if there are many.\n    Moreover, find_zero only takes list xs having even number of coefficients\n    and largest non zero coefficient as it guarantees\n    a solution.\n    \n    Provide only the Python function code with no explanations.\n    \n    >>> round(find_zero([1, 2]), 2) # f(x) = 1 + 2x\n    -0.5\n    >>> round(find_zero([-6, 11, -6, 1]), 2) # (x - 1) * (x - 2) * (x - 3) = -6 + 11x - 6x^2 + x^3\n    1.0\n    \"\"\"\n", "entry_point": "find_zero", "canonical_solution": "    begin, end = -1., 1.\n    while poly(xs, begin) * poly(xs, end) > 0:\n        begin *= 2.0\n        end *= 2.0\n    while end - begin > 1e-10:\n        center = (begin + end) / 2.0\n        if poly(xs, center) * poly(xs, begin) > 0:\n            begin = center\n        else:\n            end = center\n    return begin", "test": "\nMETADATA = {}\n\ndef check(candidate):\n    import math\n    import random\n    rng = random.Random(42)\n    import copy\n    for _ in range(100):\n        ncoeff = 2 * rng.randint(1, 4)\n        coeffs = []\n        for _ in range(ncoeff):\n            coeff = rng.randint(-10, 10)\n            if coeff == 0:\n                coeff = 1\n            coeffs.append(coeff)\n        solution = candidate(copy.deepcopy(coeffs))\n        assert math.fabs(poly(coeffs, solution)) < 1e-4"}
{"task_id": "ExtendedEval/35", "prompt": "from typing import List, Callable, Any, Union\n\ndef max_element(l: List[Any], *, key: Callable[[Any], Any] = lambda x: x, return_all: bool = False) -> Union[Any, List[Any]]:\n    \"\"\"\n    Return the maximum element(s) in the list according to a key function.\n    - If return_all=True, return a list of all elements tied for max.\n    - Raises ValueError on empty list.\n\n    >>> max_element([1, 2, 3])\n    3\n    >>> max_element([1, 2, 3], key=lambda x: -x)\n    1\n    >>> max_element(['a', 'bb', 'ccc'], key=len)\n    'ccc'\n    >>> max_element(['a', 'bb', 'ccc', 'dd'], key=len, return_all=True)\n    ['ccc', 'dd']\n    \"\"\"\n", "entry_point": "max_element", "canonical_solution": "def max_element(l, *, key=lambda x: x, return_all=False):\n    if not l:\n        raise ValueError(\"Empty list\")\n\n    max_val = key(l[0])\n    result = [l[0]]\n\n    for item in l[1:]:\n        val = key(item)\n        if val > max_val:\n            max_val = val\n            result = [item]\n        elif val == max_val:\n            result.append(item)\n\n    return result if return_all else result[0]", "test": "\nMETADATA = { 'author': 'taniakaragiannidi', 'dataset': 'test' }\n\ndef check(candidate):\n    assert candidate([1, 2, 3]) == 3\n    assert candidate([1, 2, 3], key=lambda x: -x) == 1\n    assert candidate(['a', 'bb', 'ccc'], key=len) == 'ccc'\n    assert candidate(['a', 'bb', 'ccc', 'dd'], key=len, return_all=True) == ['ccc', 'dd']\n    assert candidate(['apple', 'banana', 'pear'], key=lambda x: x[-1]) == 'banana'\n    try:\n        candidate([])\n        assert False\n    except ValueError:\n        assert True"}
{"task_id": "ExtendedEval/35", "prompt": "from decimal import Decimal\nfrom fractions import Fraction\nfrom numbers import Number\n\n\ndef max_mixed_number(numbers: list):\n    \"\"\"\n    Given a list containing numeric elements (ints, floats, Decimals, Fractions),\n    return the one with the highest value, preserving its original type.\n    Raises ValueError if list is empty or contains no numeric elements.\n\n    >>> max_mixed_number([1, 2.5, Decimal('3.1'), Fraction(7, 2)])\n    Fraction(7, 2)\n    >>> max_mixed_number([Decimal('5.5'), 5, 3.14])\n    Decimal('5.5')\n    >>> max_mixed_number([Fraction(5, 4), Fraction(9, 4), 2.1])\n    Fraction(9, 4)\n    \"\"\"\n", "entry_point": "max_mixed_number", "canonical_solution": "    if not numbers:\n        raise ValueError(\"Empty list\")\n    numeric_values = [x for x in numbers if isinstance(x, Number)]\n    if not numeric_values:\n        raise ValueError(\"No numeric elements\")\n    max_val = numeric_values[0]\n    for val in numeric_values[1:]:\n        if val > max_val:\n            max_val = val\n    return max_val", "test": "\n\nMETADATA = {}\n\ndef check(candidate):\n    from decimal import Decimal\n    from fractions import Fraction\n\n    assert candidate([1, 2.5, Decimal('3.1'), Fraction(7, 2)]) == Fraction(7, 2)\n    assert candidate([Decimal('5.5'), 5, 3.14]) == Decimal('5.5')\n    assert candidate([Fraction(5, 4), Fraction(9, 4), 2.1]) == Fraction(9, 4)\n    assert candidate([3, 3.0, Decimal('3.0'), Fraction(3, 1)]) == 3  # preserves first max seen\n    try:\n        candidate([])\n        assert False, \"Expected ValueError on empty list\"\n    except ValueError:\n        pass\n    try:\n        candidate(['a', {}, None])\n        assert False, \"Expected ValueError on list with no numeric elements\"\n    except ValueError:\n        pass"}
{"task_id": "ExtendedEval/36_hard", "prompt": "def fizz_buzz(n: int) -> int:\n    \"\"\"\n    Return the number of occurrences of the digit 7 in numbers less than `n`\n    that are divisible by either 11 or 13, but NOT both.\n    Exclude numbers that end in 7 from consideration.\n\n    >>> fizz_buzz(50)\n    0\n    >>> fizz_buzz(78)\n    1\n    >>> fizz_buzz(79)\n    2\n    \"\"\"\n", "entry_point": "fizz_buzz", "canonical_solution": "    count = 0\n    for i in range(n):\n        # Check if divisible by 11 XOR 13\n        div_11 = i % 11 == 0\n        div_13 = i % 13 == 0\n        if div_11 ^ div_13:  # XOR - either one but not both\n            # Exclude numbers ending in 7\n            if i % 10 != 7:\n                # Count occurrences of digit 7\n                count += str(i).count('7')\n    return count", "test": "def check(candidate):\n    assert candidate(50) == 0\n    assert candidate(78) == 1\n    assert candidate(79) == 2\n    assert candidate(100) == 2\n    assert candidate(200) == 4\n    assert candidate(4000) == 129\n    assert candidate(10000) == 442\n"}
{"task_id": "ExtendedEval/37", "prompt": "def sort_even(l: list) -> list:\n    \"\"\"\n    Returns a new list where values at even indices are sorted in descending order,\n    while odd-indexed elements remain in their original positions.\n    The input list must not be mutated.\n\n    >>> sort_even([1, 2, 3])\n    [3, 2, 1]\n    >>> sort_even([5, 6, 3, 4])\n    [5, 6, 3, 4]\n    \"\"\"\n", "entry_point": "sort_even", "canonical_solution": "    result = l.copy()\n    # Extract values at even indices\n    even_values = [l[i] for i in range(0, len(l), 2)]\n    # Sort in descending order\n    even_values.sort(reverse=True)\n    # Put them back at even indices\n    for i, val in enumerate(even_values):\n        result[i * 2] = val\n    return result", "test": "def check(candidate):\n    assert candidate([1, 2, 3]) == [3, 2, 1]\n    assert candidate([5, 3, -5, 2, -3, 3, 9, 0, 123, 1, -10]) == [123, 3, 9, 2, 5, 3, -3, 0, -5, 1, -10]\n    assert candidate([5, 8, -12, 4, 23, 2, 3, 11, 12, -10]) == [23, 8, 12, 4, 5, 2, -12, 11, 3, -10]\n    assert candidate([]) == []\n    assert candidate([2]) == [2]\n"}
{"task_id": "ExtendedEval/38", "prompt": "def encode_cyclic(s: str) -> str:\n    \"\"\"\n    Encodes the input string by rotating each group of 3 characters:\n    - Even-indexed groups: rotate left (e.g., 'abc' → 'bca')\n    - Odd-indexed groups: rotate right (e.g., 'abc' → 'cab')\n    Groups with fewer than 3 characters remain unchanged.\n    \"\"\"\n    groups = [s[i:i+3] for i in range(0, len(s), 3)]\n    result = []\n    for idx, group in enumerate(groups):\n        if len(group) < 3:\n            result.append(group)\n        elif idx % 2 == 0:\n            result.append(group[1:] + group[0])  # left\n        else:\n            result.append(group[-1] + group[:-1])  # right\n    return ''.join(result)\n\n\ndef decode_cyclic(s: str) -> str:\n    \"\"\"\n    Decodes a string previously encoded using encode_cyclic().\n    Reverses alternating left/right rotations.\n    \"\"\"\n    groups = [s[i:i+3] for i in range(0, len(s), 3)]\n    result = []\n    for idx, group in enumerate(groups):\n        if len(group) < 3:\n            result.append(group)\n        elif idx % 2 == 0:\n            result.append(group[-1] + group[:-1])  # reverse left → right\n        else:\n            result.append(group[1:] + group[0])  # reverse right → left\n    return ''.join(result)\n", "entry_point": "decode_cyclic", "canonical_solution": "    groups = [s[i:i+3] for i in range(0, len(s), 3)]\n    result = []\n    for idx, group in enumerate(groups):\n        if len(group) < 3:\n            result.append(group)\n        elif idx % 2 == 0:\n            result.append(group[-1] + group[:-1])  # reverse left → right\n        else:\n            result.append(group[1:] + group[0])  # reverse right → left\n    return ''.join(result)", "test": "def check(candidate):\n    import string, random\n\n    def reference_encode(s):\n        groups = [s[i:i+3] for i in range(0, len(s), 3)]\n        result = []\n        for i, g in enumerate(groups):\n            if len(g) < 3:\n                
result.append(g)\n            else:\n                if i % 2 == 0:\n                    result.append(g[1:] + g[0])  # left\n                else:\n                    result.append(g[-1] + g[:-1])  # right\n        return ''.join(result)\n\n    for _ in range(100):\n        s = ''.join(random.choice(string.ascii_letters) for _ in range(random.randint(10, 30)))\n        encoded = reference_encode(s)\n        assert candidate(encoded) == s\n"}
{"task_id": "ExtendedEval/39_hard", "prompt": "def prime_fib(n: int) -> int:\n    \"\"\"\n    Returns the n-th number that is both a Fibonacci number and a prime,\n    and whose sum of digits is also a prime number.\n\n    >>> prime_fib(1)\n    2\n    >>> prime_fib(2)\n    3\n    >>> prime_fib(3)\n    5\n    >>> prime_fib(4)\n    89\n    >>> prime_fib(5)\n    514229\n    \n    Return only the Python function code.\n    \"\"\"\n", "entry_point": "prime_fib", "canonical_solution": "    import math\n\n    def is_prime(p):\n        if p < 2:\n            return False\n        for k in range(2, int(p ** 0.5) + 1):\n            if p % k == 0:\n                return False\n        return True\n\n    def digit_sum_is_prime(x):\n        return is_prime(sum(int(d) for d in str(x)))\n\n    f1, f2 = 0, 1\n    count = 0\n    while True:\n        f1, f2 = f2, f1 + f2\n        if is_prime(f2) and digit_sum_is_prime(f2):\n            count += 1\n            if count == n:\n                return f2\n", "test": "def check(candidate):\n    assert candidate(1) == 2\n    assert candidate(2) == 3\n    assert candidate(3) == 5\n    assert candidate(4) == 89\n    assert candidate(5) == 514229\n    assert candidate(6) == 433494437\n    assert candidate(7) == 2971215073\n"}
{"task_id": "ExtendedEval/38_hard", "prompt": "def encode_cyclic(s: str) -> str:\n    \"\"\"\n    Encodes the input string by rotating each group of 3 characters:\n    - Even-indexed groups: rotate left (e.g., 'abc' → 'bca')\n    - Odd-indexed groups: rotate right (e.g., 'abc' → 'cab')\n    Groups with fewer than 3 characters remain unchanged.\n    \"\"\"\n    groups = [s[i:i+3] for i in range(0, len(s), 3)]\n    result = []\n    for idx, group in enumerate(groups):\n        if len(group) < 3:\n            result.append(group)\n        elif idx % 2 == 0:\n            result.append(group[1:] + group[0])  # left\n        else:\n            result.append(group[-1] + group[:-1])  # right\n    return ''.join(result)\n\n\ndef decode_cyclic(s: str) -> str:\n    \"\"\"\n    Decodes a string previously encoded using encode_cyclic().\n    Reverses alternating left/right rotations.\n    \"\"\"\n    groups = [s[i:i+3] for i in range(0, len(s), 3)]\n    result = []\n    for idx, group in enumerate(groups):\n        if len(group) < 3:\n            result.append(group)\n        elif idx % 2 == 0:\n            result.append(group[-1] + group[:-1])  # reverse left → right\n        else:\n            result.append(group[1:] + group[0])  # reverse right → left\n    return ''.join(result)\n", "entry_point": "decode_cyclic", "canonical_solution": "    groups = [s[i:i+3] for i in range(0, len(s), 3)]\n    result = []\n    for idx, group in enumerate(groups):\n        if len(group) < 3:\n            result.append(group)\n        elif idx % 2 == 0:\n            result.append(group[-1] + group[:-1])  # reverse left → right\n        else:\n            result.append(group[1:] + group[0])  # reverse right → left\n    return ''.join(result)", "test": "def check(candidate):\n    import string, random\n\n    def reference_encode(s):\n        groups = [s[i:i+3] for i in range(0, len(s), 3)]\n        result = []\n        for i, g in enumerate(groups):\n            if len(g) < 3:\n                
result.append(g)\n            else:\n                if i % 2 == 0:\n                    result.append(g[1:] + g[0])  # left\n                else:\n                    result.append(g[-1] + g[:-1])  # right\n        return ''.join(result)\n\n    for _ in range(100):\n        s = ''.join(random.choice(string.ascii_letters) for _ in range(random.randint(10, 30)))\n        encoded = reference_encode(s)\n        assert candidate(encoded) == s\n"}
{"task_id": "ExtendedEval/40", "prompt": "def triples_sum_to_zero(l: list):\n    \"\"\"\n    triples_sum_to_zero takes a list of integers as an input.\n    it returns True if there are three distinct elements in the list that\n    sum to zero, and False otherwise.\n\n    >>> triples_sum_to_zero([1, 3, 5, 0])\n    False\n    >>> triples_sum_to_zero([1, 3, -2, 1])\n    True\n    >>> triples_sum_to_zero([1, 2, 3, 7])\n    False\n    >>> triples_sum_to_zero([2, 4, -5, 3, 9, 7])\n    True\n    >>> triples_sum_to_zero([1])\n    False\n    \"\"\"\n", "entry_point": "triples_sum_to_zero", "canonical_solution": "    n = len(l)\n    if n < 3:\n        return False\n    \n    # Check all combinations of three distinct elements\n    for i in range(n):\n        for j in range(i + 1, n):\n            for k in range(j + 1, n):\n                if l[i] + l[j] + l[k] == 0:\n                    return True\n    return False", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n    assert candidate([1, 3, 5, 0]) == False\n    assert candidate([1, 3, 5, -1]) == False\n    assert candidate([1, 3, -2, 1]) == True\n    assert candidate([1, 2, 3, 7]) == False\n    assert candidate([1, 2, 5, 7]) == False\n    assert candidate([2, 4, -5, 3, 9, 7]) == True\n    assert candidate([1]) == False\n    assert candidate([1, 3, 5, -100]) == False\n    assert candidate([100, 3, 5, -100]) == False\n\n"}
{"task_id": "ExtendedEval/40", "prompt": "def triples_sum_to_zero(l: list):\n    \"\"\"\n    triples_sum_to_zero takes a list of integers as an input.\n    it returns True if there are three distinct elements in the list that\n    sum to zero, and False otherwise.\n\n    >>> triples_sum_to_zero([1, 3, 5, 0])\n    False\n    >>> triples_sum_to_zero([1, 3, -2, 1])\n    True\n    >>> triples_sum_to_zero([1, 2, 3, 7])\n    False\n    >>> triples_sum_to_zero([2, 4, -5, 3, 9, 7])\n    True\n    >>> triples_sum_to_zero([1])\n    False\n    \"\"\"\n", "entry_point": "triples_sum_to_zero", "canonical_solution": "    n = len(l)\n    if n < 3:\n        return False\n    \n    # Check all combinations of three distinct elements\n    for i in range(n):\n        for j in range(i + 1, n):\n            for k in range(j + 1, n):\n                if l[i] + l[j] + l[k] == 0:\n                    return True\n    return False", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n    assert candidate([1, 3, 5, 0]) == False\n    assert candidate([1, 3, 5, -1]) == False\n    assert candidate([1, 3, -2, 1]) == True\n    assert candidate([1, 2, 3, 7]) == False\n    assert candidate([1, 2, 5, 7]) == False\n    assert candidate([2, 4, -5, 3, 9, 7]) == True\n    assert candidate([1]) == False\n    assert candidate([1, 3, 5, -100]) == False\n    assert candidate([100, 3, 5, -100]) == False\n\n"}
{"task_id": "ExtendedEval/41", "prompt": "def car_race_collision(left_cars: int, right_cars: int) -> int:\n    \"\"\"\n    On an infinitely long straight road, `left_cars` cars are moving left to right,\n    while `right_cars` cars are moving right to left. All cars move with the same speed.\n\n    Each car moving left to right will collide once with each car moving right to left.\n    Collisions don't affect movement. Return the total number of collisions.\n\n    >>> car_race_collision(2, 3)\n    6\n    >>> car_race_collision(4, 1)\n    4\n    >>> car_race_collision(0, 5)\n    0\n    \"\"\"\n", "entry_point": "car_race_collision", "canonical_solution": "    return left_cars * right_cars\n", "test": "def check(candidate):\n    assert candidate(2, 2) == 4\n    assert candidate(3, 3) == 9\n    assert candidate(4, 4) == 16\n    assert candidate(8, 8) == 64\n    assert candidate(10, 10) == 100\n    assert candidate(2, 3) == 6\n    assert candidate(0, 5) == 0\n    assert candidate(5, 0) == 0\n    assert candidate(7, 2) == 14\n    assert candidate(123, 456) == 56088\n"}
{"task_id": "ExtendedEval/42", "prompt": "def incr_list(l: list) -> list:\n    \"\"\"\n    Returns a new list where:\n    - Each even number becomes (abs(n) + 2)\n    - Each odd number becomes (abs(n) + 3)\n\n    >>> incr_list([1, 2, 3])\n    [4, 4, 6]\n    >>> incr_list([-5, 4, -2, 7])\n    [8, 6, 4, 10]\n    >>> incr_list([])\n    []\n    \"\"\"\n", "entry_point": "incr_list", "canonical_solution": "    def transform(n):\n        n = abs(n)\n        return n + 2 if n % 2 == 0 else n + 3\n    return [transform(x) for x in l]\n", "test": "def check(candidate):\n    assert candidate([]) == []\n    assert candidate([1, 2, 3]) == [4, 4, 6]\n    assert candidate([-5, 4, -2, 7]) == [8, 6, 4, 10]\n    assert candidate([0, -1, -2, -3]) == [2, 4, 4, 6]\n    assert candidate([10, 15, -8]) == [12, 18, 10]\n    assert candidate([100]) == [102]\n"}
{"task_id": "ExtendedEval/43", "prompt": "def k_sum_to_zero(l: list, k: int) -> bool:\n    \"\"\"\n    Returns True if there are at least `k` distinct pairs of integers in the list `l`\n    that sum to zero. A pair (a, b) is considered the same as (b, a) and only counts once.\n\n    >>> k_sum_to_zero([2, -2, 4, -4, 1], 2)\n    True\n    >>> k_sum_to_zero([1, -1, 2, 3], 2)\n    False\n    >>> k_sum_to_zero([1, -1, 2, -2, 3, -3], 3)\n    True\n    >>> k_sum_to_zero([1, 2, 3], 1)\n    False\n    >>> k_sum_to_zero([], 1)\n    False\n    \"\"\"\n", "entry_point": "k_sum_to_zero", "canonical_solution": "    seen = set(l)\n    pairs = set()\n    for x in seen:\n        if -x in seen and x != 0:\n            pairs.add(frozenset((x, -x)))\n    if l.count(0) > 1:\n        pairs.add(frozenset((0, 0)))\n    return len(pairs) >= k\n", "test": "def check(candidate):\n    assert candidate([2, -2, 4, -4, 1], 2) == True  # (2,-2), (4,-4)\n    assert candidate([1, -1, 2, 3], 2) == False     # only (1,-1)\n    assert candidate([1, -1, 2, -2, 3, -3], 3) == True\n    assert candidate([1, -1, 2, -2, 3, -3], 4) == False\n    assert candidate([1, 2, 3], 1) == False\n    assert candidate([], 1) == False\n    assert candidate([0, 0], 1) == True\n    assert candidate([0, 0], 2) == False\n    assert candidate([0, 0, 0], 1) == True\n    assert candidate([0, 0, 0], 2) == False\n    assert candidate([0, 0, 0], 0) == True\n"}
{"task_id": "ExtendedEval/44", "prompt": "def change_base_ext(x: int, base: int) -> str:\n    \"\"\"\n    Convert integer `x` to a string representation in a given `base` (from 2 up to 36).\n    For digits >= 10, use uppercase letters (e.g., 10 → 'A', 15 → 'F', 35 → 'Z').\n    Supports negative numbers.\n\n    >>> change_base_ext(8, 3)\n    '22'\n    >>> change_base_ext(8, 2)\n    '1000'\n    >>> change_base_ext(31, 16)\n    '1F'\n    >>> change_base_ext(-10, 2)\n    '-1010'\n    >>> change_base_ext(35, 36)\n    'Z'\n    \"\"\"\n", "entry_point": "change_base_ext", "canonical_solution": "    if base < 2 or base > 36:\n        raise ValueError(\"Base must be between 2 and 36\")\n    digits = \"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ\"\n    negative = x < 0\n    x = abs(x)\n    if x == 0:\n        return '0'\n    result = ''\n    while x > 0:\n        result = digits[x % base] + result\n        x //= base\n    return '-' + result if negative else result\n", "test": "def check(candidate):\n    assert candidate(8, 3) == \"22\"\n    assert candidate(9, 3) == \"100\"\n    assert candidate(234, 2) == \"11101010\"\n    assert candidate(16, 2) == \"10000\"\n    assert candidate(31, 16) == \"1F\"\n    assert candidate(255, 16) == \"FF\"\n    assert candidate(35, 36) == \"Z\"\n    assert candidate(10, 10) == \"10\"\n    assert candidate(0, 2) == \"0\"\n    assert candidate(-10, 2) == \"-1010\"\n    assert candidate(-255, 16) == \"-FF\"\n    assert candidate(1, 2) == \"1\"\n    assert candidate(15, 16) == \"F\"\n"}
{"task_id": "ExtendedEval/44_hard", "prompt": "def change_base_ext(x: int, base: int) -> str:\n    \"\"\"\n    Convert integer `x` to a string representation in a given `base` (from 2 up to 36).\n    For digits >= 10, use uppercase letters (e.g., 10 → 'A', 15 → 'F', 35 → 'Z').\n    Supports negative numbers.\n\n    >>> change_base_ext(8, 3)\n    '22'\n    >>> change_base_ext(8, 2)\n    '1000'\n    >>> change_base_ext(31, 16)\n    '1F'\n    >>> change_base_ext(-10, 2)\n    '-1010'\n    >>> change_base_ext(35, 36)\n    'Z'\n    \"\"\"\n", "entry_point": "change_base_ext", "canonical_solution": "    if base < 2 or base > 36:\n        raise ValueError(\"Base must be between 2 and 36\")\n    digits = \"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ\"\n    negative = x < 0\n    x = abs(x)\n    if x == 0:\n        return '0'\n    result = ''\n    while x > 0:\n        result = digits[x % base] + result\n        x //= base\n    return '-' + result if negative else result\n", "test": "def check(candidate):\n    assert candidate(8, 3) == \"22\"\n    assert candidate(9, 3) == \"100\"\n    assert candidate(234, 2) == \"11101010\"\n    assert candidate(16, 2) == \"10000\"\n    assert candidate(31, 16) == \"1F\"\n    assert candidate(255, 16) == \"FF\"\n    assert candidate(35, 36) == \"Z\"\n    assert candidate(10, 10) == \"10\"\n    assert candidate(0, 2) == \"0\"\n    assert candidate(-10, 2) == \"-1010\"\n    assert candidate(-255, 16) == \"-FF\"\n    assert candidate(1, 2) == \"1\"\n    assert candidate(15, 16) == \"F\"\n"}
{"task_id": "ExtendedEval/45", "prompt": "def triangle_area(a, h):\n    \"\"\"Given length of a side and high return area for a triangle.\n    >>> triangle_area(5, 3)\n    7.5\n    \"\"\"\n", "entry_point": "triangle_area", "canonical_solution": "    return a * h / 2.0", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n    assert candidate(5, 3) == 7.5\n    assert candidate(2, 2) == 2.0\n    assert candidate(10, 8) == 40.0\n\n"}
{"task_id": "ExtendedEval/46", "prompt": "def fib4(n: int):\n    \"\"\"The Fib4 number sequence is a sequence similar to the Fibbonacci sequnece that's defined as follows:\n    fib4(0) -> 0\n    fib4(1) -> 0\n    fib4(2) -> 2\n    fib4(3) -> 0\n    fib4(n) -> fib4(n-1) + fib4(n-2) + fib4(n-3) + fib4(n-4).\n    Please write a function to efficiently compute the n-th element of the fib4 number sequence.  Do not use recursion.\n    >>> fib4(5)\n    4\n    >>> fib4(6)\n    8\n    >>> fib4(7)\n    14\n    \"\"\"\n", "entry_point": "fib4", "canonical_solution": "    if n < 4:\n        return [0, 0, 2, 0][n]\n    a, b, c, d = 0, 0, 2, 0\n    for _ in range(4, n + 1):\n        a, b, c, d = b, c, d, a + b + c + d\n    return d", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n    assert candidate(5) == 4\n    assert candidate(8) == 28\n    assert candidate(10) == 104\n    assert candidate(12) == 386\n\n"}
{"task_id": "ExtendedEval/48", "prompt": "import re\n\ndef is_clean_palindrome(text: str) -> bool:\n    \"\"\"\n    Returns True if the input string is a palindrome,\n    ignoring spaces, punctuation, and case.\n\n    >>> is_clean_palindrome('A man, a plan, a canal: Panama')\n    True\n    >>> is_clean_palindrome('No lemon, no melon!')\n    True\n    >>> is_clean_palindrome('Was it a car or a cat I saw?')\n    True\n    >>> is_clean_palindrome('Palindrome')\n    False\n    >>> is_clean_palindrome('')\n    True\n    \"\"\"\n", "entry_point": "is_clean_palindrome", "canonical_solution": "    cleaned = re.sub(r'[^a-z0-9]', '', text.lower())\n    return cleaned == cleaned[::-1]\n", "test": "def check(candidate):\n    assert candidate('A man, a plan, a canal: Panama') == True\n    assert candidate('No lemon, no melon!') == True\n    assert candidate('Was it a car or a cat I saw?') == True\n    assert candidate('Step on no pets') == True\n    assert candidate('Eva, can I see bees in a cave?') == True\n    assert candidate('Palindrome') == False\n    assert candidate('This is not one') == False\n    assert candidate('') == True\n    assert candidate('123321') == True\n    assert candidate('123 321') == True\n    assert candidate('123421') == False\n"}
{"task_id": "ExtendedEval/48_hard", "prompt": "import re\n\ndef is_clean_palindrome(text: str) -> bool:\n    \"\"\"\n    Returns True if the input string is a palindrome,\n    ignoring spaces, punctuation, and case.\n\n    >>> is_clean_palindrome('A man, a plan, a canal: Panama')\n    True\n    >>> is_clean_palindrome('No lemon, no melon!')\n    True\n    >>> is_clean_palindrome('Was it a car or a cat I saw?')\n    True\n    >>> is_clean_palindrome('Palindrome')\n    False\n    >>> is_clean_palindrome('')\n    True\n    \"\"\"\n", "entry_point": "is_clean_palindrome", "canonical_solution": "    cleaned = re.sub(r'[^a-z0-9]', '', text.lower())\n    return cleaned == cleaned[::-1]\n", "test": "def check(candidate):\n    assert candidate('A man, a plan, a canal: Panama') == True\n    assert candidate('No lemon, no melon!') == True\n    assert candidate('Was it a car or a cat I saw?') == True\n    assert candidate('Step on no pets') == True\n    assert candidate('Eva, can I see bees in a cave?') == True\n    assert candidate('Palindrome') == False\n    assert candidate('This is not one') == False\n    assert candidate('') == True\n    assert candidate('123321') == True\n    assert candidate('123 321') == True\n    assert candidate('123421') == False\n"}
{"task_id": "ExtendedEval/49", "prompt": "def modp(n: int, p: int):\n    \"\"\"Return 2^n modulo p (be aware of numerics).\n    >>> modp(3, 5)\n    3\n    >>> modp(1101, 101)\n    2\n    >>> modp(0, 101)\n    1\n    >>> modp(3, 11)\n    8\n    >>> modp(100, 101)\n    1\n    \"\"\"\n", "entry_point": "modp", "canonical_solution": "    return pow(2, n, p)", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n    assert candidate(3, 5) == 3\n    assert candidate(1101, 101) == 2\n    assert candidate(0, 101) == 1\n    assert candidate(3, 11) == 8\n    assert candidate(100, 101) == 1\n    assert candidate(30, 5) == 4\n    assert candidate(31, 5) == 3\n\n"}
{"task_id": "ExtendedEval/50", "prompt": "def encode_shift(s: str):\n    \"\"\"\n    returns encoded string by shifting every character by 5 in the alphabet.\n    \"\"\"\n    return \"\".join([chr(((ord(ch) + 5 - ord(\"a\")) % 26) + ord(\"a\")) for ch in s])\n\n\ndef decode_shift(s: str):\n    \"\"\"\n    takes as input string encoded with encode_shift function. Returns decoded string.\n    \"\"\"\n", "entry_point": "decode_shift", "canonical_solution": "    return \"\".join([chr(((ord(ch) - 5 - ord(\"a\")) % 26) + ord(\"a\")) for ch in s])", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n    from random import randint, choice\n    import copy\n    import string\n\n    letters = string.ascii_lowercase\n    for _ in range(100):\n        str = ''.join(choice(letters) for i in range(randint(10, 20)))\n        encoded_str = encode_shift(str)\n        assert candidate(copy.deepcopy(encoded_str)) == str\n\n"}
{"task_id": "ExtendedEval/51", "prompt": "def remove_vowels_safely(text: str) -> str:\n    \"\"\"\n    Removes all vowels from the input string `text`,\n    except for characters inside single or double quotes.\n\n    Vowels are: a, e, i, o, u (case-insensitive).\n    The content within quotes (both '...' and \"...\") should remain untouched.\n\n    >>> remove_vowels_safely(\"hello world\")\n    'hll wrld'\n    >>> remove_vowels_safely(\"'hello' world\")\n    \"'hello' wrld\"\n    >>> remove_vowels_safely('She said \"amazing idea\" loudly.')\n    'Sh sd \"amazing idea\" ldly.'\n    >>> remove_vowels_safely('')\n    ''\n    >>> remove_vowels_safely(\"aeiou AEIOU\")\n    ' '\n    >>> remove_vowels_safely(\"'AEIOU' AEIOU\")\n    \"'AEIOU' \"\n    \"\"\"\n", "entry_point": "remove_vowels_safely", "canonical_solution": "    vowels = set('aeiouAEIOU')\n    result = []\n    in_single = False\n    in_double = False\n    for c in text:\n        if c == \"'\" and not in_double:\n            in_single = not in_single\n            result.append(c)\n        elif c == '\"' and not in_single:\n            in_double = not in_double\n            result.append(c)\n        elif in_single or in_double:\n            result.append(c)\n        elif c not in vowels:\n            result.append(c)\n    return ''.join(result)\n", "test": "def check(candidate):\n    assert candidate(\"hello world\") == \"hll wrld\"\n    assert candidate(\"'hello' world\") == \"'hello' wrld\"\n    assert candidate('She said \"amazing idea\" loudly.') == 'Sh sd \"amazing idea\" ldly.'\n    assert candidate('') == ''\n    assert candidate(\"aeiou AEIOU\") == ' '\n    assert candidate(\"'AEIOU' AEIOU\") == \"'AEIOU' \"\n    assert candidate('\"AEIOU\" aeiou') == '\"AEIOU\" '\n    assert candidate(\"This is 'only' a test\") == \"Ths s 'only'  tst\"\n    assert candidate(\"'vowels' and 'CONSONANTS'\") == \"'vowels' nd 'CONSONANTS'\"\n"}
{"task_id": "ExtendedEval/47", "prompt": "def median(l: list):\n    \"\"\"Return median of elements in the list l.\n    >>> median([3, 1, 2, 4, 5])\n    3\n    >>> median([-10, 4, 6, 1000, 10, 20])\n    8.0\n    \n    Return only the Python function code.\n    \"\"\"\n", "entry_point": "median", "canonical_solution": "    sorted_l = sorted(l)\n    n = len(sorted_l)\n    if n % 2 == 1:\n        return sorted_l[n // 2]\n    else:\n        return (sorted_l[n // 2 - 1] + sorted_l[n // 2]) / 2.0", "test": "def check(candidate):\n    assert candidate([3, 1, 2, 4, 5]) == 3\n    assert candidate([-10, 4, 6, 1000, 10, 20]) == 8.0\n    assert candidate([5]) == 5\n    assert candidate([6, 5]) == 5.5\n    assert candidate([8, 1, 3, 9, 9, 2, 7]) == 7\n"}
{"task_id": "ExtendedEval/51_hard", "prompt": "def remove_vowels_safely(text: str) -> str:\n    \"\"\"\n    Removes all vowels from the input string `text`,\n    except for characters inside single or double quotes.\n\n    Vowels are: a, e, i, o, u (case-insensitive).\n    The content within quotes (both '...' and \"...\") should remain untouched.\n\n    >>> remove_vowels_safely(\"hello world\")\n    'hll wrld'\n    >>> remove_vowels_safely(\"'hello' world\")\n    \"'hello' wrld\"\n    >>> remove_vowels_safely('She said \"amazing idea\" loudly.')\n    'Sh sd \"amazing idea\" ldly.'\n    >>> remove_vowels_safely('')\n    ''\n    >>> remove_vowels_safely(\"aeiou AEIOU\")\n    ' '\n    >>> remove_vowels_safely(\"'AEIOU' AEIOU\")\n    \"'AEIOU' \"\n    \"\"\"\n", "entry_point": "remove_vowels_safely", "canonical_solution": "    vowels = set('aeiouAEIOU')\n    result = []\n    in_single = False\n    in_double = False\n    for c in text:\n        if c == \"'\" and not in_double:\n            in_single = not in_single\n            result.append(c)\n        elif c == '\"' and not in_single:\n            in_double = not in_double\n            result.append(c)\n        elif in_single or in_double:\n            result.append(c)\n        elif c not in vowels:\n            result.append(c)\n    return ''.join(result)\n", "test": "def check(candidate):\n    assert candidate(\"hello world\") == \"hll wrld\"\n    assert candidate(\"'hello' world\") == \"'hello' wrld\"\n    assert candidate('She said \"amazing idea\" loudly.') == 'Sh sd \"amazing idea\" ldly.'\n    assert candidate('') == ''\n    assert candidate(\"aeiou AEIOU\") == ' '\n    assert candidate(\"'AEIOU' AEIOU\") == \"'AEIOU' \"\n    assert candidate('\"AEIOU\" aeiou') == '\"AEIOU\" '\n    assert candidate(\"This is 'only' a test\") == \"Ths s 'only'  tst\"\n    assert candidate(\"'vowels' and 'CONSONANTS'\") == \"'vowels' nd 'CONSONANTS'\"\n"}
{"task_id": "ExtendedEval/52", "prompt": "def below_threshold(l: list, t: int):\n    \"\"\"Return True if all numbers in the list l are below threshold t.\n    >>> below_threshold([1, 2, 4, 10], 100)\n    True\n    >>> below_threshold([1, 20, 4, 10], 5)\n    False\n    \"\"\"\n", "entry_point": "below_threshold", "canonical_solution": "    return all(x < t for x in l)", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n    assert candidate([1, 2, 4, 10], 100)\n    assert not candidate([1, 20, 4, 10], 5)\n    assert candidate([1, 20, 4, 10], 21)\n    assert candidate([1, 20, 4, 10], 22)\n    assert candidate([1, 8, 4, 10], 11)\n    assert not candidate([1, 8, 4, 10], 10)\n\n"}
{"task_id": "ExtendedEval/53", "prompt": "def bitwise_add(x: int, y: int) -> int:\n    \"\"\"\n    Adds two integers `x` and `y` without using the `+` operator or `sum()`.\n    Only bitwise operations are allowed.\n\n    Handles positive and negative integers (32-bit signed).\n\n    >>> bitwise_add(2, 3)\n    5\n    >>> bitwise_add(5, 7)\n    12\n    >>> bitwise_add(-5, 7)\n    2\n    >>> bitwise_add(-10, -20)\n    -30\n    \"\"\"\n", "entry_point": "bitwise_add", "canonical_solution": "    MAX = 0x7FFFFFFF\n    MASK = 0xFFFFFFFF\n    while y != 0:\n        carry = (x & y) & MASK\n        x = (x ^ y) & MASK\n        y = (carry << 1) & MASK\n    return x if x <= MAX else ~(x ^ MASK)\n", "test": "def check(candidate):\n    assert candidate(2, 3) == 5\n    assert candidate(5, 7) == 12\n    assert candidate(-5, 7) == 2\n    assert candidate(0, 0) == 0\n    assert candidate(1, 0) == 1\n    assert candidate(0, 1) == 1\n    assert candidate(-10, -20) == -30\n    assert candidate(123, -23) == 100\n    assert candidate(-123, 23) == -100\n    assert candidate(999999, 1) == 1000000\n    assert candidate(-1000000, -1) == -1000001\n\n    import random\n    for _ in range(100):\n        x = random.randint(-10**6, 10**6)\n        y = random.randint(-10**6, 10**6)\n        assert candidate(x, y) == x + y\n"}
{"task_id": "ExtendedEval/53_hard", "prompt": "def bitwise_add(x: int, y: int) -> int:\n    \"\"\"\n    Adds two integers `x` and `y` without using the `+` operator or `sum()`.\n    Only bitwise operations are allowed.\n\n    Handles positive and negative integers (32-bit signed).\n\n    >>> bitwise_add(2, 3)\n    5\n    >>> bitwise_add(5, 7)\n    12\n    >>> bitwise_add(-5, 7)\n    2\n    >>> bitwise_add(-10, -20)\n    -30\n    \"\"\"\n", "entry_point": "bitwise_add", "canonical_solution": "    MAX = 0x7FFFFFFF\n    MASK = 0xFFFFFFFF\n    while y != 0:\n        carry = (x & y) & MASK\n        x = (x ^ y) & MASK\n        y = (carry << 1) & MASK\n    return x if x <= MAX else ~(x ^ MASK)\n", "test": "def check(candidate):\n    assert candidate(2, 3) == 5\n    assert candidate(5, 7) == 12\n    assert candidate(-5, 7) == 2\n    assert candidate(0, 0) == 0\n    assert candidate(1, 0) == 1\n    assert candidate(0, 1) == 1\n    assert candidate(-10, -20) == -30\n    assert candidate(123, -23) == 100\n    assert candidate(-123, 23) == -100\n    assert candidate(999999, 1) == 1000000\n    assert candidate(-1000000, -1) == -1000001\n\n    import random\n    for _ in range(100):\n        x = random.randint(-10**6, 10**6)\n        y = random.randint(-10**6, 10**6)\n        assert candidate(x, y) == x + y\n"}
{"task_id": "ExtendedEval/54", "prompt": "def same_chars(s0: str, s1: str):\n    \"\"\"\n    Check if two words have the same characters.\n    >>> same_chars('eabcdzzzz', 'dddzzzzzzzddeddabc')\n    True\n    >>> same_chars('abcd', 'dddddddabc')\n    True\n    >>> same_chars('dddddddabc', 'abcd')\n    True\n    >>> same_chars('eabcd', 'dddddddabc')\n    False\n    >>> same_chars('abcd', 'dddddddabce')\n    False\n    >>> same_chars('eabcdzzzz', 'dddzzzzzzzddddabc')\n    False\n    \"\"\"\n", "entry_point": "same_chars", "canonical_solution": "    return set(s0) == set(s1)", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n    assert candidate('eabcdzzzz', 'dddzzzzzzzddeddabc') == True\n    assert candidate('abcd', 'dddddddabc') == True\n    assert candidate('dddddddabc', 'abcd') == True\n    assert candidate('eabcd', 'dddddddabc') == False\n    assert candidate('abcd', 'dddddddabcf') == False\n    assert candidate('eabcdzzzz', 'dddzzzzzzzddddabc') == False\n    assert candidate('aabb', 'aaccc') == False\n\n"}
{"task_id": "ExtendedEval/55", "prompt": "def fib(n: int):\n    \"\"\"Return n-th Fibonacci number.\n    >>> fib(10)\n    55\n    >>> fib(1)\n    1\n    >>> fib(8)\n    21\n    \"\"\"\n", "entry_point": "fib", "canonical_solution": "    if n == 0:\n        return 0\n    if n == 1:\n        return 1\n    a, b = 0, 1\n    for _ in range(2, n + 1):\n        a, b = b, a + b\n    return b", "test": "\n\nMETADATA = {}\n\n\ndef check(candidate):\n    assert candidate(10) == 55\n    assert candidate(1) == 1\n    assert candidate(8) == 21\n    assert candidate(11) == 89\n    assert candidate(12) == 144\n\n"}
{"task_id": "ExtendedEval/56", "prompt": "def correct_bracketing_extended(s: str) -> bool:\n    \"\"\"\n    Checks if all types of brackets in the string are correctly balanced and nested.\n    Valid brackets: (), {}, [], <>\n\n    >>> correct_bracketing_extended(\"<([])>\")\n    True\n    >>> correct_bracketing_extended(\"<([)]>\")\n    False\n    >>> correct_bracketing_extended(\"(((([[]]))))\")\n    True\n    >>> correct_bracketing_extended(\"((([[[]])))\")\n    False\n    >>> correct_bracketing_extended(\"\")\n    True\n    \"\"\"\n", "entry_point": "correct_bracketing_extended", "canonical_solution": "    stack = []\n    pairs = {')': '(', ']': '[', '}': '{', '>': '<'}\n    for c in s:\n        if c in '([{<':\n            stack.append(c)\n        elif c in ')]}>':\n            if not stack or stack[-1] != pairs[c]:\n                return False\n            stack.pop()\n    return not stack\n", "test": "def check(candidate):\n    assert candidate(\"<([])>\") == True\n    assert candidate(\"<([)]>\") == False\n    assert candidate(\"(((([[]]))))\") == True\n    assert candidate(\"((([[[]]))))\") == False\n    assert candidate(\"\") == True\n    assert candidate(\"[({(<()>)}])\") == True\n    assert candidate(\"[({(<()>)}]>\") == False\n    assert candidate(\"([{}])\") == True\n    assert candidate(\"([{})\") == False\n    assert candidate(\"(()()())\") == True\n    assert candidate(\"(()(()\") == False\n    assert candidate(\"<{[()]}>\") == True\n    assert candidate(\"<{[(])}>\") == False\n"}
{"task_id": "ExtendedEval/57", "prompt": "def monotonic(lst: list):\n    \"\"\"\n    Return True if the list elements are monotonically increasing, decreasing,\n    or contain plateaus (equal adjacent elements) while maintaining order.\n    The list is considered monotonic if it does not switch between increasing and decreasing.\n    >>> monotonic([1, 2, 2, 3])\n    True\n    >>> monotonic([5, 4, 4, 3])\n    True\n    >>> monotonic([1, 3, 2])\n    False\n    >>> monotonic([1])\n    True\n    >>> monotonic([])\n    True\n    \n    Return only the Python function code.\n    \"\"\"\n", "entry_point": "monotonic", "canonical_solution": "    return lst == sorted(lst) or lst == sorted(lst, reverse=True)", "test": "def check(candidate):\n    assert candidate([1, 2, 2, 3]) == True\n    assert candidate([5, 4, 4, 3]) == True\n    assert candidate([1, 3, 2]) == False\n    assert candidate([1]) == True\n    assert candidate([]) == True\n    assert candidate([10, 9, 9, 9, 2]) == True\n    assert candidate([3, 3, 3, 3]) == True\n    assert candidate([1, 2, 3, 2, 1]) == False\n    assert candidate([1, 1, 2, 1]) == False\n"}
{"task_id": "ExtendedEval/58", "prompt": "def common(l1: list, l2: list):\n    \"\"\"\n    Return a sorted list of unique elements that are common to both input lists.\n    The comparison should ignore types when comparing (e.g., 1 == 1.0).\n    >>> common([1, 2.0, 3], [3, 1.0, 4])\n    [1, 3]\n    >>> common(['1', 2, 3], [1, 2, 3])\n    [2, 3]\n    \n    Return only the Python function code.\n    \"\"\"\n", "entry_point": "common", "canonical_solution": "    return sorted(set(x for x in l1 for y in l2 if x == y))", "test": "def check(candidate):\n    assert candidate([1, 2.0, 3], [3, 1.0, 4]) == [1, 3]\n    assert candidate(['1', 2, 3], [1, 2, 3]) == [2, 3]\n    assert candidate([5, 3, 2, 8], [3, 2]) == [2, 3]\n    assert candidate([4, 3, 2, 8], [3, 2, 4]) == [2, 3, 4]\n    assert candidate([4, 3, 2, 8], []) == []\n    assert candidate(['a', 'b', 'c'], ['c', 'a']) == ['a', 'c']\n    assert candidate(['x', 'y'], ['z']) == []\n    assert candidate([], ['a']) == []\n"}
{"task_id": "ExtendedEval/59", "prompt": "def largest_prime_factor(n: int):\n    \"\"\"\n    Return the largest prime factor of n using trial division.\n    Handles very large numbers efficiently by iterating only up to sqrt(n).\n    >>> largest_prime_factor(13195)\n    29\n    >>> largest_prime_factor(2048)\n    2\n    >>> largest_prime_factor(1000003 * 2)\n    1000003\n    \n    Return only the Python function code.\n    \"\"\"\n", "entry_point": "largest_prime_factor", "canonical_solution": "    i, max_prime = 2, -1\n    while i * i <= n:\n        if n % i == 0:\n            max_prime = i\n            while n % i == 0: n //= i\n        i += 1\n    return max(max_prime, n)", "test": "def check(candidate):\n    assert candidate(15) == 5\n    assert candidate(27) == 3\n    assert candidate(63) == 7\n    assert candidate(330) == 11\n    assert candidate(13195) == 29\n    assert candidate(2048) == 2\n    assert candidate(2 * 3 * 5 * 7 * 11 * 13) == 13\n    assert candidate(99991 * 101) == 99991\n    assert candidate(1000003 * 2) == 1000003\n    assert candidate(1000000000039) == 1000000000039\n"}
{"task_id": "ExtendedEval/60", "prompt": "def sum_to_n(n: int):\n    \"\"\"\n    Return the sum of all integers from 1 up to and including n.\n    If n is 0 or negative, the function should return 0.\n    The function should be efficient and handle large n values.\n\n    >>> sum_to_n(30)\n    465\n    >>> sum_to_n(0)\n    0\n    >>> sum_to_n(-10)\n    0\n    >>> sum_to_n(1000000)\n    500000500000\n    \n    Return only the Python function code.\n    \"\"\"\n", "entry_point": "sum_to_n", "canonical_solution": "    return n * (n + 1) // 2 if n > 0 else 0", "test": "def check(candidate):\n    assert candidate(1) == 1\n    assert candidate(6) == 21\n    assert candidate(11) == 66\n    assert candidate(30) == 465\n    assert candidate(100) == 5050\n    assert candidate(0) == 0\n    assert candidate(-5) == 0\n    assert candidate(1000) == 500500\n    assert candidate(10**6) == 500000500000\n"}
{"task_id": "ExtendedEval/61", "prompt": "def correct_bracketing(brackets: str):\n    \"\"\"\n    Given a string composed of '(' and ')', return True if the brackets are correctly matched.\n    That is, each opening bracket must be closed in the correct order.\n    An empty string is considered valid.\n\n    >>> correct_bracketing(\"(()())\")\n    True\n    >>> correct_bracketing(\"(()\")\n    False\n    >>> correct_bracketing(\"\")\n    True\n    >>> correct_bracketing(\"(()(()))\")\n    True\n    >>> correct_bracketing(\")(\")\n    False\n    \n    Return only the Python function code.\n    \"\"\"\n", "entry_point": "correct_bracketing", "canonical_solution": "    d=0\n    for b in brackets:\n        d+=(1 if b==\"(\" else -1)\n        if d<0: return False\n    return d==0", "test": "def check(candidate):\n    assert candidate(\"\") == True\n    assert candidate(\"(\") == False\n    assert candidate(\")\") == False\n    assert candidate(\"()\") == True\n    assert candidate(\"(()())\") == True\n    assert candidate(\"()()(()())()\") == True\n    assert candidate(\"(()\") == False\n    assert candidate(\")(()\") == False\n    assert candidate(\"((()())))\") == False\n    assert candidate(\"(((())))\") == True\n    assert candidate(\"(()(()))\") == True\n    assert candidate(\"(()(()(())))\") == True\n    assert candidate(\"(()(()(()))\") == False\n"}
{"task_id": "ExtendedEval/62", "prompt": "def derivative(xs: list):\n    \"\"\"\n    Given a list of coefficients representing a polynomial\n    (e.g., xs[0] + xs[1]*x + xs[2]*x^2 + ...), return a list of the\n    coefficients of its first derivative.\n\n    The resulting list should be empty for constant polynomials.\n    >>> derivative([3, 1, 2, 4, 5])\n    [1, 4, 12, 20]\n    >>> derivative([1])\n    []\n    >>> derivative([0, 0, 0, 4])\n    [0, 0, 12]\n    \n    Return only the Python function code.\n    \"\"\"\n", "entry_point": "derivative", "canonical_solution": "    return [i*x for i,x in enumerate(xs)][1:]", "test": "def check(candidate):\n    assert candidate([3, 1, 2, 4, 5]) == [1, 4, 12, 20]\n    assert candidate([1, 2, 3]) == [2, 6]\n    assert candidate([3, 2, 1]) == [2, 2]\n    assert candidate([3, 2, 1, 0, 4]) == [2, 2, 0, 16]\n    assert candidate([1]) == []\n    assert candidate([0, 0, 0, 4]) == [0, 0, 12]\n    assert candidate([5, 0, -2]) == [0, -4]\n    assert candidate([0]*10) == [0]*9\n"}
{"task_id": "ExtendedEval/63", "prompt": "def fibfib(n: int):\n    \"\"\"\n    Return the n-th number in the 'fibfib' sequence:\n    - fibfib(0) = 0\n    - fibfib(1) = 0\n    - fibfib(2) = 1\n    - fibfib(n) = fibfib(n-1) + fibfib(n-2) + fibfib(n-3)\n\n    The implementation must be efficient for n > 30.\n    >>> fibfib(5)\n    4\n    >>> fibfib(8)\n    24\n    >>> fibfib(10)\n    81\n    >>> fibfib(0)\n    0\n    \n    Return only the Python function code.\n    \"\"\"\n", "entry_point": "fibfib", "canonical_solution": "    a,b,c=0,0,1\n    for _ in range(n):a,b,c=b,c,a+b+c\n    return a", "test": "def check(candidate):\n    assert candidate(0) == 0\n    assert candidate(1) == 0\n    assert candidate(2) == 1\n    assert candidate(3) == 1\n    assert candidate(4) == 2\n    assert candidate(5) == 4\n    assert candidate(6) == 7\n    assert candidate(7) == 13\n    assert candidate(8) == 24\n    assert candidate(9) == 44\n    assert candidate(10) == 81\n    assert candidate(12) == 274\n    assert candidate(14) == 927\n"}
{"task_id": "ExtendedEval/64", "prompt": "def vowels_count(s: str):\n    \"\"\"\n    Count the number of vowels in a string (a, e, i, o, u).\n    Additionally count 'y' or 'Y' as a vowel if it appears at the end of the string.\n    The function is case-insensitive.\n\n    >>> vowels_count(\"abcde\")\n    2\n    >>> vowels_count(\"key\")\n    2\n    >>> vowels_count(\"Y\")\n    1\n    >>> vowels_count(\"myth\")\n    0\n    >>> vowels_count(\"sympathy\")\n    2\n    \n    Return only the Python function code.\n    \"\"\"\n", "entry_point": "vowels_count", "canonical_solution": "    return sum(c.lower() in 'aeiou' for c in s) + int(s.lower().endswith('y'))", "test": "def check(candidate):\n    assert candidate(\"abcde\") == 2\n    assert candidate(\"Alone\") == 3\n    assert candidate(\"key\") == 2\n    assert candidate(\"bye\") == 1\n    assert candidate(\"keY\") == 2\n    assert candidate(\"bYe\") == 1\n    assert candidate(\"ACEDY\") == 3\n    assert candidate(\"myth\") == 0\n    assert candidate(\"sympathy\") == 2\n    assert candidate(\"Y\") == 1\n    assert candidate(\"tricky\") == 2\n"}
{"task_id": "ExtendedEval/65", "prompt": "\ndef circular_shift(x, shift):\n    \"\"\"Given an integer x, perform a circular right shift on its decimal digits by 'shift' positions.\n    If 'shift' is greater than the number of digits, return the digits reversed.\n    The output must preserve leading zeros from rotation, and always be returned as a string.\n    >>> circular_shift(987654321, 3)\n    '321987654'\n    >>> circular_shift(1200, 2)\n    '0012'\n    >>> circular_shift(11, 100)  # shift > digits -> reversed\n    '11'\n    \"\"\"\n", "entry_point": "circular_shift", "canonical_solution": "    s = str(x)\n    return s[::-1] if shift > len(s) else s[-shift:] + s[:-shift]", "test": "def check(candidate):\n    assert candidate(987654321, 3) == '321987654'\n    assert candidate(1200, 2) == '0012'\n    assert candidate(98765, 5) == '98765'\n    assert candidate(11, 100) == '11'\n    assert candidate(8, 1) == '8'\n    assert candidate(111000, 2) == '001110'\n    assert candidate(123456, 0) == '123456'\n"}
{"task_id": "ExtendedEval/66", "prompt": "\ndef digitSum(s):\n    \"\"\"Return the total ASCII sum of uppercase letters only from a string 's'.\n    Any digits, symbols or lowercase characters must be ignored. The string may contain Unicode characters.\n    >>> digitSum('abCDZ')\n    225\n    >>> digitSum('hello, 世界A')\n    65\n    >>> digitSum('123ABCabc!@#')\n    198\n    \"\"\"\n", "entry_point": "digitSum", "canonical_solution": "    return sum(ord(c) for c in s if 'A' <= c <= 'Z')", "test": "def check(candidate):\n    assert candidate('abCDZ') == 225\n    assert candidate('hello, 世界A') == 65\n    assert candidate('123ABCabc!@#') == 198\n    assert candidate('') == 0\n    assert candidate('αβγΔΕΖ') == 0\n    assert candidate('ABCDEF') == 65+66+67+68+69+70\n    assert candidate('lowerUPPER') == sum(ord(c) for c in 'UPPER')\n"}
{"task_id": "ExtendedEval/67", "prompt": "\ndef fruit_distribution(s, n):\n    \"\"\"Given a string with quantities of apples and oranges like '23 apples and 17 oranges'\n    and an integer n representing the total number of fruits in the basket,\n    return the number of mangoes (n - apples - oranges).\n    The function should ignore malformed words and parse the first two integers in the string.\n    >>> fruit_distribution('23 apples and 17 oranges', 50)\n    10\n    >>> fruit_distribution('2 apples and 3 oranges plus some pears', 10)\n    5\n    >>> fruit_distribution('No apples or oranges', 5)\n    5\n    \"\"\"\n", "entry_point": "fruit_distribution", "canonical_solution": "    nums = [int(x) for x in s.split() if x.isdigit()]\n    return n - sum(nums[:2]) if len(nums) >= 2 else n - sum(nums)", "test": "def check(candidate):\n    assert candidate('23 apples and 17 oranges', 50) == 10\n    assert candidate('2 apples and 3 oranges plus some pears', 10) == 5\n    assert candidate('0 apples and 0 oranges', 5) == 5\n    assert candidate('123 apples and', 200) == 77\n    assert candidate('apples oranges bananas', 30) == 30\n    assert candidate('5 6', 15) == 4\n    assert candidate('1 apples and 100 oranges', 120) == 19\n"}
{"task_id": "ExtendedEval/68", "prompt": "\ndef pluck(arr):\n    \"\"\"Given a list of integers representing nodes on a tree branch, return the [smallest even number, its index].\n    - If no even numbers exist, return [].\n    - If multiple even numbers have the same minimum value, pick the one with the lowest index.\n    - Negative numbers and very large integers can appear.\n    >>> pluck([13, 4, 2, -4, 3, 4])\n    [-4, 3]\n    >>> pluck([7, 9, 11])\n    []\n    >>> pluck([0, 0, -2, 2])\n    [-2, 2]\n    \"\"\"\n", "entry_point": "pluck", "canonical_solution": "    if not arr: return []\n    min_val, idx = float('inf'), -1\n    for i, val in enumerate(arr):\n        if val % 2 == 0 and val < min_val:\n            min_val, idx = val, i\n    return [min_val, idx] if idx != -1 else []", "test": "def check(candidate):\n    assert candidate([13, 4, 2, -4, 3, 4]) == [-4, 3]\n    assert candidate([7, 9, 11]) == []\n    assert candidate([0, 0, -2, 2]) == [-2, 2]\n    assert candidate([-8, -6, -6]) == [-8, 0]\n    assert candidate([]) == []\n"}
{"task_id": "ExtendedEval/69", "prompt": "\ndef search(lst):\n    '''Given a non-empty list of positive integers, return the largest value x such that x appears at least x times.\n    Return -1 if no such value exists.\n    >>> search([3,3,3,2,2,1])\n    3\n    >>> search([1,1,2,2,2,3,3,3,3])\n    3\n    >>> search([5,4,4])\n    -1\n    '''\n", "entry_point": "search", "canonical_solution": "    from collections import Counter\n    c = Counter(lst)\n    return max((x for x in c if c[x] >= x), default=-1)", "test": "def check(candidate):\n    assert candidate([3,3,3,2,2,1]) == 3\n    assert candidate([1,1,2,2,2,3,3,3,3]) == 3\n    assert candidate([5,4,4]) == -1\n    assert candidate([6]*7 + [5]*5 + [4]*4) == 6\n    assert candidate([1]*1 + [2]*1 + [3]*2 + [4]*3) == 1\n"}
{"task_id": "ExtendedEval/70", "prompt": "\ndef strange_sort_list(lst):\n    '''Sort the list in alternating order: smallest, largest, second smallest, second largest, etc.\n    If numbers repeat, they should still be treated as distinct entries.\n    The final result must contain all original values.\n    >>> strange_sort_list([10, 20, 30, 40])\n    [10, 40, 20, 30]\n    >>> strange_sort_list([1,1,1,1])\n    [1,1,1,1]\n    >>> strange_sort_list([])\n    []\n    '''\n", "entry_point": "strange_sort_list", "canonical_solution": "    lst = sorted(lst)\n    res = []\n    while lst:\n        res.append(lst.pop(0))\n        if lst:\n            res.append(lst.pop(-1))\n    return res", "test": "def check(candidate):\n    assert candidate([10, 20, 30, 40]) == [10, 40, 20, 30]\n    assert candidate([1,1,1,1]) == [1,1,1,1]\n    assert candidate([]) == []\n    assert candidate([3,1,2]) == [1,3,2]\n    assert candidate([7,2,8,4,1]) == [1,8,2,7,4]\n"}
{"task_id": "ExtendedEval/71", "prompt": "def triangle_area(a, b, c):\n    '''\n    Given the lengths of three triangle sides (float or int), return the area rounded to 2 decimals.\n    Return -1 if the sides do not form a valid triangle based on triangle inequality.\n    Handles very small or very large float inputs.\n    Example:\n        triangle_area(3, 4, 5) == 6.00\n        triangle_area(1e-9, 1e-9, 2e-9) == -1\n    '''\n", "entry_point": "triangle_area", "canonical_solution": "    if a + b <= c or a + c <= b or b + c <= a:\n        return -1\n    s = (a + b + c) / 2\n    area = (s * (s - a) * (s - b) * (s - c)) ** 0.5\n    return round(area, 2)", "test": "def check(candidate):\n    assert candidate(3, 4, 5) == 6.00\n    assert candidate(1, 2, 10) == -1\n    assert candidate(4, 8, 5) == 8.18\n    assert candidate(2, 2, 2) == 1.73\n    assert candidate(1e-9, 1e-9, 1e-9) == 0.0\n    assert candidate(10, 5, 7) == 16.25\n    assert candidate(1, 1, 1) == 0.43\n    assert candidate(1e-9, 1e-9, 2e-9) == -1\n"}
{"task_id": "ExtendedEval/72", "prompt": "def will_it_fly(q, w):\n    '''\n    Return True if list q is a palindrome AND sum(q) <= w.\n    Handles floats, negative numbers, and precision-sensitive weights.\n    The list is considered balanced only if q == q[::-1].\n    Example:\n        will_it_fly([3,2,3], 9) == True\n        will_it_fly([1,2], 5) == False\n    '''\n", "entry_point": "will_it_fly", "canonical_solution": "    return q == q[::-1] and sum(q) <= w", "test": "def check(candidate):\n    assert candidate([3, 2, 3], 9) == True\n    assert candidate([1, 2], 5) == False\n    assert candidate([3], 5) == True\n    assert candidate([3, 2, 3], 1) == False\n    assert candidate([1, 2, 1], 4) == True\n    assert candidate([1.5, 0.5, 1.5], 3.5) == True\n    assert candidate([1e-9, 2e-9, 1e-9], 5e-9) == True\n    assert candidate([1e-9, 2e-9, 1e-9], 1e-9) == False\n    assert candidate([1, 1, 2], 4) == False\n"}
{"task_id": "ExtendedEval/73", "prompt": "def smallest_change(arr):\n    '''\n    Given a list of integers, return the minimum number of changes needed to make it a palindrome.\n    A change means replacing an element with any other integer.\n    Handles very large and negative numbers.\n    Example:\n        smallest_change([1, 2, 3, 5, 4, 7, 9, 6]) == 4\n        smallest_change([1, 2, 3, 2, 1]) == 0\n    \n    Return only the Python function code.\n    '''\n", "entry_point": "smallest_change", "canonical_solution": "    return sum(1 for i in range(len(arr)//2) if arr[i] != arr[-(i+1)])", "test": "def check(candidate):\n    assert candidate([1, 2, 3, 5, 4, 7, 9, 6]) == 4\n    assert candidate([1, 2, 3, 4, 3, 2, 2]) == 1\n    assert candidate([1, 4, 2]) == 1\n    assert candidate([1, 4, 4, 2]) == 1\n    assert candidate([1, 2, 3, 2, 1]) == 0\n    assert candidate([3, 1, 1, 3]) == 0\n    assert candidate([1]) == 0\n    assert candidate([0, 1]) == 1\n    assert candidate([1e9, 2e9, 1e9]) == 0\n    assert candidate([-1, -2, -1]) == 0\n    assert candidate([-1, 0, -2]) == 1\n"}
{"task_id": "ExtendedEval/74", "prompt": "def total_match(lst1, lst2):\n    '''\n    Write a function that accepts two lists of strings and returns the list with fewer total characters.\n    If both lists contain the same number of characters, return the lexicographically smaller one.\n    \n    Return only the Python function code.\n    '''\n", "entry_point": "total_match", "canonical_solution": "    len1 = sum(len(s) for s in lst1)\n    len2 = sum(len(s) for s in lst2)\n    if len1 < len2:\n        return lst1\n    elif len2 < len1:\n        return lst2\n    return min(lst1, lst2) if lst1 != lst2 else lst1", "test": "def check(candidate):\n    assert candidate([], []) == []\n    assert candidate(['abc'], ['a', 'b', 'c']) == ['a', 'b', 'c']\n    assert candidate(['ab', 'cd'], ['abcd']) == ['ab', 'cd']\n    assert candidate(['1', '22', '333'], ['1', '2', '3']) == ['1', '2', '3']\n    assert candidate(['x' * 5], ['y' * 5]) == ['x' * 5]\n    assert candidate(['hi', 'admin'], ['hi', 'hi']) == ['hi', 'hi']\n    assert candidate(['z'], ['a', 'b', 'c', 'd']) == ['z']\n"}
{"task_id": "ExtendedEval/75", "prompt": "def is_multiply_prime(a):\n    '''\n    Return True if integer a is the product of three *distinct* prime numbers.\n    Numbers like 2*2*3 (not distinct) or 2*2*2 are not valid.\n    \n    Return only the Python function code.\n    '''\n", "entry_point": "is_multiply_prime", "canonical_solution": "    def is_prime(n):\n        if n < 2: return False\n        for i in range(2, int(n ** 0.5) + 1):\n            if n % i == 0:\n                return False\n        return True\n    primes = [i for i in range(2, 100) if is_prime(i)]\n    for i in range(len(primes)):\n        for j in range(i + 1, len(primes)):\n            for k in range(j + 1, len(primes)):\n                if primes[i] * primes[j] * primes[k] == a:\n                    return True\n    return False", "test": "def check(candidate):\n    assert candidate(30) == True  # 2*3*5\n    assert candidate(2*3*5) == True\n    assert candidate(2*2*3) == False\n    assert candidate(11*13*17) == True\n    assert candidate(7*7*7) == False\n    assert candidate(2*3*7) == True\n    assert candidate(97) == False\n    assert candidate(1) == False\n    assert candidate(2*3*5*7) == False\n"}
{"task_id": "ExtendedEval/76", "prompt": "def is_simple_power(x, n):\n    '''\n    Return True if x is an exact power of n (i.e., exists integer k such that n**k == x), False otherwise.\n    Handle edge cases: x=1 is a power of any n (except n=0), n=1 means x must be 1.\n    \n    Return only the Python function code.\n    '''\n", "entry_point": "is_simple_power", "canonical_solution": "    if n <= 1:\n        return x == 1 if n == 1 else False\n    p = 1\n    while p < x:\n        p *= n\n    return p == x", "test": "def check(candidate):\n    assert candidate(1, 1) == True\n    assert candidate(1, 2) == True\n    assert candidate(16, 2) == True\n    assert candidate(9, 3) == True\n    assert candidate(81, 3) == True\n    assert candidate(82, 3) == False\n    assert candidate(100, 10) == True\n    assert candidate(64, 8) == True\n    assert candidate(128, 2) == True\n    assert candidate(1024, 2) == True\n    assert candidate(243, 3) == True\n    assert candidate(3, 1) == False\n    assert candidate(4, 0) == False\n"}
{"task_id": "ExtendedEval/77", "prompt": "def iscube(a):\n    '''\n    Determine whether the given integer 'a' is a perfect cube of an integer,\n    including for large magnitude negative numbers and edge cases. Account for potential\n    floating point rounding issues in cube root calculation.\n    Examples:\n    iscube(27) => True\n    iscube(-27) => True\n    iscube(26) => False\n    iscube(1000000) => True\n    iscube(999999) => False\n    \n    Return only the Python function code.\n    '''\n", "entry_point": "iscube", "canonical_solution": "    if a == 0:\n        return True\n    abs_cbrt = round(abs(a) ** (1. / 3))\n    return abs_cbrt ** 3 == abs(a) and (a >= 0 or (-abs_cbrt) ** 3 == a)", "test": "def check(candidate):\n    assert candidate(1) == True\n    assert candidate(8) == True\n    assert candidate(-8) == True\n    assert candidate(27) == True\n    assert candidate(28) == False\n    assert candidate(-27) == True\n    assert candidate(0) == True\n    assert candidate(1000000) == True\n    assert candidate(999999) == False\n    assert candidate(-1000000) == True\n    assert candidate(-729) == True\n    assert candidate(2) == False\n    assert candidate(17) == False\n"}
{"task_id": "ExtendedEval/78", "prompt": "def hex_key(num):\n    '''\n    Count the number of prime digits in a hexadecimal string.\n    Consider digits 2, 3, 5, 7, B (11), and D (13) as primes. The input string may include\n    a mix of digits and uppercase hexadecimal letters. Ignore any invalid characters.\n    Examples:\n    hex_key('ABCDEF') => 2\n    hex_key('BEEF') => 1\n    hex_key('1234567890') => 4\n    \n    Return only the Python function code.\n    '''\n", "entry_point": "hex_key", "canonical_solution": "    primes = {'2', '3', '5', '7', 'B', 'D'}\n    return sum(1 for ch in num if ch in primes)", "test": "def check(candidate):\n    assert candidate('AB') == 1\n    assert candidate('1077E') == 2\n    assert candidate('ABED1A33') == 4\n    assert candidate('2020') == 2\n    assert candidate('123456789ABCDEF0') == 6\n    assert candidate('BEEF') == 1\n    assert candidate('') == 0\n    assert candidate('FFFFFFFF') == 0\n    assert candidate('2357BD') == 6\n    assert candidate('CAFED00D') == 2\n"}
{"task_id": "ExtendedEval/79", "prompt": "def decimal_to_binary(decimal):\n    '''\n    Convert a given non-negative integer to its binary representation and wrap the result\n    with 'db' prefix and suffix. Handle edge cases like 0 and large values.\n    Example:\n    decimal_to_binary(5) => 'db101db'\n    decimal_to_binary(0) => 'db0db'\n    decimal_to_binary(255) => 'db11111111db'\n    \n    Return only the Python function code.\n    '''\n", "entry_point": "decimal_to_binary", "canonical_solution": "    if decimal < 0:\n        raise ValueError('Only non-negative integers allowed')\n    return 'db' + bin(decimal)[2:] + 'db'", "test": "def check(candidate):\n    assert candidate(0) == 'db0db'\n    assert candidate(1) == 'db1db'\n    assert candidate(2) == 'db10db'\n    assert candidate(15) == 'db1111db'\n    assert candidate(32) == 'db100000db'\n    assert candidate(255) == 'db11111111db'\n    assert candidate(1023) == 'db1111111111db'\n    assert candidate(1048576) == 'db100000000000000000000db'\n"}
{"task_id": "ExtendedEval/80", "prompt": "def is_happy_extended(s, k=3, allow_overlap=False):\n    \"\"\"Check if a string is 'happy' based on more complex rules.\n    \n    A string is happy if:\n    1. Its length is at least k\n    2. Every k consecutive letters are distinct (no duplicates)\n    3. If allow_overlap is True, also check overlapping windows of size k\n    4. Handle Unicode characters correctly\n    \n    Additionally, return a tuple (is_happy, unhappy_positions) where unhappy_positions\n    is a list of starting indices where the k-window constraint is violated.\n    \n    Examples:\n    is_happy_extended('abcd', 3) => (True, [])\n    is_happy_extended('aabb', 3) => (False, [1])\n    is_happy_extended('abcabc', 3, True) => (True, [])\n    is_happy_extended('ππππ', 2) => (False, [0, 1, 2])\n    is_happy_extended('abcdefg', 4) => (True, [])\n    is_happy_extended('aabcde', 2) => (False, [0])\n    \n    Return only the Python function code.\n    \"\"\"\n", "entry_point": "is_happy_extended", "canonical_solution": "    if len(s) < k:\n        return (False, [])\n    \n    unhappy_positions = []\n    \n    # Check non-overlapping windows\n    for i in range(0, len(s) - k + 1, 1 if allow_overlap else k):\n        window = s[i:i + k]\n        if len(set(window)) != k:\n            unhappy_positions.append(i)\n    \n    return (len(unhappy_positions) == 0, unhappy_positions)", "test": "def check(candidate):\n    # Basic tests\n    assert candidate('abcd', 3) == (True, [])\n    assert candidate('aabb', 3) == (False, [1])\n    assert candidate('abcabc', 3, True) == (True, [])\n    assert candidate('abcabc', 3, False) == (True, [])\n    \n    # Unicode tests\n    assert candidate('ππππ', 2) == (False, [0, 1, 2])\n    assert candidate('αβγδ', 2) == (True, [])\n    assert candidate('你好世界', 2) == (True, [])\n    assert candidate('🌟🌟🌙', 2) == (False, [0])\n    \n    # Variable k tests\n    assert candidate('abcdefg', 4) == (True, [])\n    assert 
candidate('aabcde', 2) == (False, [0])\n    assert candidate('abcabc', 6) == (False, [0])\n    assert candidate('abcdefghijk', 5) == (True, [])\n    \n    # Edge cases\n    assert candidate('', 1) == (False, [])\n    assert candidate('a', 2) == (False, [])\n    assert candidate('ab', 2) == (True, [])\n    assert candidate('aa', 2) == (False, [0])\n    \n    # Overlap vs non-overlap\n    assert candidate('aabcd', 2, False) == (False, [0])\n    assert candidate('aabcd', 2, True) == (False, [0])\n    assert candidate('abaca', 3, True) == (False, [2])\n    assert candidate('abcabcabc', 3, False) == (True, [])\n    \n    # Complex patterns\n    assert candidate('xyxyxyxy', 2) == (True, [])\n    assert candidate('xyxyxyxy', 3) == (False, [2, 5])\n    assert candidate('abcdefghijklmnop', 10) == (True, [])\n    assert candidate('aabbccddee', 3, True) == (False, [0, 1, 3, 4, 6, 7])\n"}
{"task_id": "ExtendedEval/81", "prompt": "def grade_calculator(grades, custom_scale=None, include_stats=False, round_mode='standard'):\n    \"\"\"Enhanced grading system with multiple features.\n    \n    Parameters:\n    - grades: list of GPAs (can include None for absent students)\n    - custom_scale: optional dict mapping GPA ranges to grades\n    - include_stats: if True, return (grades, statistics_dict)\n    - round_mode: 'standard', 'up', 'down', or 'banker' (round half to even)\n    \n    Default scale remains the same, but now handles:\n    - Invalid GPAs (negative or > 4.0) → 'Invalid'\n    - None values → 'Absent'\n    - Different rounding modes affect grade boundaries\n    \n    If include_stats is True, also return:\n    - mean, median, std_dev, grade_distribution\n    \n    Examples:\n    grade_calculator([4.0, None, -1, 5.0]) => ['A+', 'Absent', 'Invalid', 'Invalid']\n    grade_calculator([3.65], round_mode='up') => ['A-']\n    grade_calculator([3.65], round_mode='down') => ['B+']\n    grade_calculator([2.5, 3.5], include_stats=True) => (['B-', 'A-'], {'mean': 3.0, ...})\n    \n    Return only the Python function code.\n    \"\"\"\n", "entry_point": "grade_calculator", "canonical_solution": "    import statistics\n    import math\n    \n    def round_grade(gpa, mode):\n        if mode == 'standard':\n            return round(gpa, 2)\n        elif mode == 'up':\n            return math.ceil(gpa * 100) / 100\n        elif mode == 'down':\n            return math.floor(gpa * 100) / 100\n        elif mode == 'banker':\n            # Round half to even\n            return round(gpa * 100) / 100\n        return gpa\n    \n    default_scale = {\n        (4.0, 4.0): 'A+',\n        (3.7, 3.99): 'A',\n        (3.3, 3.69): 'A-',\n        (3.0, 3.29): 'B+',\n        (2.7, 2.99): 'B',\n        (2.3, 2.69): 'B-',\n        (2.0, 2.29): 'C+',\n        (1.7, 1.99): 'C',\n        (1.3, 1.69): 'C-',\n        (1.0, 1.29): 'D+',\n        (0.7, 0.99): 'D',\n        (0.0, 
0.69): 'D-',\n        (0.0, 0.0): 'E'\n    }\n    \n    scale = custom_scale or default_scale\n    letter_grades = []\n    valid_grades = []\n    \n    for gpa in grades:\n        if gpa is None:\n            letter_grades.append('Absent')\n        elif isinstance(gpa, (int, float)) and gpa < 0:\n            letter_grades.append('Invalid')\n        elif isinstance(gpa, (int, float)) and gpa > 4.0:\n            letter_grades.append('Invalid')\n        else:\n            rounded_gpa = round_grade(gpa, round_mode)\n            valid_grades.append(gpa)\n            \n            grade_assigned = False\n            for (low, high), grade in scale.items():\n                if low <= rounded_gpa <= high:\n                    letter_grades.append(grade)\n                    grade_assigned = True\n                    break\n            \n            if not grade_assigned:\n                # Default grade assignment\n                if rounded_gpa == 4.0:\n                    letter_grades.append('A+')\n                elif rounded_gpa > 3.7:\n                    letter_grades.append('A')\n                elif rounded_gpa > 3.3:\n                    letter_grades.append('A-')\n                elif rounded_gpa > 3.0:\n                    letter_grades.append('B+')\n                elif rounded_gpa > 2.7:\n                    letter_grades.append('B')\n                elif rounded_gpa > 2.3:\n                    letter_grades.append('B-')\n                elif rounded_gpa > 2.0:\n                    letter_grades.append('C+')\n                elif rounded_gpa > 1.7:\n                    letter_grades.append('C')\n                elif rounded_gpa > 1.3:\n                    letter_grades.append('C-')\n                elif rounded_gpa > 1.0:\n                    letter_grades.append('D+')\n                elif rounded_gpa > 0.7:\n                    letter_grades.append('D')\n                elif rounded_gpa > 0.0:\n                    letter_grades.append('D-')\n                
else:\n                    letter_grades.append('E')\n    \n    if include_stats and valid_grades:\n        stats = {\n            'mean': statistics.mean(valid_grades),\n            'median': statistics.median(valid_grades),\n            'std_dev': statistics.stdev(valid_grades) if len(valid_grades) > 1 else 0,\n            'grade_distribution': {grade: letter_grades.count(grade) for grade in set(letter_grades)}\n        }\n        return letter_grades, stats\n    \n    return letter_grades", "test": "def check(candidate):\n    # Basic tests\n    assert candidate([4.0, 3, 1.7, 2, 3.5]) == ['A+', 'B+', 'C', 'C+', 'A-']\n    assert candidate([1.2]) == ['D+']\n    assert candidate([0.5]) == ['D-']\n    assert candidate([0.0]) == ['E']\n    \n    # Invalid inputs\n    assert candidate([4.0, None, -1, 5.0]) == ['A+', 'Absent', 'Invalid', 'Invalid']\n    assert candidate([None, None]) == ['Absent', 'Absent']\n    assert candidate([-5.0, 10.0]) == ['Invalid', 'Invalid']\n    \n    # Rounding mode tests\n    assert candidate([3.65], round_mode='up') == ['A-']\n    assert candidate([3.65], round_mode='down') == ['B+']\n    assert candidate([2.335], round_mode='standard') == ['B-']\n    assert candidate([1.995], round_mode='up') == ['C+']\n    \n    # Statistics tests\n    result, stats = candidate([2.5, 3.5], include_stats=True)\n    assert result == ['B-', 'A-']\n    assert abs(stats['mean'] - 3.0) < 0.01\n    assert stats['median'] == 3.0\n    assert 'std_dev' in stats\n    assert stats['grade_distribution']['B-'] == 1\n    assert stats['grade_distribution']['A-'] == 1\n    \n    # Edge cases\n    assert candidate([]) == []\n    assert candidate([4.0, 4.0, 4.0]) == ['A+', 'A+', 'A+']\n    \n    # Complex cases\n    grades, stats = candidate([3.7, 3.3, 2.7, 2.3, None, -1], include_stats=True)\n    assert grades == ['A', 'A-', 'B', 'B-', 'Absent', 'Invalid']\n    assert len(stats['grade_distribution']) == 6\n    assert abs(stats['mean'] - 3.0) < 0.01\n"}
{"task_id": "ExtendedEval/82", "prompt": "def advanced_prime_check(string, check_type='length', encoding='utf-8'):\n    \"\"\"Advanced prime checking for strings with multiple modes.\n    \n    Parameters:\n    - string: input string\n    - check_type: 'length', 'bytes', 'unique_chars', or 'unicode_points'\n    - encoding: encoding to use for byte counting\n    \n    check_type determines what to check for primality:\n    - 'length': number of characters (original behavior)\n    - 'bytes': number of bytes in the encoded string\n    - 'unique_chars': number of unique characters\n    - 'unicode_points': sum of Unicode code points modulo 1000\n    \n    Also handle:\n    - Empty string edge case\n    - Multi-byte Unicode characters\n    - Return tuple (is_prime, value_checked) for debugging\n    \n    Examples:\n    advanced_prime_check('Hello') => (True, 5)\n    advanced_prime_check('ππ', 'bytes', 'utf-8') => (False, 4)\n    advanced_prime_check('aabbcc', 'unique_chars') => (True, 3)\n    advanced_prime_check('ABC', 'unicode_points') => (False, 198)\n    \n    Return only the Python function code.\n    \"\"\"\n", "entry_point": "advanced_prime_check", "canonical_solution": "    def is_prime(n):\n        if n < 2:\n            return False\n        if n == 2:\n            return True\n        if n % 2 == 0:\n            return False\n        for i in range(3, int(n**0.5) + 1, 2):\n            if n % i == 0:\n                return False\n        return True\n    \n    if check_type == 'length':\n        value = len(string)\n    elif check_type == 'bytes':\n        value = len(string.encode(encoding))\n    elif check_type == 'unique_chars':\n        value = len(set(string))\n    elif check_type == 'unicode_points':\n        value = sum(ord(char) for char in string) % 1000\n    else:\n        raise ValueError(f\"Invalid check_type: {check_type}\")\n    \n    return (is_prime(value), value)", "test": "def check(candidate):\n    # Original behavior tests\n    assert 
candidate('Hello') == (True, 5)\n    assert candidate('abcdcba') == (True, 7)\n    assert candidate('kittens') == (True, 7)\n    assert candidate('orange') == (False, 6)\n    \n    # Empty string\n    assert candidate('') == (False, 0)\n    assert candidate('', 'bytes') == (False, 0)\n    assert candidate('', 'unique_chars') == (False, 0)\n    \n    # Bytes mode with Unicode\n    assert candidate('ππ', 'bytes', 'utf-8') == (False, 4)\n    assert candidate('Hello', 'bytes', 'utf-8') == (True, 5)\n    assert candidate('你好', 'bytes', 'utf-8') == (False, 6)\n    assert candidate('🌟', 'bytes', 'utf-8') == (False, 4)\n    \n    # Unique chars mode\n    assert candidate('aabbcc', 'unique_chars') == (True, 3)\n    assert candidate('abcdef', 'unique_chars') == (False, 6)\n    assert candidate('aaaaaa', 'unique_chars') == (False, 1)\n    assert candidate('ab', 'unique_chars') == (True, 2)\n    \n    # Unicode points mode\n    assert candidate('ABC', 'unicode_points') == (False, 198)  # 65+66+67=198\n    assert candidate('a', 'unicode_points') == (True, 97)\n    assert candidate('Hello World', 'unicode_points') == (False, 52)  # Sum % 1000\n    \n    # Edge cases\n    assert candidate('HI') == (True, 2)\n    assert candidate('go') == (True, 2)\n    assert candidate('gogo') == (False, 4)\n    \n    # Different encodings\n    assert candidate('café', 'bytes', 'utf-8') == (True, 5)\n    assert candidate('café', 'bytes', 'latin-1') == (False, 4)\n    \n    # Large strings\n    assert candidate('a' * 17, 'length') == (True, 17)\n    assert candidate('a' * 100, 'unique_chars') == (False, 1)\n"}
{"task_id": "ExtendedEval/8_stats", "prompt": "from typing import List, Tuple\n\ndef sum_product_stats(numbers: List[int]) -> Tuple[int, int, float, int]:\n    \"\"\" For a given list of integers, return a tuple (sum, product, mean, count).\n    Empty list returns (0, 1, 0.0, 0).\n    Mean is calculated as sum/count, or 0.0 for empty list.\n    \n    Args:\n    - numbers: List of integers\n    \n    Returns:\n    - Tuple containing (sum, product, mean, count)\n    \n    Examples:\n    >>> sum_product_stats([])\n    (0, 1, 0.0, 0)\n    >>> sum_product_stats([1, 2, 3, 4])\n    (10, 24, 2.5, 4)\n    >>> sum_product_stats([5])\n    (5, 5, 5.0, 1)\n    >>> sum_product_stats([-2, 3, -1])\n    (0, 6, 0.0, 3)\n    \n    Please provide only the Python function code with no explanations.\n    \"\"\"\n", "entry_point": "sum_product_stats", "canonical_solution": "    if not numbers:\n        return (0, 1, 0.0, 0)\n    \n    sum_value = 0\n    prod_value = 1\n    count = len(numbers)\n    \n    for n in numbers:\n        sum_value += n\n        prod_value *= n\n    \n    mean = sum_value / count\n    \n    return (sum_value, prod_value, mean, count)", "test": "\nMETADATA = {\n    'author': 'taniakaragiannidi',\n    'dataset': 'test'\n}\n\ndef check(candidate):\n    # Basic functionality\n    assert candidate([]) == (0, 1, 0.0, 0)\n    assert candidate([1, 2, 3, 4]) == (10, 24, 2.5, 4)\n    assert candidate([5]) == (5, 5, 5.0, 1)\n    \n    # Negative numbers\n    assert candidate([-2, 3, -1]) == (0, 6, 0.0, 3)\n    assert candidate([-1, -2, -3]) == (-6, -6, -2.0, 3)\n    \n    # Zero handling\n    assert candidate([0]) == (0, 0, 0.0, 1)\n    assert candidate([1, 0, 2]) == (3, 0, 1.0, 3)\n    assert candidate([0, 0, 0]) == (0, 0, 0.0, 3)\n    \n    # Larger lists\n    assert candidate([1, 1, 1, 1, 1]) == (5, 1, 1.0, 5)\n    assert candidate([2, 2, 2]) == (6, 8, 2.0, 3)\n    \n    # Mixed positive/negative\n    assert candidate([10, -5, 2]) == (7, -100, 7/3, 3)\n    assert 
candidate([-10, 10]) == (0, -100, 0.0, 2)\n    \n    # Large numbers\n    assert candidate([100, 200]) == (300, 20000, 150.0, 2)\n    \n    # Edge case with mean calculation\n    result = candidate([1, 2, 3])\n    assert result == (6, 6, 2.0, 3)\n    \n    result = candidate([7, 8, 9])\n    assert result == (24, 504, 8.0, 3)\n"}
{"task_id": "ExtendedEval/95", "prompt": "\ndef check_dict_case(dict):\n    \"\"\"\n    Given a dictionary, return True if all keys are strings in lower \n    case or all keys are strings in upper case, else return False.\n    The function should return False if the given dictionary is empty.\n    Additionally, the function should return False if any key contains\n    mixed alphanumeric characters or special characters.\n    Keys must be purely alphabetic to be considered valid.\n    Examples:\n    check_dict_case({\"a\":\"apple\", \"b\":\"banana\"}) should return True.\n    check_dict_case({\"a\":\"apple\", \"A\":\"banana\", \"B\":\"banana\"}) should return False.\n    check_dict_case({\"a\":\"apple\", 8:\"banana\", \"a\":\"apple\"}) should return False.\n    check_dict_case({\"Name\":\"John\", \"Age\":\"36\", \"City\":\"Houston\"}) should return False.\n    check_dict_case({\"STATE\":\"NC\", \"ZIP\":\"12345\" }) should return True.\n    check_dict_case({\"test1\":\"value\", \"test\":\"value\"}) should return False.\n    check_dict_case({\"ABC\":\"test\", \"DEF123\":\"value\"}) should return False.\n    \"\"\"\n", "entry_point": "check_dict_case", "canonical_solution": "    if len(dict.keys()) == 0:\n        return False\n    else:\n        state = \"start\"\n        for key in dict.keys():\n            if isinstance(key, str) == False:\n                state = \"mixed\"\n                break\n            if not key.isalpha():\n                state = \"mixed\"\n                break\n            if state == \"start\":\n                if key.isupper():\n                    state = \"upper\"\n                elif key.islower():\n                    state = \"lower\"\n                else:\n                    break\n            elif (state == \"upper\" and not key.isupper()) or (state == \"lower\" and not key.islower()):\n                    state = \"mixed\"\n                    break\n            else:\n                break\n        return state == \"upper\" 
or state == \"lower\" \n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate({\"p\":\"pineapple\", \"b\":\"banana\"}) == True, \"First test error: \" + str(candidate({\"p\":\"pineapple\", \"b\":\"banana\"}))\n    assert candidate({\"p\":\"pineapple\", \"A\":\"banana\", \"B\":\"banana\"}) == False, \"Second test error: \" + str(candidate({\"p\":\"pineapple\", \"A\":\"banana\", \"B\":\"banana\"}))\n    assert candidate({\"p\":\"pineapple\", 5:\"banana\", \"a\":\"apple\"}) == False, \"Third test error: \" + str(candidate({\"p\":\"pineapple\", 5:\"banana\", \"a\":\"apple\"}))\n    assert candidate({\"Name\":\"John\", \"Age\":\"36\", \"City\":\"Houston\"}) == False, \"Fourth test error: \" + str(candidate({\"Name\":\"John\", \"Age\":\"36\", \"City\":\"Houston\"}))\n    assert candidate({\"STATE\":\"NC\", \"ZIP\":\"12345\" }) == True, \"Fifth test error: \" + str(candidate({\"STATE\":\"NC\", \"ZIP\":\"12345\" }))      \n    assert candidate({\"fruit\":\"Orange\", \"taste\":\"Sweet\" }) == True, \"Sixth test error: \" + str(candidate({\"fruit\":\"Orange\", \"taste\":\"Sweet\" }))      \n    assert candidate({\"test1\":\"value\", \"test\":\"value\"}) == False, \"Seventh test error: \" + str(candidate({\"test1\":\"value\", \"test\":\"value\"}))\n    assert candidate({\"ABC\":\"test\", \"DEF123\":\"value\"}) == False, \"Eighth test error: \" + str(candidate({\"ABC\":\"test\", \"DEF123\":\"value\"}))\n    assert candidate({\"abc\":\"test\", \"def123\":\"value\"}) == False, \"Ninth test error: \" + str(candidate({\"abc\":\"test\", \"def123\":\"value\"}))\n    assert candidate({\"Test\":\"value\", \"TEST\":\"value\"}) == False, \"Tenth test error: \" + str(candidate({\"Test\":\"value\", \"TEST\":\"value\"}))\n    assert candidate({\"\":\"empty key\"}) == False, \"Eleventh test error: \" + str(candidate({\"\":\"empty key\"}))\n    assert candidate({\"a_b\":\"underscore\"}) == False, \"Twelfth test error: \" + 
str(candidate({\"a_b\":\"underscore\"}))\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate({}) == False, \"1st edge test error: \" + str(candidate({}))\n\n"}
{"task_id": "ExtendedEval/96", "prompt": "\ndef count_up_to(n):\n    \"\"\"Implement an efficient function that takes a non-negative integer and returns an array of the first n\n    integers that are prime numbers and less than n.\n    \n    Your solution must be optimized to handle large values of n (up to 10^6) efficiently.\n    The naive O(n²) approach of checking divisibility by all numbers will be too slow.\n    Consider using optimizations like the Sieve of Eratosthenes or checking divisibility only up to sqrt(i).\n    \n    for example:\n    count_up_to(5) => [2,3]\n    count_up_to(11) => [2,3,5,7]\n    count_up_to(0) => []\n    count_up_to(20) => [2,3,5,7,11,13,17,19]\n    count_up_to(1) => []\n    count_up_to(18) => [2,3,5,7,11,13,17]\n    count_up_to(1000) => [2,3,5,7,11,13,...,991,997] (168 primes total)\n    \"\"\"\n", "entry_point": "count_up_to", "canonical_solution": "    if n < 2:\n        return []\n    \n    # Sieve of Eratosthenes for optimal performance\n    sieve = [True] * n\n    sieve[0] = sieve[1] = False\n    \n    for i in range(2, int(n**0.5) + 1):\n        if sieve[i]:\n            for j in range(i*i, n, i):\n                sieve[j] = False\n    \n    return [i for i in range(2, n) if sieve[i]]\n\n", "test": "def check(candidate):\n\n    assert candidate(5) == [2,3]\n    assert candidate(6) == [2,3,5]\n    assert candidate(7) == [2,3,5]\n    assert candidate(10) == [2,3,5,7]\n    assert candidate(0) == []\n    assert candidate(22) == [2,3,5,7,11,13,17,19]\n    assert candidate(1) == []\n    assert candidate(18) == [2,3,5,7,11,13,17]\n    assert candidate(47) == [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43]\n    assert candidate(101) == [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97]\n    assert candidate(1000) == [i for i in range(2, 1000) if all(i % j != 0 for j in range(2, int(i**0.5) + 1))]\n    assert len(candidate(10000)) == 1229  # There are 1229 primes less than 10000\n    
assert candidate(100000)[-1] == 99991  # Last prime less than 100000\n\n"}
{"task_id": "ExtendedEval/97", "prompt": "\ndef multiply(a, b):\n    \"\"\"Complete the function that takes two integers and returns \n    the product of their unit digits.\n    \n    Enhanced version: Handle edge cases including very large numbers,\n    floating point inputs (extract integer part), and optimize for performance.\n    The function should work efficiently even with numbers up to 10^18.\n    \n    Assume the input can be integers, floats, or string representations of numbers.\n    For floating point inputs, use only the integer part.\n    For string inputs, convert to integer first.\n    \n    Examples:\n    multiply(148, 412) should return 16.\n    multiply(19, 28) should return 72.\n    multiply(2020, 1851) should return 0.\n    multiply(14, -15) should return 20.\n    multiply(3.7, 4.2) should return 12 (3*4).\n    multiply('123', '456') should return 18 (3*6).\n    multiply(1000000000000000007, 1000000000000000009) should return 63 (7*9).\n    \"\"\"\n", "entry_point": "multiply", "canonical_solution": "    # Convert inputs to integers, handling various input types\n    def to_int(x):\n        if isinstance(x, str):\n            return int(float(x))  # Handle string floats like '3.14'\n        elif isinstance(x, float):\n            return int(x)\n        return x\n    \n    a_int = to_int(a)\n    b_int = to_int(b)\n    \n    # Get unit digits efficiently\n    unit_a = abs(a_int) % 10\n    unit_b = abs(b_int) % 10\n    \n    return unit_a * unit_b\n", "test": "def check(candidate):\n\n    # Original test cases\n    assert candidate(148, 412) == 16, \"First test error: \" + str(candidate(148, 412))                    \n    assert candidate(19, 28) == 72, \"Second test error: \" + str(candidate(19, 28))           \n    assert candidate(2020, 1851) == 0, \"Third test error: \" + str(candidate(2020, 1851))\n    assert candidate(14,-15) == 20, \"Fourth test error: \" + str(candidate(14,-15))      \n    assert candidate(76, 67) == 42, \"Fifth test 
error: \" + str(candidate(76, 67))      \n    assert candidate(17, 27) == 49, \"Sixth test error: \" + str(candidate(17, 27))      \n    assert candidate(0, 1) == 0, \"1st edge test error: \" + str(candidate(0, 1))\n    assert candidate(0, 0) == 0, \"2nd edge test error: \" + str(candidate(0, 0))\n    \n    # Enhanced test cases\n    assert candidate(3.7, 4.2) == 12, \"Float test error: \" + str(candidate(3.7, 4.2))\n    assert candidate('123', '456') == 18, \"String test error: \" + str(candidate('123', '456'))\n    assert candidate(1000000000000000007, 1000000000000000009) == 63, \"Large number test error\"\n    assert candidate(-999, -888) == 72, \"Negative large test error\"\n    assert candidate('3.14', '2.71') == 6, \"String float test error\"\n    assert candidate(10**15 + 7, 10**15 + 3) == 21, \"Very large test error\"\n\n"}
{"task_id": "ExtendedEval/101", "prompt": "\ndef words_string(s):\n    \"\"\"\n    You will be given a string of words separated by commas, spaces, or both.\n    Your task is to split the string into words and return an array of the words.\n    Additionally, remove any words that:\n    - Have less than 2 characters\n    - Contain any digits\n    - Are duplicates (case-insensitive)\n    \n    The returned array should maintain the original order of first occurrence.\n    \n    For example:\n    words_string(\"Hi, my name is John\") == [\"Hi\", \"my\", \"name\", \"is\", \"John\"]\n    words_string(\"One, two, three, four, five, six\") == [\"One\", \"two\", \"three\", \"four\", \"five\", \"six\"]\n    words_string(\"a, Hello, h3llo, HELLO, world\") == [\"Hello\", \"world\"]\n    words_string(\"I, am, AM, i\") == [\"am\"]\n    \"\"\"\n", "entry_point": "words_string", "canonical_solution": "    if not s:\n        return []\n\n    # Replace commas with spaces\n    s = s.replace(',', ' ')\n    \n    # Split into words\n    words = s.split()\n    \n    # Filter and track seen words\n    result = []\n    seen_lower = set()\n    \n    for word in words:\n        # Check conditions\n        if len(word) >= 2 and not any(c.isdigit() for c in word):\n            word_lower = word.lower()\n            if word_lower not in seen_lower:\n                seen_lower.add(word_lower)\n                result.append(word)\n    \n    return result\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(\"Hi, my name is John\") == [\"Hi\", \"my\", \"name\", \"is\", \"John\"]\n    assert candidate(\"One, two, three, four, five, six\") == [\"One\", \"two\", \"three\", \"four\", \"five\", \"six\"]\n    assert candidate(\"Hi, my name\") == [\"Hi\", \"my\", \"name\"]\n    assert candidate(\"One,, two, three, four, five, six,\") == [\"One\", \"two\", \"three\", \"four\", \"five\", \"six\"]\n    assert candidate(\"a, Hello, h3llo, HELLO, world\") == [\"Hello\", 
\"world\"]\n    assert candidate(\"I, am, AM, i\") == [\"am\"]\n    assert candidate(\"test123, test, TEST, t3st\") == [\"test\"]\n    assert candidate(\"ab, cd, ef, ab, CD\") == [\"ab\", \"cd\", \"ef\"]\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate(\"\") == []\n    assert candidate(\"a, b, c\") == []\n    assert candidate(\"ahmed     , gamal\") == [\"ahmed\", \"gamal\"]\n\n"}
{"task_id": "ExtendedEval/102", "prompt": "\ndef choose_num(x, y):\n    \"\"\"This function takes two positive numbers x and y and returns the\n    biggest even integer number that is in the range [x, y] inclusive.\n    Additionally, the number should not be divisible by 4.\n    If there's no such number, then the function should return -1.\n\n    For example:\n    choose_num(12, 15) = 14 (14 is even, not divisible by 4)\n    choose_num(13, 12) = -1 (invalid range)\n    choose_num(8, 12) = 10 (8 and 12 are divisible by 4, 10 is not)\n    choose_num(16, 20) = 18 (16 and 20 are divisible by 4, 18 is not)\n    \"\"\"\n", "entry_point": "choose_num", "canonical_solution": "    if x > y:\n        return -1\n    \n    # Start from the largest number and go down\n    for num in range(y, x - 1, -1):\n        if num % 2 == 0 and num % 4 != 0:\n            return num\n    \n    return -1\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(12, 15) == 14\n    assert candidate(13, 12) == -1\n    assert candidate(8, 12) == 10\n    assert candidate(16, 20) == 18\n    assert candidate(6, 10) == 10\n    assert candidate(5, 9) == 6\n    assert candidate(4, 4) == -1\n    assert candidate(8, 8) == -1\n    assert candidate(2, 2) == 2\n    assert candidate(1, 3) == 2\n    assert candidate(40, 44) == 42\n    assert candidate(100, 104) == 102\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate(7, 7) == -1\n    assert candidate(546, 546) == 546\n\n"}
{"task_id": "ExtendedEval/103", "prompt": "\ndef rounded_avg(n, m):\n    \"\"\"You are given two positive integers n and m, and your task is to compute the\n    average of the integers from n through m (including n and m). \n    Round the answer to the nearest integer and convert that to binary.\n    Additionally, if the binary representation has more than 8 bits, \n    return only the last 8 bits prefixed with \"0b\".\n    If n is greater than m, return -1.\n    Example:\n    rounded_avg(1, 5) => \"0b11\"\n    rounded_avg(7, 5) => -1\n    rounded_avg(10, 20) => \"0b1111\"\n    rounded_avg(20, 33) => \"0b11010\"\n    rounded_avg(1000, 2000) => \"0b11011100\" (last 8 bits of larger number)\n    \"\"\"\n", "entry_point": "rounded_avg", "canonical_solution": "    if m < n:\n        return -1\n    summation = 0\n    for i in range(n, m+1):\n        summation += i\n    avg = round(summation/(m - n + 1))\n    binary = bin(avg)\n    \n    # If more than 8 bits (excluding '0b'), take last 8 bits\n    if len(binary) > 10:  # 10 = len('0b') + 8\n        binary_digits = binary[2:]  # Remove '0b'\n        last_8_bits = binary_digits[-8:]\n        return '0b' + last_8_bits\n    \n    return binary\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(1, 5) == \"0b11\"\n    assert candidate(7, 13) == \"0b1010\"\n    assert candidate(964,977) == \"0b11001010\"\n    assert candidate(996,997) == \"0b11100100\"\n    assert candidate(560,851) == \"0b11000010\"\n    assert candidate(185,546) == \"0b01101110\"\n    assert candidate(362,496) == \"0b10101101\"\n    assert candidate(350,902) == \"0b01110010\"\n    assert candidate(197,233) == \"0b11010111\"\n    assert candidate(1000, 2000) == \"0b11011100\"\n    assert candidate(5000, 6000) == \"0b01111100\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate(7, 5) == -1\n    assert candidate(5, 1) == -1\n    assert candidate(5, 5) == \"0b101\"\n\n"}
{"task_id": "ExtendedEval/105", "prompt": "\ndef by_length(arr):\n    \"\"\"\n    Given an array of integers, sort the integers that are between 1 and 9 inclusive,\n    reverse the resulting array, and then replace each digit by its corresponding name from\n    \"One\", \"Two\", \"Three\", \"Four\", \"Five\", \"Six\", \"Seven\", \"Eight\", \"Nine\".\n\n    For example:\n      arr = [2, 1, 1, 4, 5, 8, 2, 3]   \n            -> sort arr -> [1, 1, 2, 2, 3, 4, 5, 8] \n            -> reverse arr -> [8, 5, 4, 3, 2, 2, 1, 1]\n      return [\"Eight\", \"Five\", \"Four\", \"Three\", \"Two\", \"Two\", \"One\", \"One\"]\n    \n      If the array is empty, return an empty array:\n      arr = []\n      return []\n    \n      If the array has any strange number ignore it:\n      arr = [1, -1 , 55] \n            -> sort arr -> [-1, 1, 55]\n            -> reverse arr -> [55, 1, -1]\n      return = ['One']\n    \"\"\"\n", "entry_point": "by_length", "canonical_solution": "    dic = {\n        1: \"One\",\n        2: \"Two\",\n        3: \"Three\",\n        4: \"Four\",\n        5: \"Five\",\n        6: \"Six\",\n        7: \"Seven\",\n        8: \"Eight\",\n        9: \"Nine\",\n    }\n    sorted_arr = sorted(arr, reverse=True)\n    new_arr = []\n    for var in sorted_arr:\n        try:\n            new_arr.append(dic[var])\n        except:\n            pass\n    return new_arr\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert True, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate([2, 1, 1, 4, 5, 8, 2, 3]) == [\"Eight\", \"Five\", \"Four\", \"Three\", \"Two\", \"Two\", \"One\", \"One\"], \"Error\"\n    assert candidate([]) == [], \"Error\"\n    assert candidate([1, -1 , 55]) == ['One'], \"Error\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True, \"This prints if this assert fails 2 (also good for debugging!)\"\n    assert candidate([1, -1, 3, 2]) == [\"Three\", \"Two\", \"One\"]\n    
assert candidate([9, 4, 8]) == [\"Nine\", \"Eight\", \"Four\"]\n\n"}
{"task_id": "ExtendedEval/98", "prompt": "\ndef count_upper(s):\n    \"\"\"\n    Enhanced version: Given a string s, count the number of uppercase vowels in even indices.\n    Additionally, handle Unicode characters, empty strings, and implement efficiently for very long strings.\n    \n    The function should also work with Unicode vowels (À, É, Í, Ó, Ú, etc.) and be case-sensitive.\n    For strings longer than 10^6 characters, the solution should be memory efficient.\n    \n    Please return only the Python function code, without any explanations or additional text.\n    \n    For example:\n    count_upper('aBCdEf') returns 1\n    count_upper('abcdefg') returns 0 \n    count_upper('dBBE') returns 0\n    count_upper('ÀÉÍÓÚ') returns 3 (positions 0, 2, 4)\n    count_upper('aEiOuAeIoU') returns 0 (no uppercase vowels at even positions)\n    \"\"\"\n", "entry_point": "count_upper", "canonical_solution": "    if not s:\n        return 0\n    \n    # Extended set of uppercase vowels including Unicode\n    uppercase_vowels = set('AEIOUÀÁÂÃÄÅÆÈÉÊËÌÍÎÏÐÒÓÔÕÖØÙÚÛÜÝ')\n    \n    count = 0\n    for i in range(0, len(s), 2):  # Step by 2 to get even indices only\n        if s[i] in uppercase_vowels:\n            count += 1\n    \n    return count\n", "test": "def check(candidate):\n\n    # Original test cases\n    assert candidate('aBCdEf')  == 1\n    assert candidate('abcdefg') == 0\n    assert candidate('dBBE') == 0\n    assert candidate('B')  == 0\n    assert candidate('U')  == 1\n    assert candidate('') == 0\n    assert candidate('EEEE') == 2\n    \n    # Enhanced test cases\n    assert candidate('ÀÉÍÓÚ') == 3, \"Unicode vowel test failed\"\n    assert candidate('aEiOuAeIoU') == 0, \"Mixed case test failed\"  # No uppercase vowels at even positions\n    assert candidate('AeIoUaEiOu') == 2, \"Another mixed case test failed\"  # A at 0, I at 2\n    assert candidate('àéíóú') == 0, \"Lowercase Unicode test failed\"\n    assert candidate('A' * 1000000) == 500000, \"Large 
string test failed\"\n    assert candidate('aE' * 500000) == 0, \"Large alternating test failed\"  # All E's are at odd positions\n    assert candidate('Ea' * 500000) == 500000, \"Large alternating test 2 failed\"  # All E's are at even positions\n\n"}
{"task_id": "ExtendedEval/99", "prompt": "\ndef closest_integer(value):\n    '''\n    Enhanced version: Create a function that takes a value (string) representing a number\n    and returns the closest integer to it. If the number is equidistant from two integers, \n    round it away from zero.\n    \n    Additional requirements:\n    - Handle scientific notation (e.g., '1.5e2', '3.14e-1')\n    - Handle very large numbers that exceed float precision\n    - Handle hexadecimal strings (e.g., '0xff', '0X1A')\n    - Handle binary strings (e.g., '0b1010', '0B1111')\n    - Handle octal strings (e.g., '0o17', '0O77')\n    - Maintain the away-from-zero rounding rule for .5 cases\n    \n    Please return only the Python function code, without any explanations or additional text.\n\n    Examples:\n    >>> closest_integer(\"10\")\n    10\n    >>> closest_integer(\"15.3\")\n    15\n    >>> closest_integer(\"1.5e2\")\n    150\n    >>> closest_integer(\"0xff\")\n    255\n    >>> closest_integer(\"0b1010\")\n    10\n    '''\n", "entry_point": "closest_integer", "canonical_solution": "    from math import floor, ceil, isnan, isinf\n    \n    # Handle special string cases\n    value = value.strip().lower()\n    \n    # Handle different number bases\n    if value.startswith('0x'):\n        num = float(int(value, 16))\n    elif value.startswith('0b'):\n        num = float(int(value, 2))\n    elif value.startswith('0o'):\n        num = float(int(value, 8))\n    else:\n        # Handle scientific notation and regular decimals\n        num = float(value)\n    \n    # Check for inf/nan after conversion\n    if isnan(num) or isinf(num):\n        raise ValueError(\"Cannot convert inf/nan to closest integer\")\n    \n    # Check if exactly .5 away from integers (round away from zero)\n    fractional_part = abs(num - int(num))\n    if abs(fractional_part - 0.5) < 1e-10:  # Handle floating point precision\n        if num > 0:\n            return int(ceil(num))\n        else:\n            
return int(floor(num))\n    \n    # Regular rounding\n    return int(round(num))\n", "test": "def check(candidate):\n\n    # Original test cases\n    assert candidate(\"10\") == 10, \"Test 1\"\n    assert candidate(\"14.5\") == 15, \"Test 2\"\n    assert candidate(\"-15.5\") == -16, \"Test 3\"\n    assert candidate(\"15.3\") == 15, \"Test 4\"\n    assert candidate(\"0\") == 0, \"Test 0\"\n    \n    # Enhanced test cases\n    assert candidate(\"1.5e2\") == 150, \"Scientific notation test 1\"\n    assert candidate(\"3.14e-1\") == 0, \"Scientific notation test 2\"\n    assert candidate(\"2.5e0\") == 3, \"Scientific notation .5 test\"\n    assert candidate(\"0xff\") == 255, \"Hexadecimal test 1\"\n    assert candidate(\"0XFF\") == 255, \"Hexadecimal test 2\"\n    assert candidate(\"0b1010\") == 10, \"Binary test 1\"\n    assert candidate(\"0B1111\") == 15, \"Binary test 2\"\n    assert candidate(\"0o17\") == 15, \"Octal test 1\"\n    assert candidate(\"0O77\") == 63, \"Octal test 2\"\n    assert candidate(\"-2.5\") == -3, \"Negative .5 test\"\n\n"}
{"task_id": "ExtendedEval/100", "prompt": "\ndef make_a_pile(n):\n    \"\"\"\n    Given a positive integer n, you have to make a pile of n levels of stones.\n    The first level has n stones.\n    The number of stones in the next level is:\n        - the next odd number if n is odd.\n        - the next even number if n is even.\n    Additionally, every 3rd level should have double the expected number of stones.\n    Return the number of stones in each level in a list, where element at index\n    i represents the number of stones in the level (i+1).\n\n    Please return only the Python function code, without any explanations or additional text.\n\n    Examples:\n    >>> make_a_pile(3)\n    [3, 5, 14]\n    >>> make_a_pile(4)\n    [4, 6, 16, 10]\n    \"\"\"\n", "entry_point": "make_a_pile", "canonical_solution": "    result = []\n    current = n\n    for i in range(n):\n        if (i + 1) % 3 == 0:  # Every 3rd level\n            result.append(current * 2)\n        else:\n            result.append(current)\n        current += 2  # Next odd if n odd, next even if n even\n    return result\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(3) == [3, 5, 14], \"Test 3\"\n    assert candidate(4) == [4, 6, 16, 10], \"Test 4\"\n    assert candidate(5) == [5, 7, 18, 11, 13], \"Test 5\"\n    assert candidate(6) == [6, 8, 20, 12, 14, 32], \"Test 6\"\n    assert candidate(1) == [1], \"Test 1\"\n    assert candidate(2) == [2, 4], \"Test 2\"\n    assert candidate(9) == [9, 11, 26, 15, 17, 38, 21, 23, 50], \"Test 9\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True, \"This prints if this assert fails 2 (also good for debugging!)\"\n\n"}
{"task_id": "ExtendedEval/104", "prompt": "\ndef unique_digits(x):\n    \"\"\"Given a list of positive integers x. return a sorted list of all \n    elements that haven't any even digit.\n    Additionally, the returned list should only include numbers where\n    the sum of digits is a prime number.\n\n    Please return only the Python function code, without any explanations or additional text.\n\n    Note: Returned list should be sorted in increasing order.\n    \n    For example:\n    >>> unique_digits([15, 33, 1422, 1])\n    [] (15: sum=6 not prime, 33: sum=6 not prime, 1422: has even digits, 1: sum=1 not prime)\n    >>> unique_digits([152, 323, 1422, 10])\n    [] (all have even digits or non-prime digit sums)\n    >>> unique_digits([111, 151, 371, 911])\n    [111, 151, 371, 911] (all have odd digits and prime digit sums)\n    \"\"\"\n", "entry_point": "unique_digits", "canonical_solution": "    def is_prime(n):\n        if n < 2:\n            return False\n        for i in range(2, int(n**0.5) + 1):\n            if n % i == 0:\n                return False\n        return True\n    \n    odd_digit_elements = []\n    for i in x:\n        if all(int(c) % 2 == 1 for c in str(i)):\n            digit_sum = sum(int(c) for c in str(i))\n            if is_prime(digit_sum):\n                odd_digit_elements.append(i)\n    return sorted(odd_digit_elements)\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate([15, 33, 1422, 1]) == []\n    assert candidate([152, 323, 1422, 10]) == []\n    assert candidate([111, 151, 371, 911]) == [111, 151, 371, 911]\n    assert candidate([135, 103, 31]) == []\n    assert candidate([111, 313, 7, 1111]) == [111, 313, 1111]\n    assert candidate([153, 753, 1357]) == [753, 1357]\n    assert candidate([79, 97, 11]) == [79, 97, 11]\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True\n\n"}
{"task_id": "ExtendedEval/106", "prompt": "\ndef f(n):\n    \"\"\" Implement the function f that takes n as a parameter,\n    and returns a list of size n, such that the value of the element at index i is:\n    - The factorial of i if i is even\n    - The sum of numbers from 1 to i if i is odd and i is not prime\n    - The i-th Fibonacci number if i is odd and i is prime\n    i starts from 1.\n    the factorial of i is the multiplication of the numbers from 1 to i (1 * 2 * ... * i).\n    \n    Please return only the Python function code, without any explanations or additional text.\n    \n    Example:\n    f(5) == [1, 2, 2, 24, 15]\n    f(7) == [1, 2, 2, 24, 15, 720, 13]\n    \"\"\"\n", "entry_point": "f", "canonical_solution": "    def is_prime(num):\n        if num < 2:\n            return False\n        for j in range(2, int(num**0.5) + 1):\n            if num % j == 0:\n                return False\n        return True\n    \n    def fibonacci(n):\n        if n <= 1:\n            return n\n        a, b = 0, 1\n        for _ in range(2, n + 1):\n            a, b = b, a + b\n        return b\n    \n    ret = []\n    for i in range(1, n + 1):\n        if i % 2 == 0:\n            x = 1\n            for j in range(1, i + 1):\n                x *= j\n            ret.append(x)\n        elif is_prime(i):\n            ret.append(fibonacci(i))\n        else:\n            x = 0\n            for j in range(1, i + 1):\n                x += j\n            ret.append(x)\n    return ret\n", "test": "def check(candidate):\n\n    assert candidate(5) == [1, 2, 2, 24, 15]\n    assert candidate(7) == [1, 2, 2, 24, 15, 720, 13]\n    assert candidate(1) == [1]\n    assert candidate(3) == [1, 2, 2]\n    assert candidate(10) == [1, 2, 2, 24, 15, 720, 13, 40320, 45, 3628800]\n    assert candidate(0) == []\n"}
{"task_id": "ExtendedEval/107", "prompt": "\ndef even_odd_palindrome(n):\n    \"\"\"\n    Given a positive integer n, return a tuple that has the number of even and odd\n    integer palindromes that fall within the range(1, n), inclusive.\n    Additionally, count only palindromes where the sum of digits is greater than\n    the number of digits.\n\n    Please return only the Python function code, without any explanations or additional text.\n\n    Example 1:\n        Input: 15\n        Output: (4, 4)\n        Explanation:\n        Integer palindromes are 1,2,3,4,5,6,7,8,9,11.\n        After filtering by sum > length:\n        - 1: sum=1, len=1, excluded\n        - 2: sum=2, len=1, included (even)\n        - ...\n        - 11: sum=2, len=2, excluded\n        Result: even=4 (2,4,6,8), odd=4 (3,5,7,9)\n\n    Example 2:\n        Input: 12\n        Output: (4, 4)\n        Explanation:\n        Palindromes 1-11, filtered by sum > length gives 2,3,4,5,6,7,8,9\n\n    Note:\n        1. 1 <= n <= 10^3\n        2. 
returned tuple has the number of even and odd integer palindromes respectively.\n    \"\"\"\n", "entry_point": "even_odd_palindrome", "canonical_solution": "    def is_palindrome(num):\n        return str(num) == str(num)[::-1]\n    \n    def sum_greater_than_length(num):\n        digits = str(num)\n        digit_sum = sum(int(d) for d in digits)\n        return digit_sum > len(digits)\n\n    even_palindrome_count = 0\n    odd_palindrome_count = 0\n\n    for i in range(1, n + 1):\n        if is_palindrome(i) and sum_greater_than_length(i):\n            if i % 2 == 0:\n                even_palindrome_count += 1\n            else:\n                odd_palindrome_count += 1\n                \n    return (even_palindrome_count, odd_palindrome_count)\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(15) == (4, 4)\n    assert candidate(12) == (4, 4)\n    assert candidate(3) == (1, 1)\n    assert candidate(100) == (4, 4)\n    assert candidate(30) == (4, 4)\n    assert candidate(999) == (20, 16)\n    \n    # Check some edge cases that are easy to work out by hand.\n    assert candidate(1) == (0, 0)\n    assert candidate(9) == (4, 4)\n    assert candidate(10) == (4, 4)\n\n"}
{"task_id": "ExtendedEval/108", "prompt": "\ndef count_nums(arr):\n    \"\"\"\n    Write a function count_nums which takes an array of integers and returns\n    the number of elements which has a sum of digits > 0.\n    If a number is negative, then its first signed digit will be negative:\n    e.g. -123 has signed digits -1, 2, and 3.\n    Additionally, exclude numbers where the absolute value of the sum of digits\n    is a prime number.\n    \n    Please return only the Python function code, without any explanations or additional text.\n    \n    >>> count_nums([]) == 0\n    >>> count_nums([-1, 11, -11]) == 1 (11: sum=2 prime excluded, -1: sum=-1 <=0, -11: sum=0 <=0)\n    >>> count_nums([1, 1, 2, -2, 3, 4, 5]) == 3 (1,1,4 qualify: sum=1 not prime, 4: sum=4 not prime)\n    \"\"\"\n", "entry_point": "count_nums", "canonical_solution": "    def is_prime(n):\n        if n < 2:\n            return False\n        for i in range(2, int(n**0.5) + 1):\n            if n % i == 0:\n                return False\n        return True\n    \n    def digits_sum(n):\n        neg = 1\n        if n < 0: \n            n, neg = -1 * n, -1 \n        n = [int(i) for i in str(n)]\n        n[0] = n[0] * neg\n        return sum(n)\n    \n    count = 0\n    for num in arr:\n        s = digits_sum(num)\n        if s > 0 and not is_prime(abs(s)):\n            count += 1\n    return count\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate([]) == 0\n    assert candidate([-1, -2, 0]) == 0\n    assert candidate([1, 1, 2, -2, 3, 4, 5]) == 3\n    assert candidate([1, 6, 9, -6, 0, 1, 5]) == 4\n    assert candidate([1, 100, 98, -7, 1, -1]) == 3\n    assert candidate([12, 23, 34, -45, -56, 0]) == 2\n    assert candidate([-0, 1**0]) == 1\n    assert candidate([1]) == 1\n    assert candidate([10, 11, 12, 13, 14, 15]) == 3\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True, \"This prints if this assert fails 2 (also good for 
debugging!)\"\n\n"}
{"task_id": "ExtendedEval/109", "prompt": "\ndef move_one_ball(arr):\n    \"\"\"We have an array 'arr' of N integers arr[1], arr[2], ..., arr[N].The\n    numbers in the array will be randomly ordered. Your task is to determine if\n    it is possible to get an array sorted in non-decreasing order by performing \n    the following operation on the given array:\n        You are allowed to perform right shift operation any number of times.\n    \n    One right shift operation means shifting all elements of the array by one\n    position in the right direction. The last element of the array will be moved to\n    the starting position in the array i.e. 0th index. \n\n    Additionally, you can perform at most one swap of any two adjacent elements\n    after all the right shift operations.\n\n    If it is possible to obtain the sorted array by performing the above operations\n    then return True else return False.\n    If the given array is empty then return True.\n\n    Please return only the Python function code, without any explanations or additional text.\n\n    Note: The given list is guaranteed to have unique elements.\n\n    For Example:\n    \n    move_one_ball([3, 4, 5, 1, 2])==>True\n    Explanation: By performing 2 right shift operations, non-decreasing order can\n                 be achieved for the given array.\n    move_one_ball([3, 5, 4, 1, 2])==>True\n    Explanation: After 2 right shifts we get [1,2,3,5,4]. 
One swap of 5 and 4 gives sorted array.\n    move_one_ball([1, 3, 5, 4, 2])==>False\n    Explanation: Not possible even with one swap after any number of shifts.\n    \"\"\"\n", "entry_point": "move_one_ball", "canonical_solution": "    if len(arr) == 0:\n        return True\n    \n    sorted_arr = sorted(arr)\n    n = len(arr)\n    \n    # Try all possible rotations\n    for shift in range(n):\n        rotated = arr[shift:] + arr[:shift]\n        \n        # Check if already sorted\n        if rotated == sorted_arr:\n            return True\n        \n        # Check if one adjacent swap can make it sorted\n        for i in range(n - 1):\n            # Make a copy and swap adjacent elements\n            temp = rotated[:]\n            temp[i], temp[i + 1] = temp[i + 1], temp[i]\n            if temp == sorted_arr:\n                return True\n    \n    return False\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate([3, 4, 5, 1, 2]) == True, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate([3, 5, 4, 1, 2]) == True\n    assert candidate([1, 3, 5, 4, 2]) == False\n    assert candidate([3, 5, 10, 1, 2]) == True\n    assert candidate([4, 3, 1, 2]) == False\n    assert candidate([5, 4, 3, 1, 2]) == False\n    assert candidate([1, 2, 3, 4, 5]) == True\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate([]) == True\n"}
{"task_id": "ExtendedEval/110", "prompt": "\ndef exchange(lst1, lst2):\n    \"\"\"In this problem, you will implement a function that takes two lists of numbers,\n    and determines whether it is possible to perform an exchange of elements\n    between them to make lst1 a list of only even numbers.\n    There is no limit on the number of exchanged elements between lst1 and lst2.\n    Additionally, the sum of all numbers in lst1 after exchanges must be\n    greater than the sum of all numbers in lst2.\n    If both conditions can be satisfied, return \"YES\".\n    Otherwise, return \"NO\".\n    \n    Please return only the Python function code, without any explanations or additional text.\n    \n    For example:\n    exchange([1, 2, 3, 4], [1, 2, 3, 4]) => \"NO\" (sum condition fails)\n    exchange([1, 2, 3, 4], [1, 5, 3, 4]) => \"NO\" (not enough even numbers)\n    exchange([1, 3, 5], [2, 4, 6, 8]) => \"YES\" \n    exchange([1, 3, 5, 7], [2, 4]) => \"NO\" (not enough even numbers)\n    It is assumed that the input lists will be non-empty.\n    \"\"\"\n", "entry_point": "exchange", "canonical_solution": "    odd_in_lst1 = sum(1 for x in lst1 if x % 2 == 1)\n    even_in_lst2 = sum(1 for x in lst2 if x % 2 == 0)\n    \n    # Check if we have enough even numbers to replace all odd numbers in lst1\n    if even_in_lst2 < odd_in_lst1:\n        return \"NO\"\n    \n    # Calculate what the sums would be after optimal exchange\n    # We want to move the smallest odd numbers out and the largest even numbers in\n    odd_lst1 = sorted([x for x in lst1 if x % 2 == 1])\n    even_lst1 = [x for x in lst1 if x % 2 == 0]\n    even_lst2 = sorted([x for x in lst2 if x % 2 == 0], reverse=True)\n    odd_lst2 = [x for x in lst2 if x % 2 == 1]\n    \n    # New lst1 will have all original evens plus the largest evens from lst2\n    new_lst1_sum = sum(even_lst1) + sum(even_lst2[:odd_in_lst1])\n    \n    # New lst2 will have remaining evens, all original odds, plus the odd numbers from lst1\n 
   new_lst2_sum = sum(even_lst2[odd_in_lst1:]) + sum(odd_lst2) + sum(odd_lst1)\n    \n    if new_lst1_sum > new_lst2_sum:\n        return \"YES\"\n    return \"NO\"\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate([1, 2, 3, 4], [1, 2, 3, 4]) == \"NO\"\n    assert candidate([1, 2, 3, 4], [1, 5, 3, 4]) == \"NO\"\n    assert candidate([1, 2, 3, 4], [2, 1, 4, 3]) == \"NO\" \n    assert candidate([5, 7, 3], [2, 6, 4]) == \"YES\"\n    assert candidate([5, 7, 3], [2, 6, 3]) == \"NO\" \n    assert candidate([3, 2, 6, 1, 8, 9], [3, 5, 5, 1, 1, 1]) == \"NO\"\n    assert candidate([100, 200], [200, 200]) == \"YES\"\n    assert candidate([1, 3], [20, 40, 60, 80]) == \"YES\"\n    assert candidate([1, 3, 5, 7], [2, 4]) == \"NO\"\n    assert candidate([10, 20], [1, 2, 3, 4, 5]) == \"YES\"\n\n"}
{"task_id": "ExtendedEval/111", "prompt": "\ndef histogram(test):\n    \"\"\"Given a string representing a space separated lowercase letters, return a dictionary\n    of the letter with the most repetition and containing the corresponding count.\n    If several letters have the same occurrence, return all of them.\n    \n    Please return only the Python function code, without any explanations or additional text.\n    \n    Example:\n    histogram('a b c') == {'a': 1, 'b': 1, 'c': 1}\n    histogram('a b b a') == {'a': 2, 'b': 2}\n    histogram('a b c a b') == {'a': 2, 'b': 2}\n    histogram('b b b b a') == {'b': 4}\n    histogram('') == {}\n\n    \"\"\"\n", "entry_point": "histogram", "canonical_solution": "    dict1={}\n    list1=test.split(\" \")\n    t=0\n\n    for i in list1:\n        if(list1.count(i)>t) and i!='':\n            t=list1.count(i)\n    if t>0:\n        for i in list1:\n            if(list1.count(i)==t):\n                \n                dict1[i]=t\n    return dict1\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate('a b b a') == {'a':2,'b': 2}, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate('a b c a b') == {'a': 2, 'b': 2}, \"This prints if this assert fails 2 (good for debugging!)\"\n    assert candidate('a b c d g') == {'a': 1, 'b': 1, 'c': 1, 'd': 1, 'g': 1}, \"This prints if this assert fails 3 (good for debugging!)\"\n    assert candidate('r t g') == {'r': 1,'t': 1,'g': 1}, \"This prints if this assert fails 4 (good for debugging!)\"\n    assert candidate('b b b b a') == {'b': 4}, \"This prints if this assert fails 5 (good for debugging!)\"\n    assert candidate('r t g') == {'r': 1,'t': 1,'g': 1}, \"This prints if this assert fails 6 (good for debugging!)\"\n    \n    \n    # Check some edge cases that are easy to work out by hand.\n    assert candidate('') == {}, \"This prints if this assert fails 7 (also good for debugging!)\"\n    assert candidate('a') == {'a': 
1}, \"This prints if this assert fails 8 (also good for debugging!)\"\n\n"}
{"task_id": "ExtendedEval/112", "prompt": "\ndef reverse_delete(s,c):\n    \"\"\"Task\n    We are given two strings s and c, you have to deleted all the characters in s that are equal to any character in c\n    then check if the result string is palindrome.\n    A string is called palindrome if it reads the same backward as forward.\n    You should return a tuple containing the result string and True/False for the check.\n    \n    Please return only the Python function code, without any explanations or additional text.\n    \n    Example\n    For s = \"abcde\", c = \"ae\", the result should be ('bcd',False)\n    For s = \"abcdef\", c = \"b\"  the result should be ('acdef',False)\n    For s = \"abcdedcba\", c = \"ab\", the result should be ('cdedc',True)\n    \"\"\"\n", "entry_point": "reverse_delete", "canonical_solution": "    s = ''.join([char for char in s if char not in c])\n    return (s,s[::-1] == s)\n", "test": "def check(candidate):\n\n    assert candidate(\"abcde\",\"ae\") == ('bcd',False)\n    assert candidate(\"abcdef\", \"b\") == ('acdef',False)\n    assert candidate(\"abcdedcba\",\"ab\") == ('cdedc',True)\n    assert candidate(\"dwik\",\"w\") == ('dik',False)\n    assert candidate(\"a\",\"a\") == ('',True)\n    assert candidate(\"abcdedcba\",\"\") == ('abcdedcba',True)\n    assert candidate(\"abcdedcba\",\"v\") == ('abcdedcba',True)\n    assert candidate(\"vabba\",\"v\") == ('abba',True)\n    assert candidate(\"mamma\", \"mia\") == (\"\", True)\n"}
{"task_id": "ExtendedEval/113", "prompt": "\ndef odd_count(lst):\n    \"\"\"Given a list of strings, where each string consists of only digits, return a list.\n    Each element i of the output should be \"the number of odd elements in the\n    string i of the input.\" where all the i's should be replaced by the number\n    of odd digits in the i'th string of the input.\n\n    Please return only the Python function code, without any explanations or additional text.\n\n    >>> odd_count(['1234567'])\n    [\"the number of odd elements 4n the str4ng 4 of the 4nput.\"]\n    >>> odd_count(['3',\"11111111\"])\n    [\"the number of odd elements 1n the str1ng 1 of the 1nput.\",\n     \"the number of odd elements 8n the str8ng 8 of the 8nput.\"]\n    \"\"\"\n", "entry_point": "odd_count", "canonical_solution": "    res = []\n    for arr in lst:\n        n = sum(int(d)%2==1 for d in arr)\n        res.append(\"the number of odd elements \" + str(n) + \"n the str\"+ str(n) +\"ng \"+ str(n) +\" of the \"+ str(n) +\"nput.\")\n    return res\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(['1234567']) == [\"the number of odd elements 4n the str4ng 4 of the 4nput.\"], \"Test 1\"\n    assert candidate(['3',\"11111111\"]) == [\"the number of odd elements 1n the str1ng 1 of the 1nput.\", \"the number of odd elements 8n the str8ng 8 of the 8nput.\"], \"Test 2\"\n    assert candidate(['271', '137', '314']) == [\n        'the number of odd elements 2n the str2ng 2 of the 2nput.',\n        'the number of odd elements 3n the str3ng 3 of the 3nput.',\n        'the number of odd elements 2n the str2ng 2 of the 2nput.'\n    ]\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True, \"This prints if this assert fails 2 (also good for debugging!)\"\n\n"}
{"task_id": "ExtendedEval/114", "prompt": "\ndef minSubArraySum(nums):\n    \"\"\"\n    Given an array of integers nums, find the minimum sum of any non-empty sub-array\n    of nums.\n    \n    Please return only the Python function code, without any explanations or additional text.\n    \n    Example\n    minSubArraySum([2, 3, 4, 1, 2, 4]) == 1\n    minSubArraySum([-1, -2, -3]) == -6\n    \"\"\"\n", "entry_point": "minSubArraySum", "canonical_solution": "    max_sum = 0\n    s = 0\n    for num in nums:\n        s += -num\n        if (s < 0):\n            s = 0\n        max_sum = max(s, max_sum)\n    if max_sum == 0:\n        max_sum = max(-i for i in nums)\n    min_sum = -max_sum\n    return min_sum\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate([2, 3, 4, 1, 2, 4]) == 1, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate([-1, -2, -3]) == -6\n    assert candidate([-1, -2, -3, 2, -10]) == -14\n    assert candidate([-9999999999999999]) == -9999999999999999\n    assert candidate([0, 10, 20, 1000000]) == 0\n    assert candidate([-1, -2, -3, 10, -5]) == -6\n    assert candidate([100, -1, -2, -3, 10, -5]) == -6\n    assert candidate([10, 11, 13, 8, 3, 4]) == 3\n    assert candidate([100, -33, 32, -1, 0, -2]) == -33\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate([-10]) == -10, \"This prints if this assert fails 2 (also good for debugging!)\"\n    assert candidate([7]) == 7\n    assert candidate([1, -1]) == -1\n"}
{"task_id": "ExtendedEval/115", "prompt": "\ndef max_fill(grid, capacity):\n    \"\"\"\n    You are given a rectangular grid of wells. Each row represents a single well,\n    and each 1 in a row represents a single unit of water.\n    Each well has a corresponding bucket that can be used to extract water from it, \n    and all buckets have the same capacity.\n    Your task is to use the buckets to empty the wells.\n    Output the number of times you need to lower the buckets.\n\n    Please return only the Python function code, without any explanations or additional text.\n\n    Example 1:\n        Input: \n            grid : [[0,0,1,0], [0,1,0,0], [1,1,1,1]]\n            bucket_capacity : 1\n        Output: 6\n\n    Example 2:\n        Input: \n            grid : [[0,0,1,1], [0,0,0,0], [1,1,1,1], [0,1,1,1]]\n            bucket_capacity : 2\n        Output: 5\n    \n    Example 3:\n        Input: \n            grid : [[0,0,0], [0,0,0]]\n            bucket_capacity : 5\n        Output: 0\n\n    Constraints:\n        * all wells have the same length\n        * 1 <= grid.length <= 10^2\n        * 1 <= grid[:,1].length <= 10^2\n        * grid[i][j] -> 0 | 1\n        * 1 <= capacity <= 10\n    \"\"\"\n", "entry_point": "max_fill", "canonical_solution": "    import math\n    return sum([math.ceil(sum(arr)/capacity) for arr in grid])\n", "test": "def check(candidate):\n\n\n    # Check some simple cases\n    assert True, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate([[0,0,1,0], [0,1,0,0], [1,1,1,1]], 1) == 6, \"Error\"\n    assert candidate([[0,0,1,1], [0,0,0,0], [1,1,1,1], [0,1,1,1]], 2) == 5, \"Error\"\n    assert candidate([[0,0,0], [0,0,0]], 5) == 0, \"Error\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True, \"This prints if this assert fails 2 (also good for debugging!)\"\n    assert candidate([[1,1,1,1], [1,1,1,1]], 2) == 4, \"Error\"\n    assert candidate([[1,1,1,1], [1,1,1,1]], 9) == 2, 
\"Error\"\n\n"}
{"task_id": "ExtendedEval/116", "prompt": "\ndef sort_array(arr):\n    \"\"\"\n    In this Kata, you have to sort an array of non-negative integers according to\n    number of ones in their binary representation in ascending order.\n    For similar number of ones, sort based on decimal value.\n\n    Please return only the Python function code, without any explanations or additional text.\n\n    It must be implemented like this:\n    >>> sort_array([1, 5, 2, 3, 4]) == [1, 2, 4, 3, 5]\n    >>> sort_array([-2, -3, -4, -5, -6]) == [-4, -2, -6, -5, -3]\n    >>> sort_array([1, 0, 2, 3, 4]) == [0, 1, 2, 4, 3]\n    \"\"\"\n", "entry_point": "sort_array", "canonical_solution": "    return sorted(sorted(arr), key=lambda x: bin(abs(x))[2:].count('1'))\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert True, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate([1,5,2,3,4]) == [1, 2, 4, 3, 5]\n    assert candidate([-2,-3,-4,-5,-6]) == [-4, -2, -6, -5, -3]\n    assert candidate([1,0,2,3,4]) == [0, 1, 2, 4, 3]\n    assert candidate([]) == []\n    assert candidate([2,5,77,4,5,3,5,7,2,3,4]) == [2, 2, 4, 4, 3, 3, 5, 5, 5, 7, 77]\n    assert candidate([3,6,44,12,32,5]) == [32, 3, 5, 6, 12, 44]\n    assert candidate([2,4,8,16,32]) == [2, 4, 8, 16, 32]\n    assert candidate([2,4,8,16,32]) == [2, 4, 8, 16, 32]\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True, \"This prints if this assert fails 2 (also good for debugging!)\"\n\n"}
{"task_id": "ExtendedEval/117", "prompt": "\ndef select_words(s, n):\n    \"\"\"Given a string s and a natural number n, you have been tasked to implement \n    a function that returns a list of all words from string s that contain exactly \n    n consonants, in order these words appear in the string s.\n    If the string s is empty then the function should return an empty list.\n    Note: you may assume the input string contains only letters and spaces.\n    \n    Please return only the Python function code, without any explanations or additional text.\n    \n    Examples:\n    select_words(\"Mary had a little lamb\", 4) ==> [\"little\"]\n    select_words(\"Mary had a little lamb\", 3) ==> [\"Mary\", \"lamb\"]\n    select_words(\"simple white space\", 2) ==> []\n    select_words(\"Hello world\", 4) ==> [\"world\"]\n    select_words(\"Uncle sam\", 3) ==> [\"Uncle\"]\n    \"\"\"\n", "entry_point": "select_words", "canonical_solution": "    result = []\n    for word in s.split():\n        n_consonants = 0\n        for i in range(0, len(word)):\n            if word[i].lower() not in [\"a\",\"e\",\"i\",\"o\",\"u\"]:\n                n_consonants += 1 \n        if n_consonants == n:\n            result.append(word)\n    return result\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(\"Mary had a little lamb\", 4) == [\"little\"], \"First test error: \" + str(candidate(\"Mary had a little lamb\", 4))      \n    assert candidate(\"Mary had a little lamb\", 3) == [\"Mary\", \"lamb\"], \"Second test error: \" + str(candidate(\"Mary had a little lamb\", 3))  \n    assert candidate(\"simple white space\", 2) == [], \"Third test error: \" + str(candidate(\"simple white space\", 2))      \n    assert candidate(\"Hello world\", 4) == [\"world\"], \"Fourth test error: \" + str(candidate(\"Hello world\", 4))  \n    assert candidate(\"Uncle sam\", 3) == [\"Uncle\"], \"Fifth test error: \" + str(candidate(\"Uncle sam\", 3))\n\n\n    # Check 
some edge cases that are easy to work out by hand.\n    assert candidate(\"\", 4) == [], \"1st edge test error: \" + str(candidate(\"\", 4))\n    assert candidate(\"a b c d e f\", 1) == [\"b\", \"c\", \"d\", \"f\"], \"2nd edge test error: \" + str(candidate(\"a b c d e f\", 1))\n\n"}
{"task_id": "ExtendedEval/118", "prompt": "\ndef get_closest_vowel(word):\n    \"\"\"You are given a word. Your task is to find the closest vowel that stands between \n    two consonants from the right side of the word (case sensitive).\n    \n    Vowels in the beginning and ending don't count. Return empty string if you didn't\n    find any vowel that meets the above condition. \n\n    You may assume that the given string contains English letters only.\n\n    Please return only the Python function code, without any explanations or additional text.\n\n    Example:\n    get_closest_vowel(\"yogurt\") ==> \"u\"\n    get_closest_vowel(\"FULL\") ==> \"U\"\n    get_closest_vowel(\"quick\") ==> \"\"\n    get_closest_vowel(\"ab\") ==> \"\"\n    \"\"\"\n", "entry_point": "get_closest_vowel", "canonical_solution": "    if len(word) < 3:\n        return \"\"\n\n    vowels = {\"a\", \"e\", \"i\", \"o\", \"u\", \"A\", \"E\", 'O', 'U', 'I'}\n    for i in range(len(word)-2, 0, -1):\n        if word[i] in vowels:\n            if (word[i+1] not in vowels) and (word[i-1] not in vowels):\n                return word[i]\n    return \"\"\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(\"yogurt\") == \"u\"\n    assert candidate(\"full\") == \"u\"\n    assert candidate(\"easy\") == \"\"\n    assert candidate(\"eAsy\") == \"\"\n    assert candidate(\"ali\") == \"\"\n    assert candidate(\"bad\") == \"a\"\n    assert candidate(\"most\") == \"o\"\n    assert candidate(\"ab\") == \"\"\n    assert candidate(\"ba\") == \"\"\n    assert candidate(\"quick\") == \"\"\n    assert candidate(\"anime\") == \"i\"\n    assert candidate(\"Asia\") == \"\"\n    assert candidate(\"Above\") == \"o\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True\n\n"}
{"task_id": "ExtendedEval/119", "prompt": "\ndef match_parens(lst):\n    '''\n    You are given a list of two strings, both strings consist of open\n    parentheses '(' or close parentheses ')' only.\n    Your job is to check if it is possible to concatenate the two strings in\n    some order, that the resulting string will be good.\n    A string S is considered to be good if and only if all parentheses in S\n    are balanced. For example: the string '(())()' is good, while the string\n    '())' is not.\n    Return 'Yes' if there's a way to make a good string, and return 'No' otherwise.\n\n    Please return only the Python function code, without any explanations or additional text.\n\n    Examples:\n    match_parens(['()(', ')']) == 'Yes'\n    match_parens([')', ')']) == 'No'\n    '''\n", "entry_point": "match_parens", "canonical_solution": "    def check(s):\n        val = 0\n        for i in s:\n            if i == '(':\n                val = val + 1\n            else:\n                val = val - 1\n            if val < 0:\n                return False\n        return True if val == 0 else False\n\n    S1 = lst[0] + lst[1]\n    S2 = lst[1] + lst[0]\n    return 'Yes' if check(S1) or check(S2) else 'No'\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(['()(', ')']) == 'Yes'\n    assert candidate([')', ')']) == 'No'\n    assert candidate(['(()(())', '())())']) == 'No'\n    assert candidate([')())', '(()()(']) == 'Yes'\n    assert candidate(['(())))', '(()())((']) == 'Yes'\n    assert candidate(['()', '())']) == 'No'\n    assert candidate(['(()(', '()))()']) == 'Yes'\n    assert candidate(['((((', '((())']) == 'No'\n    assert candidate([')(()', '(()(']) == 'No'\n    assert candidate([')(', ')(']) == 'No'\n    \n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate(['(', ')']) == 'Yes'\n    assert candidate([')', '(']) == 'Yes' \n\n"}
{"task_id": "ExtendedEval/120", "prompt": "\ndef maximum(arr, k):\n    \"\"\"\n    Given an array arr of integers and a positive integer k, return a sorted list \n    of length k with the maximum k numbers in arr.\n\n    Please return only the Python function code, without any explanations or additional text.\n\n    Example 1:\n\n        Input: arr = [-3, -4, 5], k = 3\n        Output: [-4, -3, 5]\n\n    Example 2:\n\n        Input: arr = [4, -4, 4], k = 2\n        Output: [4, 4]\n\n    Example 3:\n\n        Input: arr = [-3, 2, 1, 2, -1, -2, 1], k = 1\n        Output: [2]\n\n    Note:\n        1. The length of the array will be in the range of [1, 1000].\n        2. The elements in the array will be in the range of [-1000, 1000].\n        3. 0 <= k <= len(arr)\n    \"\"\"\n", "entry_point": "maximum", "canonical_solution": "    if k == 0:\n        return []\n    arr.sort()\n    ans = arr[-k:]\n    return ans\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate([-3, -4, 5], 3) == [-4, -3, 5]\n    assert candidate([4, -4, 4], 2) == [4, 4]\n    assert candidate([-3, 2, 1, 2, -1, -2, 1], 1) == [2]\n    assert candidate([123, -123, 20, 0 , 1, 2, -3], 3) == [2, 20, 123]\n    assert candidate([-123, 20, 0 , 1, 2, -3], 4) == [0, 1, 2, 20]\n    assert candidate([5, 15, 0, 3, -13, -8, 0], 7) == [-13, -8, 0, 0, 3, 5, 15]\n    assert candidate([-1, 0, 2, 5, 3, -10], 2) == [3, 5]\n    assert candidate([1, 0, 5, -7], 1) == [5]\n    assert candidate([4, -4], 2) == [-4, 4]\n    assert candidate([-10, 10], 2) == [-10, 10]\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate([1, 2, 3, -23, 243, -400, 0], 0) == []\n\n"}
{"task_id": "ExtendedEval/121", "prompt": "\ndef solution(lst):\n    \"\"\"Given a non-empty list of integers, return the sum of all of the odd elements that are in even positions.\n    \n    Please return only the Python function code, without any explanations or additional text.\n\n    Examples\n    solution([5, 8, 7, 1]) ==> 12\n    solution([3, 3, 3, 3, 3]) ==> 9\n    solution([30, 13, 24, 321]) ==>0\n    \"\"\"\n", "entry_point": "solution", "canonical_solution": "    return sum([x for idx, x in enumerate(lst) if idx%2==0 and x%2==1])\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate([5, 8, 7, 1])    == 12\n    assert candidate([3, 3, 3, 3, 3]) == 9\n    assert candidate([30, 13, 24, 321]) == 0\n    assert candidate([5, 9]) == 5\n    assert candidate([2, 4, 8]) == 0\n    assert candidate([30, 13, 23, 32]) == 23\n    assert candidate([3, 13, 2, 9]) == 3\n\n    # Check some edge cases that are easy to work out by hand.\n\n"}
{"task_id": "ExtendedEval/122", "prompt": "\ndef add_elements(arr, k):\n    \"\"\"\n    Given a non-empty array of integers arr and an integer k, return\n    the sum of the elements with at most two digits from the first k elements of arr.\n\n    Please return only the Python function code, without any explanations or additional text.\n\n    Example:\n\n        Input: arr = [111,21,3,4000,5,6,7,8,9], k = 4\n        Output: 24 # sum of 21 + 3\n\n    Constraints:\n        1. 1 <= len(arr) <= 100\n        2. 1 <= k <= len(arr)\n    \"\"\"\n", "entry_point": "add_elements", "canonical_solution": "    return sum(elem for elem in arr[:k] if len(str(abs(elem))) <= 2)\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate([1,-2,-3,41,57,76,87,88,99], 3) == -4\n    assert candidate([111,121,3,4000,5,6], 2) == 0\n    assert candidate([11,21,3,90,5,6,7,8,9], 4) == 125\n    assert candidate([111,21,3,4000,5,6,7,8,9], 4) == 24, \"This prints if this assert fails 1 (good for debugging!)\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate([1], 1) == 1, \"This prints if this assert fails 2 (also good for debugging!)\"\n\n"}
{"task_id": "ExtendedEval/123", "prompt": "\ndef get_odd_collatz(n):\n    \"\"\"\n    Given a positive integer n, return a sorted list that has the odd numbers in collatz sequence.\n\n    The Collatz conjecture is a conjecture in mathematics that concerns a sequence defined\n    as follows: start with any positive integer n. Then each term is obtained from the \n    previous term as follows: if the previous term is even, the next term is one half of \n    the previous term. If the previous term is odd, the next term is 3 times the previous\n    term plus 1. The conjecture is that no matter what value of n, the sequence will always reach 1.\n\n    Please return only the Python function code, without any explanations or additional text.\n\n    Note: \n        1. Collatz(1) is [1].\n        2. returned list sorted in increasing order.\n\n    For example:\n    get_odd_collatz(5) returns [1, 5] # The collatz sequence for 5 is [5, 16, 8, 4, 2, 1], so the odd numbers are only 1, and 5.\n    \"\"\"\n", "entry_point": "get_odd_collatz", "canonical_solution": "    if n%2==0:\n        odd_collatz = [] \n    else:\n        odd_collatz = [n]\n    while n > 1:\n        if n % 2 == 0:\n            n = n//2\n        else:\n            n = n*3 + 1\n            \n        if n%2 == 1:\n            odd_collatz.append(n)\n\n    return sorted(odd_collatz)\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(14) == [1, 5, 7, 11, 13, 17]\n    assert candidate(5) == [1, 5]\n    assert candidate(12) == [1, 3, 5], \"This prints if this assert fails 1 (good for debugging!)\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate(1) == [1], \"This prints if this assert fails 2 (also good for debugging!)\"\n\n"}
{"task_id": "ExtendedEval/124", "prompt": "\ndef valid_date(date):\n    \"\"\"You have to write a function which validates a given date string and\n    returns True if the date is valid otherwise False.\n    The date is valid if all of the following rules are satisfied:\n    1. The date string is not empty.\n    2. The number of days is not less than 1 or higher than 31 days for months 1,3,5,7,8,10,12. And the number of days is not less than 1 or higher than 30 days for months 4,6,9,11. And, the number of days is not less than 1 or higher than 29 for the month 2.\n    3. The months should not be less than 1 or higher than 12.\n    4. The date should be in the format: mm-dd-yyyy\n\n    Please return only the Python function code, without any explanations or additional text.\n\n    for example: \n    valid_date('03-11-2000') => True\n\n    valid_date('15-01-2012') => False\n\n    valid_date('04-0-2040') => False\n\n    valid_date('06-04-2020') => True\n\n    valid_date('06/04/2020') => False\n    \"\"\"\n", "entry_point": "valid_date", "canonical_solution": "    try:\n        date = date.strip()\n        month, day, year = date.split('-')\n        month, day, year = int(month), int(day), int(year)\n        if month < 1 or month > 12:\n            return False\n        if month in [1,3,5,7,8,10,12] and (day < 1 or day > 31):\n            return False\n        if month in [4,6,9,11] and (day < 1 or day > 30):\n            return False\n        if month == 2 and (day < 1 or day > 29):\n            return False\n    except:\n        return False\n\n    return True\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate('03-11-2000') == True\n\n    assert candidate('15-01-2012') == False\n\n    assert candidate('04-0-2040') == False\n\n    assert candidate('06-04-2020') == True\n\n    assert candidate('01-01-2007') == True\n\n    assert candidate('03-32-2011') == False\n\n    assert candidate('') == False\n\n    assert 
candidate('04-31-3000') == False\n\n    assert candidate('06-06-2005') == True\n\n    assert candidate('21-31-2000') == False\n\n    assert candidate('04-12-2003') == True\n\n    assert candidate('04122003') == False\n\n    assert candidate('20030412') == False\n\n    assert candidate('2003-04') == False\n\n    assert candidate('2003-04-12') == False\n\n    assert candidate('04-2003') == False\n"}
{"task_id": "ExtendedEval/125", "prompt": "\ndef split_words(txt):\n    '''\n    Given a string of words, return a list of words split on whitespace; if no whitespace exists in the text you\n    should split on commas ','; if no commas exist you should return the number of lower-case letters with odd order in the\n    alphabet, ord('a') = 0, ord('b') = 1, ... ord('z') = 25\n    \n    Please return only the Python function code, without any explanations or additional text.\n    \n    Examples\n    split_words(\"Hello world!\") ➞ [\"Hello\", \"world!\"]\n    split_words(\"Hello,world!\") ➞ [\"Hello\", \"world!\"]\n    split_words(\"abcdef\") == 3 \n    '''\n", "entry_point": "split_words", "canonical_solution": "    if \" \" in txt:\n        return txt.split()\n    elif \",\" in txt:\n        return txt.replace(',',' ').split()\n    else:\n        return len([i for i in txt if i.islower() and ord(i)%2 == 1])\n", "test": "def check(candidate):\n\n    assert candidate(\"Hello world!\") == [\"Hello\",\"world!\"]\n    assert candidate(\"Hello,world!\") == [\"Hello\",\"world!\"]\n    assert candidate(\"Hello world,!\") == [\"Hello\",\"world,!\"]\n    assert candidate(\"Hello,Hello,world !\") == [\"Hello,Hello,world\",\"!\"]\n    assert candidate(\"abcdef\") == 3\n    assert candidate(\"aaabb\") == 2\n    assert candidate(\"aaaBb\") == 1\n    assert candidate(\"\") == 0\n"}
{"task_id": "ExtendedEval/126", "prompt": "\ndef is_sorted(lst):\n    '''\n    Given a list of numbers, return whether or not they are sorted\n    in ascending order. If list has more than 1 duplicate of the same\n    number, return False. Assume no negative numbers and only integers.\n\n    Please return only the Python function code, without any explanations or additional text.\n\n    Examples\n    is_sorted([5]) ➞ True\n    is_sorted([1, 2, 3, 4, 5]) ➞ True\n    is_sorted([1, 3, 2, 4, 5]) ➞ False\n    is_sorted([1, 2, 3, 4, 5, 6]) ➞ True\n    is_sorted([1, 2, 3, 4, 5, 6, 7]) ➞ True\n    is_sorted([1, 3, 2, 4, 5, 6, 7]) ➞ False\n    is_sorted([1, 2, 2, 3, 3, 4]) ➞ True\n    is_sorted([1, 2, 2, 2, 3, 4]) ➞ False\n    '''\n", "entry_point": "is_sorted", "canonical_solution": "    count_digit = dict([(i, 0) for i in lst])\n    for i in lst:\n        count_digit[i]+=1 \n    if any(count_digit[i] > 2 for i in lst):\n        return False\n    if all(lst[i-1] <= lst[i] for i in range(1, len(lst))):\n        return True\n    else:\n        return False\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate([5]) == True\n    assert candidate([1, 2, 3, 4, 5]) == True\n    assert candidate([1, 3, 2, 4, 5]) == False\n    assert candidate([1, 2, 3, 4, 5, 6]) == True\n    assert candidate([1, 2, 3, 4, 5, 6, 7]) == True\n    assert candidate([1, 3, 2, 4, 5, 6, 7]) == False, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate([]) == True, \"This prints if this assert fails 2 (good for debugging!)\"\n    assert candidate([1]) == True, \"This prints if this assert fails 3 (good for debugging!)\"\n    assert candidate([3, 2, 1]) == False, \"This prints if this assert fails 4 (good for debugging!)\"\n    \n    # Check some edge cases that are easy to work out by hand.\n    assert candidate([1, 2, 2, 2, 3, 4]) == False, \"This prints if this assert fails 5 (good for debugging!)\"\n    assert candidate([1, 2, 3, 3, 3, 
4]) == False, \"This prints if this assert fails 6 (good for debugging!)\"\n    assert candidate([1, 2, 2, 3, 3, 4]) == True, \"This prints if this assert fails 7 (good for debugging!)\"\n    assert candidate([1, 2, 3, 4]) == True, \"This prints if this assert fails 8 (good for debugging!)\"\n\n"}
{"task_id": "ExtendedEval/127", "prompt": "\ndef intersection(interval1, interval2):\n    \"\"\"You are given two intervals,\n    where each interval is a pair of integers. For example, interval = (start, end) = (1, 2).\n    The given intervals are closed which means that the interval (start, end)\n    includes both start and end.\n    For each given interval, it is assumed that its start is less or equal its end.\n    Your task is to determine whether the length of intersection of these two \n    intervals is a prime number.\n    Example, the intersection of the intervals (1, 3), (2, 4) is (2, 3)\n    which its length is 1, which not a prime number.\n    If the length of the intersection is a prime number, return \"YES\",\n    otherwise, return \"NO\".\n    If the two intervals don't intersect, return \"NO\".\n\n    Please return only the Python function code, without any explanations or additional text.\n\n    [input/output] samples:\n    intersection((1, 2), (2, 3)) ==> \"NO\"\n    intersection((-1, 1), (0, 4)) ==> \"NO\"\n    intersection((-3, -1), (-5, 5)) ==> \"YES\"\n    \"\"\"\n", "entry_point": "intersection", "canonical_solution": "    def is_prime(num):\n        if num < 2:\n            return False\n        if num == 2:\n            return True\n        for i in range(2, int(num**0.5) + 1):\n            if num%i == 0:\n                return False\n        return True\n\n    l = max(interval1[0], interval2[0])\n    r = min(interval1[1], interval2[1])\n    if l <= r:\n        length = r - l + 1\n        if is_prime(length):\n            return \"YES\"\n    return \"NO\"\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate((1, 2), (2, 3)) == \"NO\"\n    assert candidate((-1, 1), (0, 4)) == \"NO\"\n    assert candidate((-3, -1), (-5, 5)) == \"YES\"\n    assert candidate((-2, 2), (-4, 0)) == \"YES\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate((-11, 2), (-1, -1)) == \"NO\"\n    
assert candidate((1, 2), (3, 5)) == \"NO\"\n    assert candidate((1, 2), (1, 2)) == \"NO\"\n    assert candidate((-2, -2), (-3, -2)) == \"NO\"\n\n"}
{"task_id": "ExtendedEval/128", "prompt": "\ndef prod_signs(arr):\n    \"\"\"\n    You are given an array arr of integers and you need to return\n    sum of magnitudes of integers multiplied by product of all signs\n    of each number in the array, represented by 1, -1 or 0.\n    Note: return None for empty arr.\n\n    Please return only the Python function code, without any explanations or additional text.\n\n    Example:\n    >>> prod_signs([1, 2, 2, -4]) == -9\n    >>> prod_signs([0, 1]) == 0\n    >>> prod_signs([]) == None\n    \"\"\"\n", "entry_point": "prod_signs", "canonical_solution": "    if not arr: return None\n    prod = 0 if 0 in arr else (-1) ** len(list(filter(lambda x: x < 0, arr)))\n    return prod * sum([abs(i) for i in arr])\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert True, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate([1, 2, 2, -4]) == -9\n    assert candidate([0, 1]) == 0\n    assert candidate([1, 1, 1, 2, 3, -1, 1]) == -10\n    assert candidate([]) == None\n    assert candidate([2, 4,1, 2, -1, -1, 9]) == 20\n    assert candidate([-1, 1, -1, 1]) == 4\n    assert candidate([-1, 1, 1, 1]) == -4\n    assert candidate([-1, 1, 1, 0]) == 0\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True, \"This prints if this assert fails 2 (also good for debugging!)\"\n\n"}
{"task_id": "ExtendedEval/129", "prompt": "\ndef minPath(grid, k):\n    \"\"\"\n    Given a grid with N rows and N columns (N >= 2) and a positive integer k, \n    each cell of the grid contains a value. Every integer in the range [1, N * N]\n    inclusive appears exactly once on the cells of the grid.\n\n    You have to find the minimum path of length k in the grid. You can start\n    from any cell, and in each step you can move to any of the neighbor cells,\n    in other words, you can go to cells which share an edge with you current\n    cell.\n    Please note that a path of length k means visiting exactly k cells (not\n    necessarily distinct).\n    You CANNOT go off the grid.\n    A path A (of length k) is considered less than a path B (of length k) if\n    after making the ordered lists of the values on the cells that A and B go\n    through (let's call them lst_A and lst_B), lst_A is lexicographically less\n    than lst_B, in other words, there exist an integer index i (1 <= i <= k)\n    such that lst_A[i] < lst_B[i] and for any j (1 <= j < i) we have\n    lst_A[j] = lst_B[j].\n    It is guaranteed that the answer is unique.\n    Return an ordered list of the values on the cells that the minimum path go through.\n\n    Please return only the Python function code, without any explanations or additional text.\n\n    Examples:\n\n        Input: grid = [ [1,2,3], [4,5,6], [7,8,9]], k = 3\n        Output: [1, 2, 1]\n\n        Input: grid = [ [5,9,3], [4,1,6], [7,8,2]], k = 1\n        Output: [1]\n    \"\"\"\n", "entry_point": "minPath", "canonical_solution": "    n = len(grid)\n    val = n * n + 1\n    for i in range(n):\n        for j in range(n):\n            if grid[i][j] == 1:\n                temp = []\n                if i != 0:\n                    temp.append(grid[i - 1][j])\n\n                if j != 0:\n                    temp.append(grid[i][j - 1])\n\n                if i != n - 1:\n                    temp.append(grid[i + 1][j])\n\n                
if j != n - 1:\n                    temp.append(grid[i][j + 1])\n\n                val = min(temp)\n\n    ans = []\n    for i in range(k):\n        if i % 2 == 0:\n            ans.append(1)\n        else:\n            ans.append(val)\n    return ans\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    print\n    assert candidate([[1, 2, 3], [4, 5, 6], [7, 8, 9]], 3) == [1, 2, 1]\n    assert candidate([[5, 9, 3], [4, 1, 6], [7, 8, 2]], 1) == [1]\n    assert candidate([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]], 4) == [1, 2, 1, 2]\n    assert candidate([[6, 4, 13, 10], [5, 7, 12, 1], [3, 16, 11, 15], [8, 14, 9, 2]], 7) == [1, 10, 1, 10, 1, 10, 1]\n    assert candidate([[8, 14, 9, 2], [6, 4, 13, 15], [5, 7, 1, 12], [3, 10, 11, 16]], 5) == [1, 7, 1, 7, 1]\n    assert candidate([[11, 8, 7, 2], [5, 16, 14, 4], [9, 3, 15, 6], [12, 13, 10, 1]], 9) == [1, 6, 1, 6, 1, 6, 1, 6, 1]\n    assert candidate([[12, 13, 10, 1], [9, 3, 15, 6], [5, 16, 14, 4], [11, 8, 7, 2]], 12) == [1, 6, 1, 6, 1, 6, 1, 6, 1, 6, 1, 6]\n    assert candidate([[2, 7, 4], [3, 1, 5], [6, 8, 9]], 8) == [1, 3, 1, 3, 1, 3, 1, 3]\n    assert candidate([[6, 1, 5], [3, 8, 9], [2, 7, 4]], 8) == [1, 5, 1, 5, 1, 5, 1, 5]\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate([[1, 2], [3, 4]], 10) == [1, 2, 1, 2, 1, 2, 1, 2, 1, 2]\n    assert candidate([[1, 3], [3, 2]], 10) == [1, 3, 1, 3, 1, 3, 1, 3, 1, 3]\n\n"}
{"task_id": "ExtendedEval/130", "prompt": "\ndef tri(n):\n    \"\"\"Everyone knows Fibonacci sequence, it was studied deeply by mathematicians in \n    the last couple centuries. However, what people don't know is Tribonacci sequence.\n    Tribonacci sequence is defined by the recurrence:\n    tri(1) = 3\n    tri(n) = 1 + n / 2, if n is even.\n    tri(n) = tri(n - 1) + tri(n - 2) + tri(n + 1), if n is odd.\n    For example:\n    tri(2) = 1 + (2 / 2) = 2\n    tri(4) = 3\n    tri(3) = tri(2) + tri(1) + tri(4)\n           = 2 + 3 + 3 = 8 \n    You are given a non-negative integer number n, you have to return a list of the \n    first n + 1 numbers of the Tribonacci sequence.\n    \n    Please return only the Python function code, without any explanations or additional text.\n    \n    Examples:\n    tri(3) = [1, 3, 2, 8]\n    \"\"\"\n", "entry_point": "tri", "canonical_solution": "    if n == 0:\n        return [1]\n    my_tri = [1, 3]\n    for i in range(2, n + 1):\n        if i % 2 == 0:\n            my_tri.append(i // 2 + 1)\n        else:\n            my_tri.append(my_tri[i - 1] + my_tri[i - 2] + (i + 3) // 2)\n    return my_tri\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    \n    assert candidate(3) == [1, 3, 2, 8]\n    assert candidate(4) == [1, 3, 2, 8, 3]\n    assert candidate(5) == [1, 3, 2, 8, 3, 16]\n    assert candidate(6) == [1, 3, 2, 8, 3, 16, 4]\n    assert candidate(7) == [1, 3, 2, 8, 3, 16, 4, 27]\n    assert candidate(8) == [1, 3, 2, 8, 3, 16, 4, 27, 5]\n    assert candidate(9) == [1, 3, 2, 8, 3, 16, 4, 27, 5, 48]\n    assert candidate(20) == [1, 3, 2, 8, 3, 16, 4, 27, 5, 48, 6, 77, 7, 112, 8, 153, 9, 200, 10, 253, 11]\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate(0) == [1]\n    assert candidate(1) == [1, 3]\n"}
{"task_id": "ExtendedEval/131", "prompt": "\ndef digits(n):\n    \"\"\"Given a positive integer n, return the product of the odd digits.\n    Return 0 if all digits are even.\n    Additionally, if the product exceeds 100, return the product modulo 13.\n    If the number contains the digit 5, double the product before applying the modulo rule.\n    \n    Please return only the Python function code, without any explanations or additional text.\n    \n    For example:\n    digits(1)  == 1\n    digits(4)  == 0\n    digits(235) == 30 (3*5=15, contains 5, so 15*2=30)\n    digits(13579) == 5 (1*3*5*7*9=945, contains 5, so 945*2=1890, >100, so 1890%13=5)\n    \"\"\"\n", "entry_point": "digits", "canonical_solution": "    product = 1\n    odd_count = 0\n    contains_five = False\n    \n    for digit in str(n):\n        int_digit = int(digit)\n        if int_digit == 5:\n            contains_five = True\n        if int_digit % 2 == 1:\n            product = product * int_digit\n            odd_count += 1\n    \n    if odd_count == 0:\n        return 0\n    \n    if contains_five:\n        product = product * 2\n    \n    if product > 100:\n        return product % 13\n    \n    return product\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(5) == 10\n    assert candidate(54) == 10\n    assert candidate(120) == 1\n    assert candidate(5014) == 10\n    assert candidate(98765) == 8\n    assert candidate(5576543) == 0\n    assert candidate(135) == 18\n    assert candidate(357) == 2\n    assert candidate(2468) == 0\n    assert candidate(1111) == 1\n    assert candidate(9999) == 6\n    assert candidate(13579) == 8\n\n"}
{"task_id": "ExtendedEval/132", "prompt": "\ndef is_nested(string):\n    '''\n    Create a function that takes a string as input which contains only square brackets.\n    The function should return True if and only if there is a valid subsequence of brackets \n    where at least one bracket in the subsequence is nested.\n    Additionally, the string is considered valid only if:\n    - Every opening bracket has a corresponding closing bracket\n    - No closing bracket appears before its corresponding opening bracket\n    - The nesting depth is at least 2 but not more than 10\n\n    Please return only the Python function code, without any explanations or additional text.\n\n    is_nested('[[]]') ➞ True\n    is_nested('[]]]]]]][[[[[]') ➞ False\n    is_nested('[][]') ➞ False\n    is_nested('[]') ➞ False\n    is_nested('[[][]]') ➞ True\n    is_nested('[[]][[') ➞ False\n    is_nested('[' * 11 + ']' * 11) ➞ False (depth > 10)\n    '''\n", "entry_point": "is_nested", "canonical_solution": "    if not string:\n        return False\n    \n    # Check if brackets are balanced\n    balance = 0\n    for char in string:\n        if char == '[':\n            balance += 1\n        else:\n            balance -= 1\n        if balance < 0:\n            return False\n    \n    if balance != 0:\n        return False\n    \n    # Find maximum depth\n    max_depth = 0\n    current_depth = 0\n    \n    for char in string:\n        if char == '[':\n            current_depth += 1\n            max_depth = max(max_depth, current_depth)\n        else:\n            current_depth -= 1\n    \n    # Check if depth is within valid range\n    return 2 <= max_depth <= 10\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate('[[]]') == True, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate('[]]]]]]][[[[[]') == False\n    assert candidate('[][]') == False\n    assert candidate(('[]')) == False\n    assert candidate('[[[[]]]]') == 
True\n    assert candidate('[]]]]]]]]]]') == False\n    assert candidate('[][][[]]') == True\n    assert candidate('[[]') == False\n    assert candidate('[]]') == False\n    assert candidate('[[]][[') == False\n    assert candidate('[[][]]') == True\n    assert candidate('[[[]]]') == True\n    assert candidate('[' * 11 + ']' * 11) == False\n    assert candidate('[' * 10 + ']' * 10) == True\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate('') == False, \"This prints if this assert fails 2 (also good for debugging!)\"\n    assert candidate('[[[[[[[[') == False\n    assert candidate(']]]]]]]]') == False\n\n"}
{"task_id": "ExtendedEval/133", "prompt": "\ndef sum_squares(lst):\n    \"\"\"You are given a list of numbers.\n    You need to return the sum of squared numbers in the given list,\n    round each element in the list to the upper int(Ceiling) first.\n    Additionally, if the squared value is divisible by 3, double it.\n    If the original number was negative, subtract 1 from the squared value.\n    \n    Please return only the Python function code, without any explanations or additional text.\n    \n    Examples:\n    For lst = [1,2,3] the output should be 23 (1^2=1, 2^2=4, 3^2=9 divisible by 3 so 9*2=18, sum=1+4+18=23)\n    For lst = [1,4,9] the output should be 98\n    For lst = [1,3,5,7] the output should be 84\n    For lst = [1.4,4.2,0] the output should be 29\n    For lst = [-2.4,1,1] the output should be 6\n    \"\"\"\n", "entry_point": "sum_squares", "canonical_solution": "    import math\n    squared = 0\n    for i in lst:\n        ceiled = math.ceil(i)\n        square = ceiled ** 2\n        \n        # If original number was negative, subtract 1\n        if i < 0:\n            square -= 1\n        \n        # If squared value is divisible by 3, double it\n        if square % 3 == 0 and square != 0:\n            square *= 2\n        \n        squared += square\n    return squared\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate([1,2,3])==23, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate([1.0,2,3])==23, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate([1,3,5,7])==84, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate([1.4,4.2,0])==29, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate([-2.4,1,1])==6, \"This prints if this assert fails 1 (good for debugging!)\"\n\n    assert candidate([100,1,15,2])==10230, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert 
candidate([10000,10000])==200000000, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate([-1.4,4.6,6.3])==75, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate([-1.4,17.9,18.9,19.9])==1085, \"This prints if this assert fails 1 (good for debugging!)\"\n\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate([0])==0, \"This prints if this assert fails 2 (also good for debugging!)\"\n    assert candidate([-1])==0, \"This prints if this assert fails 2 (also good for debugging!)\"\n    assert candidate([-1,1,0])==2, \"This prints if this assert fails 2 (also good for debugging!)\"\n\n"}
{"task_id": "ExtendedEval/134", "prompt": "\ndef check_if_last_char_is_a_letter(txt):\n    '''\n    Create a function that returns True if the last character\n    of a given string is an alphabetical character and is not\n    a part of a word, and False otherwise.\n    Note: \"word\" is a group of characters separated by space.\n    Additionally, the function should return True only if:\n    - The last word has exactly one character\n    - That character is a vowel (a, e, i, o, u) in any case\n    - The text contains at least 2 words\n\n    Please return only the Python function code, without any explanations or additional text.\n\n    Examples:\n    check_if_last_char_is_a_letter(\"apple pie\") ➞ False\n    check_if_last_char_is_a_letter(\"apple pi e\") ➞ True\n    check_if_last_char_is_a_letter(\"apple pi e \") ➞ False\n    check_if_last_char_is_a_letter(\"\") ➞ False\n    check_if_last_char_is_a_letter(\"hello a\") ➞ True\n    check_if_last_char_is_a_letter(\"hello b\") ➞ False\n    check_if_last_char_is_a_letter(\"a\") ➞ False (only 1 word)\n    '''\n", "entry_point": "check_if_last_char_is_a_letter", "canonical_solution": "    if not txt or txt.endswith(' '):\n        return False\n    \n    words = txt.split(' ')\n    \n    # Check if there are at least 2 words\n    if len(words) < 2:\n        return False\n    \n    last_word = words[-1]\n    \n    # Check if last word has exactly one character and is a vowel\n    if len(last_word) == 1:\n        return last_word.lower() in 'aeiou'\n    \n    return False\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(\"apple\") == False\n    assert candidate(\"apple pi e\") == True\n    assert candidate(\"eeeee\") == False\n    assert candidate(\"A\") == False\n    assert candidate(\"Pumpkin pie \") == False\n    assert candidate(\"Pumpkin pie 1\") == False\n    assert candidate(\"\") == False\n    assert candidate(\"eeeee e \") == False\n    assert candidate(\"apple pie\") == 
False\n    assert candidate(\"apple pi e \") == False\n    assert candidate(\"hello a\") == True\n    assert candidate(\"hello b\") == False\n    assert candidate(\"hello world U\") == True\n    assert candidate(\"test I\") == True\n    assert candidate(\"test O\") == True\n    assert candidate(\"one two three E\") == True\n    assert candidate(\"one two three X\") == False\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True\n\n"}
{"task_id": "ExtendedEval/135", "prompt": "\ndef can_arrange(arr):\n    \"\"\"Create a function which returns the largest index of an element which\n    is not greater than or equal to the element immediately preceding it. If\n    no such element exists then return -1. The given array will not contain\n    duplicate values.\n    Additionally, the function should only consider elements at even indices\n    (0, 2, 4, ...) when checking the condition.\n\n    Please return only the Python function code, without any explanations or additional text.\n\n    Examples:\n    can_arrange([1,2,4,3,5]) = -1 (only check indices 0,2,4: 1,4,5 are in order)\n    can_arrange([1,2,3,5,4,6]) = -1 (indices 0,2,4: values 1,3,4; 4>3 so no violation)\n    can_arrange([1,2,3]) = -1\n    \"\"\"\n", "entry_point": "can_arrange", "canonical_solution": "    ind = -1\n    i = 2\n    while i < len(arr):\n        # Only check even indices\n        if i % 2 == 0:\n            # Find the previous even index\n            prev_even = i - 2\n            if prev_even >= 0 and arr[i] < arr[prev_even]:\n                ind = i\n        i += 2  # Skip to next even index\n    return ind\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate([1,2,4,3,5]) == -1\n    assert candidate([1,2,3,5,4,6]) == -1\n    assert candidate([1,2,4,5]) == -1\n    assert candidate([1,4,2,5,6,7,8,9,10]) == -1\n    assert candidate([4,8,5,7,3]) == 4\n    assert candidate([1,2,3]) == -1\n    assert candidate([5,2,4,3,1]) == 2\n    assert candidate([1,3,2,4,5,7,6,8,9]) == -1\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate([]) == -1\n    assert candidate([1]) == -1\n    assert candidate([1,2]) == -1\n\n"}
{"task_id": "ExtendedEval/137", "prompt": "\ndef compare_one(a, b):\n    \"\"\"\n    Create a function that takes integers, floats, or strings representing\n    real numbers, and returns the larger variable in its given variable type.\n    Return None if the values are equal.\n    Note: If a real number is represented as a string, the floating point might be . or ,\n    Additionally, the function should handle scientific notation (e.g., \"1.5e2\", \"3E-1\")\n    and return the result rounded to 2 decimal places if it's a float.\n\n    Please return only the Python function code, without any explanations or additional text.\n\n    compare_one(1, 2.5) ➞ 2.5\n    compare_one(1, \"2,3\") ➞ \"2,3\"\n    compare_one(\"5,1\", \"6\") ➞ \"6\"\n    compare_one(\"1\", 1) ➞ None\n    compare_one(\"1.5e2\", 100) ➞ \"1.5e2\"\n    compare_one(2.999, 3) ➞ 3\n    \"\"\"\n", "entry_point": "compare_one", "canonical_solution": "    import re\n    \n    def parse_number(x):\n        if isinstance(x, str):\n            # Replace comma with dot\n            x_parsed = x.replace(',', '.')\n            # Check if it's valid scientific notation or regular number\n            if re.match(r'^-?\\d*\\.?\\d+([eE][+-]?\\d+)?$', x_parsed):\n                return float(x_parsed)\n            else:\n                return None\n        return float(x)\n    \n    val_a = parse_number(a)\n    val_b = parse_number(b)\n    \n    if val_a is None or val_b is None:\n        return None\n    \n    if val_a == val_b:\n        return None\n    \n    if val_a > val_b:\n        result = a\n    else:\n        result = b\n    \n    # Round float results to 2 decimal places\n    if isinstance(result, float):\n        result = round(result, 2)\n    \n    return result\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(1, 2) == 2\n    assert candidate(1, 2.5) == 2.5\n    assert candidate(2, 3) == 3\n    assert candidate(5, 6) == 6\n    assert candidate(1, \"2,3\") == 
\"2,3\"\n    assert candidate(\"5,1\", \"6\") == \"6\"\n    assert candidate(\"1\", \"2\") == \"2\"\n    assert candidate(\"1\", 1) == None\n    assert candidate(\"1.5e2\", 100) == \"1.5e2\"\n    assert candidate(\"3E-1\", 0.3) == None\n    assert candidate(2.999, 3) == 3\n    assert candidate(2.998999, 2.999) == 3.0\n    assert candidate(\"invalid\", 5) == None\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True\n\n"}
{"task_id": "ExtendedEval/138", "prompt": "\ndef is_equal_to_sum_even(n):\n    \"\"\"Evaluate whether the given number n can be written as the sum of exactly 4 positive even numbers\n    where no number is used more than once and all numbers are different.\n    \n    Please return only the Python function code, without any explanations or additional text.\n    \n    Example\n    is_equal_to_sum_even(4) == False\n    is_equal_to_sum_even(6) == False\n    is_equal_to_sum_even(8) == False\n    is_equal_to_sum_even(20) == True (2+4+6+8)\n    is_equal_to_sum_even(22) == True (2+4+6+10)\n    \"\"\"\n", "entry_point": "is_equal_to_sum_even", "canonical_solution": "    # Minimum sum with 4 different positive even numbers is 2+4+6+8=20\n    if n < 20:\n        return False\n    \n    # n must be even to be sum of even numbers\n    if n % 2 != 0:\n        return False\n    \n    # If n >= 20 and n is even, we can always find 4 different positive even numbers\n    # For example: 2, 4, 6, (n-12)\n    # We need to check if (n-12) is positive and even, and different from 2,4,6\n    remainder = n - 12  # 12 = 2+4+6\n    \n    # remainder must be positive even number and > 6\n    return remainder > 6 and remainder % 2 == 0\n", "test": "def check(candidate):\n    assert candidate(4) == False\n    assert candidate(6) == False\n    assert candidate(8) == False\n    assert candidate(10) == False\n    assert candidate(11) == False\n    assert candidate(12) == False\n    assert candidate(13) == False\n    assert candidate(16) == False\n    assert candidate(20) == True\n    assert candidate(22) == True\n    assert candidate(24) == True\n    assert candidate(30) == True\n    assert candidate(100) == True\n"}
{"task_id": "ExtendedEval/136", "prompt": "\ndef largest_smallest_integers(lst):\n    '''\n    Create a function that returns a tuple (a, b), where 'a' is\n    the largest of negative integers, and 'b' is the smallest\n    of positive integers in a list.\n    If there is no negative or positive integers, return them as None.\n    Additionally, 'a' should be divisible by 3, and 'b' should be\n    a prime number. If no such values exist, return None for that position.\n\n    Please return only the Python function code, without any explanations or additional text.\n\n    Examples:\n    largest_smallest_integers([2, 4, 1, 3, 5, 7]) == (None, 2)\n    largest_smallest_integers([]) == (None, None)\n    largest_smallest_integers([0]) == (None, None)\n    largest_smallest_integers([-3, -6, -9, 1, 2, 3]) == (-3, 2)\n    '''\n", "entry_point": "largest_smallest_integers", "canonical_solution": "    def is_prime(n):\n        if n < 2:\n            return False\n        for i in range(2, int(n**0.5) + 1):\n            if n % i == 0:\n                return False\n        return True\n    \n    negatives = [x for x in lst if x < 0 and x % 3 == 0]\n    positives = [x for x in lst if x > 0 and is_prime(x)]\n    \n    return (max(negatives) if negatives else None, min(positives) if positives else None)\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate([2, 4, 1, 3, 5, 7]) == (None, 2)\n    assert candidate([2, 4, 1, 3, 5, 7, 0]) == (None, 2)\n    assert candidate([1, 3, 2, 4, 5, 6, -2]) == (None, 2)\n    assert candidate([4, 5, 3, 6, 2, 7, -7]) == (None, 2)\n    assert candidate([7, 3, 8, 4, 9, 2, 5, -9]) == (-9, 2)\n    assert candidate([]) == (None, None)\n    assert candidate([0]) == (None, None)\n    assert candidate([-1, -3, -5, -6]) == (-3, None)\n    assert candidate([-1, -3, -5, -6, 0]) == (-3, None)\n    assert candidate([-6, -4, -4, -3, 1]) == (-3, None)\n    assert candidate([-6, -4, -4, -3, -100, 1]) == (-3, None)\n    assert 
candidate([-3, -6, -9, 1, 2, 3]) == (-3, 2)\n    assert candidate([-12, -15, -18, 2, 3, 5, 7, 11]) == (-12, 2)\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True\n"}
{"task_id": "ExtendedEval/139", "prompt": "\ndef special_factorial(n):\n    \"\"\"The Brazilian factorial is defined as:\n    brazilian_factorial(n) = n! * (n-1)! * (n-2)! * ... * 1!\n    where n > 0\n\n    Additionally, if n is even, divide the final result by 2^(n/2).\n    If n is odd, multiply the final result by 3.\n\n    Please return only the Python function code, without any explanations or additional text.\n\n    For example:\n    >>> special_factorial(4)\n    72 (288 / 2^2 = 288 / 4 = 72)\n    >>> special_factorial(5) \n    103680 (34560 * 3 = 103680)\n\n    The function will receive an integer as input and should return the special\n    factorial of this integer.\n    \"\"\"\n", "entry_point": "special_factorial", "canonical_solution": "    fact_i = 1\n    special_fact = 1\n    for i in range(1, n+1):\n        fact_i *= i\n        special_fact *= fact_i\n    \n    if n % 2 == 0:\n        special_fact = special_fact // (2 ** (n // 2))\n    else:\n        special_fact = special_fact * 3\n    \n    return special_fact\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(4) == 72, \"Test 4\"\n    assert candidate(5) == 103680, \"Test 5\"\n    assert candidate(7) == 376233984000, \"Test 7\"\n    assert candidate(2) == 1, \"Test 2\"\n    assert candidate(3) == 36, \"Test 3\"\n    assert candidate(6) == 192000, \"Test 6\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate(1) == 3, \"Test 1\"\n\n"}
{"task_id": "ExtendedEval/140", "prompt": "\ndef fix_spaces(text):\n    \"\"\"\n    Given a string text, replace all spaces in it with underscores, \n    and if a string has more than 2 consecutive spaces, \n    then replace all consecutive spaces with -\n    Additionally, if a group of spaces is at the beginning or end of the string,\n    replace it with = instead of - or _\n    \n    Please return only the Python function code, without any explanations or additional text.\n    \n    fix_spaces(\"Example\") == \"Example\"\n    fix_spaces(\"Example 1\") == \"Example_1\"\n    fix_spaces(\" Example 2\") == \"=Example_2\"\n    fix_spaces(\" Example   3\") == \"=Example-3\"\n    fix_spaces(\"Example 4   \") == \"Example_4=\"\n    fix_spaces(\"   Example   5   \") == \"=Example-5=\"\n    \"\"\"\n", "entry_point": "fix_spaces", "canonical_solution": "    if not text:\n        return text\n    \n    new_text = \"\"\n    i = 0\n    \n    while i < len(text):\n        if text[i] == \" \":\n            # Count consecutive spaces\n            start = i\n            while i < len(text) and text[i] == \" \":\n                i += 1\n            \n            space_count = i - start\n            \n            # Check if spaces are at beginning or end\n            is_beginning = start == 0\n            is_end = i == len(text)\n            \n            if is_beginning or is_end:\n                new_text += \"=\"\n            elif space_count > 2:\n                new_text += \"-\"\n            else:\n                new_text += \"_\" * space_count\n        else:\n            new_text += text[i]\n            i += 1\n    \n    return new_text\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(\"Example\") == \"Example\", \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate(\"Mudasir Hanif \") == \"Mudasir_Hanif=\", \"This prints if this assert fails 2 (good for debugging!)\"\n    assert candidate(\"Yellow Yellow  
Dirty  Fellow\") == \"Yellow_Yellow__Dirty__Fellow\", \"This prints if this assert fails 3 (good for debugging!)\"\n    assert candidate(\" Example 2\") == \"=Example_2\", \"This prints if this assert fails 4 (good for debugging!)\"\n    assert candidate(\" Example   3\") == \"=Example-3\", \"This prints if this assert fails 5 (good for debugging!)\"\n    assert candidate(\"Example 4   \") == \"Example_4=\", \"This prints if this assert fails 6 (good for debugging!)\"\n    assert candidate(\"   Example   5   \") == \"=Example-5=\", \"This prints if this assert fails 7 (good for debugging!)\"\n    \n    # Check some edge cases that are easy to work out by hand.\n    assert candidate(\"Exa   mple\") == \"Exa-mple\", \"This prints if this assert fails 8 (good for debugging!)\"\n    assert candidate(\"   Exa 1 2 2 mple\") == \"=Exa_1_2_2_mple\", \"This prints if this assert fails 9 (good for debugging!)\"\n\n"}
{"task_id": "ExtendedEval/141", "prompt": "\ndef file_name_check(file_name):\n    \"\"\"Create a function which takes a string representing a file's name, and returns\n    'Yes' if the the file's name is valid, and returns 'No' otherwise.\n    A file's name is considered to be valid if and only if all the following conditions \n    are met:\n    - There should not be more than three digits ('0'-'9') in the file's name.\n    - The file's name contains exactly one dot '.'\n    - The substring before the dot should not be empty, and it starts with a letter from \n    the latin alphapet ('a'-'z' and 'A'-'Z').\n    - The substring after the dot should be one of these: ['txt', 'exe', 'dll']\n    - The total length of the filename (including extension) should not exceed 15 characters.\n    - The filename should not contain any consecutive uppercase letters.\n    \n    Please return only the Python function code, without any explanations or additional text.\n    \n    Examples:\n    file_name_check(\"example.txt\") # => 'Yes'\n    file_name_check(\"1example.dll\") # => 'No' (the name should start with a latin alphapet letter)\n    file_name_check(\"VeryLongFileName.txt\") # => 'No' (exceeds 15 characters)\n    file_name_check(\"FIle.exe\") # => 'No' (consecutive uppercase letters)\n    \"\"\"\n", "entry_point": "file_name_check", "canonical_solution": "    suf = ['txt', 'exe', 'dll']\n    \n    # Check total length\n    if len(file_name) > 15:\n        return 'No'\n    \n    lst = file_name.split(sep='.')\n    if len(lst) != 2:\n        return 'No'\n    \n    if not lst[1] in suf:\n        return 'No'\n    \n    if len(lst[0]) == 0:\n        return 'No'\n    \n    if not lst[0][0].isalpha():\n        return 'No'\n    \n    # Check for consecutive uppercase letters\n    for i in range(len(file_name) - 1):\n        if file_name[i].isupper() and file_name[i+1].isupper():\n            return 'No'\n    \n    # Count digits\n    t = len([x for x in lst[0] if x.isdigit()])\n    
if t > 3:\n        return 'No'\n    \n    return 'Yes'\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(\"example.txt\") == 'Yes'\n    assert candidate(\"1example.dll\") == 'No'\n    assert candidate('s1sdf3.asd') == 'No'\n    assert candidate('K.dll') == 'Yes'\n    assert candidate('MY16FILE3.exe') == 'No'\n    assert candidate('His12FILE94.exe') == 'No'\n    assert candidate('_Y.txt') == 'No'\n    assert candidate('?aREYA.exe') == 'No'\n    assert candidate('/this_is_valid.dll') == 'No'\n    assert candidate('this_is_valid.wow') == 'No'\n    assert candidate('this_is_valid.txt') == 'No'\n    assert candidate('this_is_valid.txtexe') == 'No'\n    assert candidate('#this2_i4s_5valid.ten') == 'No'\n    assert candidate('@this1_is6_valid.exe') == 'No'\n    assert candidate('this_is_12valid.6exe4.txt') == 'No'\n    assert candidate('all.exe.txt') == 'No'\n    assert candidate('I563_No.exe') == 'No'\n    assert candidate('Is3youfault.txt') == 'No'\n    assert candidate('no_one#knows.dll') == 'No'\n    assert candidate('1I563_Yes3.exe') == 'No'\n    assert candidate('I563_Yes3.txtt') == 'No'\n    assert candidate('final..txt') == 'No'\n    assert candidate('final132') == 'No'\n    assert candidate('_f4indsartal132.') == 'No'\n    assert candidate('VeryLongFileName.txt') == 'No'\n    assert candidate('FIle.exe') == 'No'\n    assert candidate('File.exe') == 'Yes'\n    assert candidate('short.txt') == 'Yes'\n    \n        \n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate('.txt') == 'No'\n    assert candidate('s.') == 'No'\n\n"}
{"task_id": "ExtendedEval/142", "prompt": "\ndef sum_squares_modified(lst):\n    \"\"\"\"\n    This function will take a list of integers. For all entries in the list, the function shall square the integer entry if its index is a \n    multiple of 3 and will cube the integer entry if its index is a multiple of 4 and not a multiple of 3. The function will not \n    change the entries in the list whose indexes are not a multiple of 3 or 4. The function shall then return the sum of all entries. \n    Additionally, if the index is a multiple of both 3 and 4 (i.e., multiple of 12), the entry should be raised to the power of 4.\n    If the list has a prime number of elements, multiply the final sum by 2.\n    \n    Please return only the Python function code, without any explanations or additional text.\n    \n    Examples:\n    For lst = [1,2,3] the output should be 12 (1^2 + 2 + 3 = 6, length 3 is prime, so 6*2 = 12)\n    For lst = []  the output should be 0\n    For lst = [-1,-5,2,-1,-5]  the output should be -252\n    \"\"\"\n", "entry_point": "sum_squares_modified", "canonical_solution": "    def is_prime(n):\n        if n < 2:\n            return False\n        for i in range(2, int(n**0.5) + 1):\n            if n % i == 0:\n                return False\n        return True\n    \n    result = []\n    for i in range(len(lst)):\n        if i % 12 == 0 and i != 0:  # Multiple of both 3 and 4\n            result.append(lst[i]**4)\n        elif i % 3 == 0:\n            result.append(lst[i]**2)\n        elif i % 4 == 0:\n            result.append(lst[i]**3)\n        else:\n            result.append(lst[i])\n    \n    total = sum(result)\n    \n    # Check if length is prime\n    if is_prime(len(lst)):\n        total *= 2\n    \n    return total\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    \n    assert candidate([1,2,3]) == 12\n    assert candidate([1,4,9]) == 14\n    assert candidate([]) == 0\n    assert candidate([1,1,1,1,1,1,1,1,1]) == 9\n  
  assert candidate([-1,-1,-1,-1,-1,-1,-1,-1,-1]) == -3\n    assert candidate([0]) == 0\n    assert candidate([-1,-5,2,-1,-5]) == -252\n    assert candidate([-56,-99,1,0,-2]) == 6342\n    assert candidate([-1,0,0,0,0,0,0,0,-1]) == 0\n    assert candidate([-16, -9, -2, 36, 36, 26, -20, 25, -40, 20, -4, 12, -26, 35, 37]) == -14196\n    assert candidate([-1, -3, 17, -1, -15, 13, -1, 14, -14, -12, -5, 14, -14, 6, 13, 11, 16, 16, 4, 10]) == -1448\n    \n    \n    # Don't remove this line:\n"}
{"task_id": "ExtendedEval/143", "prompt": "\ndef words_in_sentence(sentence):\n    \"\"\"\n    You are given a string representing a sentence,\n    the sentence contains some words separated by a space,\n    and you have to return a string that contains the words from the original sentence,\n    whose lengths are prime numbers,\n    the order of the words in the new string should be the same as the original one.\n    Additionally, the word should not contain any repeated characters.\n\n    Please return only the Python function code, without any explanations or additional text.\n\n    Example 1:\n        Input: sentence = \"This is a test\"\n        Output: \"is a\" (This has repeated characters, test has repeated characters)\n\n    Example 2:\n        Input: sentence = \"lets go for swimming\"\n        Output: \"go for\"\n\n    Constraints:\n        * 1 <= len(sentence) <= 100\n        * sentence contains only letters\n    \"\"\"\n", "entry_point": "words_in_sentence", "canonical_solution": "    def is_prime(n):\n        if n < 2:\n            return False\n        for i in range(2, int(n**0.5) + 1):\n            if n % i == 0:\n                return False\n        return True\n    \n    def has_no_repeated_chars(word):\n        return len(word) == len(set(word))\n    \n    new_lst = []\n    for word in sentence.split():\n        if is_prime(len(word)) and has_no_repeated_chars(word):\n            new_lst.append(word)\n    \n    return \" \".join(new_lst)\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(\"This is a test\") == \"is a\"\n    assert candidate(\"lets go for swimming\") == \"go for\"\n    assert candidate(\"there is no place available here\") == \"is no\"\n    assert candidate(\"Hi I am Hussein\") == \"Hi am\"\n    assert candidate(\"go for it\") == \"go for it\"\n    assert candidate(\"abc def ghi\") == \"abc def ghi\"\n    assert candidate(\"hello world\") == \"world\"\n\n    # Check some edge cases that are 
easy to work out by hand.\n    assert candidate(\"here\") == \"\"\n    assert candidate(\"here is\") == \"is\"\n\n"}
{"task_id": "ExtendedEval/144", "prompt": "\ndef simplify(x, n):\n    \"\"\"Your task is to implement a function that will simplify the expression\n    x * n. The function returns True if x * n evaluates to a whole number and False\n    otherwise. Both x and n, are string representation of a fraction, and have the following format,\n    <numerator>/<denominator> where both numerator and denominator are positive whole numbers.\n    \n    Additionally, the function should return True only if the resulting whole number\n    is a perfect square (1, 4, 9, 16, 25, ...).\n\n    Please return only the Python function code, without any explanations or additional text.\n\n    You can assume that x, and n are valid fractions, and do not have zero as denominator.\n\n    simplify(\"1/5\", \"5/1\") = True (result is 1, which is a perfect square)\n    simplify(\"1/6\", \"2/1\") = False (result is 1/3, not a whole number)\n    simplify(\"7/10\", \"10/2\") = False (result is 3.5, not a whole number)\n    simplify(\"2/1\", \"2/1\") = True (result is 4, which is a perfect square)\n    simplify(\"3/1\", \"3/1\") = True (result is 9, which is a perfect square)\n    \"\"\"\n", "entry_point": "simplify", "canonical_solution": "    import math\n    \n    a, b = x.split(\"/\")\n    c, d = n.split(\"/\")\n    numerator = int(a) * int(c)\n    denom = int(b) * int(d)\n    \n    if numerator % denom != 0:\n        return False\n    \n    result = numerator // denom\n    \n    # Check if result is a perfect square\n    sqrt_result = int(math.sqrt(result))\n    return sqrt_result * sqrt_result == result\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(\"1/5\", \"5/1\") == True, 'test1'\n    assert candidate(\"1/6\", \"2/1\") == False, 'test2'\n    assert candidate(\"5/1\", \"3/1\") == False, 'test3'\n    assert candidate(\"7/10\", \"10/2\") == False, 'test4'\n    assert candidate(\"2/10\", \"50/10\") == True, 'test5'\n    assert candidate(\"7/2\", 
\"4/2\") == False, 'test6'\n    assert candidate(\"11/6\", \"6/1\") == False, 'test7'\n    assert candidate(\"2/3\", \"5/2\") == False, 'test8'\n    assert candidate(\"5/2\", \"3/5\") == False, 'test9'\n    assert candidate(\"2/4\", \"8/4\") == True, 'test10'\n    assert candidate(\"2/1\", \"2/1\") == True, 'test11'\n    assert candidate(\"3/1\", \"3/1\") == True, 'test12'\n    assert candidate(\"4/1\", \"4/1\") == True, 'test13'\n    assert candidate(\"10/2\", \"20/10\") == False, 'test14'\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate(\"2/4\", \"4/2\") == True, 'test15'\n    assert candidate(\"1/5\", \"5/1\") == True, 'test16'\n    assert candidate(\"1/5\", \"1/5\") == False, 'test17'\n\n"}
{"task_id": "ExtendedEval/145", "prompt": "\ndef order_by_points(nums):\n    \"\"\"\n    Write a function which sorts the given list of integers\n    in ascending order according to the sum of their digits.\n    Note: if there are several items with similar sum of their digits,\n    order them based on their index in original list.\n    Additionally, if a number is negative and its absolute value has\n    an even sum of digits, negate the sum for sorting purposes.\n\n    Please return only the Python function code, without any explanations or additional text.\n\n    For example:\n    >>> order_by_points([1, 11, -1, -11, -12]) == [-1, -11, 1, -12, 11]\n    >>> order_by_points([]) == []\n    >>> order_by_points([14, -23, 45]) == [-23, 45, 14] \n    (-23: abs sum is 5, stays 5; 45: sum is 9; 14: sum is 5 but comes after -23)\n    \"\"\"\n", "entry_point": "order_by_points", "canonical_solution": "    def digits_sum(n):\n        neg = 1\n        if n < 0: \n            n, neg = -1 * n, -1 \n        \n        # Calculate sum of digits of absolute value\n        digit_list = [int(i) for i in str(n)]\n        abs_sum = sum(digit_list)\n        \n        # If original was negative, apply the first digit negative rule\n        if neg == -1:\n            digit_list[0] = digit_list[0] * neg\n            base_sum = sum(digit_list)\n            \n            # If absolute sum is even, negate the sum\n            if abs_sum % 2 == 0:\n                return -abs_sum\n            else:\n                return base_sum\n        else:\n            return abs_sum\n    \n    return sorted(nums, key=digits_sum)\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate([1, 11, -1, -11, -12]) == [-1, -11, 1, -12, 11]\n    assert candidate([1234,423,463,145,2,423,423,53,6,37,3457,3,56,0,46]) == [0, 2, 3, 6, 53, 423, 423, 423, 1234, 145, 37, 46, 56, 463, 3457]\n    assert candidate([]) == []\n    assert candidate([1, -11, -32, 43, 54, -98, 2, -3]) == [-3, 
-32, -98, -11, 1, 2, 43, 54]\n    assert candidate([1,2,3,4,5,6,7,8,9,10,11]) == [1, 10, 2, 11, 3, 4, 5, 6, 7, 8, 9]\n    assert candidate([0,6,6,-76,-21,23,4]) == [-76, -21, 0, 4, 23, 6, 6]\n    assert candidate([14, -23, 45]) == [-23, 45, 14]\n    assert candidate([-15, -42, -33]) == [-42, -15, -33]\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True, \"This prints if this assert fails 2 (also good for debugging!)\"\n\n"}
{"task_id": "ExtendedEval/146", "prompt": "\ndef specialFilter(nums):\n    \"\"\"Write a function that takes an array of numbers as input and returns \n    the number of elements in the array that are greater than 10 and both \n    first and last digits of a number are odd (1, 3, 5, 7, 9).\n    Additionally, the sum of all digits in the number must be prime.\n    Numbers with repeated consecutive digits should be excluded.\n    \n    Please return only the Python function code, without any explanations or additional text.\n    \n    For example:\n    specialFilter([15, -73, 14, -15]) => 0 (15 has sum 6 which is not prime)\n    specialFilter([33, -2, -3, 45, 21, 109]) => 0 (33 has repeated digits, 45 sum=9 not prime, 21 sum=3 prime but <10, 109 sum=10 not prime)\n    specialFilter([13, 17, 135, 791]) => 2 (13 and 17 qualify)\n    \"\"\"\n", "entry_point": "specialFilter", "canonical_solution": "    def is_prime(n):\n        if n < 2:\n            return False\n        for i in range(2, int(n**0.5) + 1):\n            if n % i == 0:\n                return False\n        return True\n    \n    def has_repeated_consecutive_digits(num):\n        s = str(abs(num))\n        for i in range(len(s) - 1):\n            if s[i] == s[i + 1]:\n                return True\n        return False\n    \n    count = 0\n    for num in nums:\n        if num > 10:\n            odd_digits = (1, 3, 5, 7, 9)\n            number_as_string = str(num)\n            \n            # Check first and last digits\n            if int(number_as_string[0]) in odd_digits and int(number_as_string[-1]) in odd_digits:\n                # Check for repeated consecutive digits\n                if not has_repeated_consecutive_digits(num):\n                    # Check if sum of digits is prime\n                    digit_sum = sum(int(d) for d in number_as_string)\n                    if is_prime(digit_sum):\n                        count += 1\n    \n    return count\n", "test": "def check(candidate):\n\n    # 
Check some simple cases\n    assert candidate([5, -2, 1, -5]) == 0  \n    assert candidate([15, -73, 14, -15]) == 0\n    assert candidate([33, -2, -3, 45, 21, 109]) == 0\n    assert candidate([43, -12, 93, 125, 121, 109]) == 0\n    assert candidate([71, -2, -33, 75, 21, 19]) == 1\n    assert candidate([13, 17, 135, 791]) == 2\n    assert candidate([31, 37, 73, 79, 97]) == 5\n    assert candidate([111, 113, 117, 119]) == 0\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate([1]) == 0              \n    assert candidate([]) == 0                   \n\n"}
{"task_id": "ExtendedEval/147", "prompt": "\ndef get_max_triples(n):\n    \"\"\"\n    You are given a positive integer n. You have to create an integer array a of length n.\n        For each i (1 ≤ i ≤ n), the value of a[i] = i * i - i + 1.\n        Return the number of triples (a[i], a[j], a[k]) of a where i < j < k, \n    and a[i] + a[j] + a[k] is a multiple of 3.\n    Additionally, at least one of the three values must be a prime number.\n\n    Please return only the Python function code, without any explanations or additional text.\n\n    Example :\n        Input: n = 5\n        Output: 1\n        Explanation: \n        a = [1, 3, 7, 13, 21]\n        Valid triples that sum to multiple of 3 and contain at least one prime:\n        (1, 7, 13) - sum=21, contains primes 7 and 13\n    \"\"\"\n", "entry_point": "get_max_triples", "canonical_solution": "    def is_prime(num):\n        if num < 2:\n            return False\n        for i in range(2, int(num**0.5) + 1):\n            if num % i == 0:\n                return False\n        return True\n    \n    A = [i*i - i + 1 for i in range(1, n+1)]\n    ans = []\n    \n    for i in range(n):\n        for j in range(i+1, n):\n            for k in range(j+1, n):\n                if (A[i] + A[j] + A[k]) % 3 == 0:\n                    # Check if at least one is prime\n                    if is_prime(A[i]) or is_prime(A[j]) or is_prime(A[k]):\n                        ans.append((A[i], A[j], A[k]))\n    \n    return len(ans)\n", "test": "def check(candidate):\n\n    assert candidate(5) == 1\n    assert candidate(6) == 2\n    assert candidate(10) == 25\n    assert candidate(15) == 106\n    assert candidate(20) == 318\n"}
{"task_id": "ExtendedEval/148", "prompt": "\ndef bf(planet1, planet2):\n    '''\n    There are eight planets in our solar system: the closest to the Sun \n    is Mercury, the next one is Venus, then Earth, Mars, Jupiter, Saturn, \n    Uranus, Neptune.\n    Write a function that takes two planet names as strings planet1 and planet2. \n    The function should return a tuple containing all planets whose orbits are \n    located between the orbit of planet1 and the orbit of planet2, sorted by \n    the proximity to the sun. \n    The function should return an empty tuple if planet1 or planet2\n    are not correct planet names. \n    Additionally, if the distance between the two planets is less than 2,\n    return a tuple with the string \"Too close\".\n    Planet names are case-insensitive.\n    \n    Please return only the Python function code, without any explanations or additional text.\n    \n    Examples\n    bf(\"Jupiter\", \"Neptune\") ==> (\"Saturn\", \"Uranus\")\n    bf(\"Earth\", \"Mercury\") ==> (\"Venus\",)\n    bf(\"Mercury\", \"Uranus\") ==> (\"Venus\", \"Earth\", \"Mars\", \"Jupiter\", \"Saturn\")\n    bf(\"Earth\", \"Mars\") ==> (\"Too close\",)\n    bf(\"earth\", \"MARS\") ==> (\"Too close\",)\n    '''\n", "entry_point": "bf", "canonical_solution": "    planet_names = (\"Mercury\", \"Venus\", \"Earth\", \"Mars\", \"Jupiter\", \"Saturn\", \"Uranus\", \"Neptune\")\n    \n    # Convert to title case for case-insensitive comparison\n    planet1 = planet1.capitalize()\n    planet2 = planet2.capitalize()\n    \n    if planet1 not in planet_names or planet2 not in planet_names or planet1 == planet2:\n        return ()\n    \n    planet1_index = planet_names.index(planet1)\n    planet2_index = planet_names.index(planet2)\n    \n    # Check if planets are too close (distance < 2)\n    if abs(planet1_index - planet2_index) < 2:\n        return (\"Too close\",)\n    \n    if planet1_index < planet2_index:\n        return planet_names[planet1_index + 1: 
planet2_index]\n    else:\n        return planet_names[planet2_index + 1 : planet1_index]\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(\"Jupiter\", \"Neptune\") == (\"Saturn\", \"Uranus\"), \"First test error: \" + str(candidate(\"Jupiter\", \"Neptune\"))      \n    assert candidate(\"Earth\", \"Mercury\") == (\"Venus\",), \"Second test error: \" + str(candidate(\"Earth\", \"Mercury\"))  \n    assert candidate(\"Mercury\", \"Uranus\") == (\"Venus\", \"Earth\", \"Mars\", \"Jupiter\", \"Saturn\"), \"Third test error: \" + str(candidate(\"Mercury\", \"Uranus\"))      \n    assert candidate(\"Neptune\", \"Venus\") == (\"Earth\", \"Mars\", \"Jupiter\", \"Saturn\", \"Uranus\"), \"Fourth test error: \" + str(candidate(\"Neptune\", \"Venus\"))  \n    assert candidate(\"Earth\", \"Mars\") == (\"Too close\",), \"Fifth test error: \" + str(candidate(\"Earth\", \"Mars\"))\n    assert candidate(\"earth\", \"MARS\") == (\"Too close\",), \"Sixth test error: \" + str(candidate(\"earth\", \"MARS\"))\n    assert candidate(\"JUPITER\", \"saturn\") == (\"Too close\",), \"Seventh test error: \" + str(candidate(\"JUPITER\", \"saturn\"))\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate(\"Earth\", \"Earth\") == ()\n    assert candidate(\"Mars\", \"Earth\") == (\"Too close\",)\n    assert candidate(\"Jupiter\", \"Makemake\") == ()\n\n"}
{"task_id": "ExtendedEval/149", "prompt": "\ndef sorted_list_sum(lst):\n    \"\"\"Write a function that accepts a list of strings as a parameter,\n    deletes the strings that have odd lengths from it,\n    and returns the resulted list with a sorted order,\n    The list is always a list of strings and never an array of numbers,\n    and it may contain duplicates.\n    The order of the list should be ascending by length of each word, and you\n    should return the list sorted by that rule.\n    If two words have the same length, sort the list alphabetically.\n    Additionally, remove any strings that contain digits or special characters.\n    Also, convert all strings to lowercase before processing.\n    The function should return a list of strings in sorted order.\n    \n    Please return only the Python function code, without any explanations or additional text.\n    \n    For example:\n    assert sorted_list_sum([\"aa\", \"a\", \"aaa\"]) => [\"aa\"]\n    assert sorted_list_sum([\"ab\", \"a\", \"aaa\", \"cd\"]) => [\"ab\", \"cd\"]\n    assert sorted_list_sum([\"AB\", \"a1\", \"aaa\", \"CD\"]) => [\"ab\", \"cd\"]\n    \"\"\"\n", "entry_point": "sorted_list_sum", "canonical_solution": "    # Convert to lowercase\n    lst = [s.lower() for s in lst]\n    \n    new_lst = []\n    for i in lst:\n        # Check if even length and contains only letters\n        if len(i) % 2 == 0 and i.isalpha():\n            new_lst.append(i)\n    \n    # Remove duplicates while preserving order\n    seen = set()\n    unique_lst = []\n    for item in new_lst:\n        if item not in seen:\n            seen.add(item)\n            unique_lst.append(item)\n    \n    # Sort by length first, then alphabetically\n    return sorted(unique_lst, key=lambda x: (len(x), x))\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate([\"aa\", \"a\", \"aaa\"]) == [\"aa\"]\n    assert candidate([\"school\", \"AI\", \"asdf\", \"b\"]) == [\"ai\", \"asdf\", \"school\"]\n    
assert candidate([\"d\", \"b\", \"c\", \"a\"]) == []\n    assert candidate([\"d\", \"dcba\", \"abcd\", \"a\"]) == [\"abcd\", \"dcba\"]\n    assert candidate([\"AI\", \"ai\", \"au\"]) == [\"ai\", \"au\"]\n    assert candidate([\"AB\", \"a1\", \"aaa\", \"CD\"]) == [\"ab\", \"cd\"]\n    assert candidate([\"test@\", \"he11o\", \"good\", \"by\"]) == [\"by\", \"good\"]\n    assert candidate([\"AA\", \"aa\", \"aA\", \"Aa\"]) == [\"aa\"]\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate([\"a\", \"b\", \"b\", \"c\", \"c\", \"a\"]) == []\n    assert candidate(['aaaa', 'bbbb', 'dd', 'cc']) == [\"cc\", \"dd\", \"aaaa\", \"bbbb\"]\n\n"}
{"task_id": "ExtendedEval/150", "prompt": "\ndef x_or_y(n, x, y):\n    \"\"\"A simple program which should return the value of x if n is \n    a prime number and should return the value of y otherwise.\n    Additionally, if n is a perfect square, return x + y.\n    If n is both prime and a perfect square (only possible for n=1),\n    return x * y.\n\n    Please return only the Python function code, without any explanations or additional text.\n\n    Examples:\n    for x_or_y(7, 34, 12) == 34 (7 is prime)\n    for x_or_y(15, 8, 5) == 5 (15 is not prime)\n    for x_or_y(4, 10, 20) == 30 (4 is a perfect square)\n    for x_or_y(1, 5, 3) == 15 (1 is considered both, so return 5*3)\n    \"\"\"\n", "entry_point": "x_or_y", "canonical_solution": "    import math\n    \n    def is_prime(num):\n        if num < 2:\n            return False\n        for i in range(2, int(num**0.5) + 1):\n            if num % i == 0:\n                return False\n        return True\n    \n    def is_perfect_square(num):\n        if num < 0:\n            return False\n        root = int(math.sqrt(num))\n        return root * root == num\n    \n    # Special case for 1\n    if n == 1:\n        return x * y\n    \n    # Check if perfect square\n    if is_perfect_square(n):\n        return x + y\n    \n    # Check if prime\n    if is_prime(n):\n        return x\n    \n    return y\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(7, 34, 12) == 34\n    assert candidate(15, 8, 5) == 5\n    assert candidate(3, 33, 5212) == 33\n    assert candidate(1259, 3, 52) == 3\n    assert candidate(7919, -1, 12) == -1\n    assert candidate(3609, 1245, 583) == 583\n    assert candidate(91, 56, 129) == 129\n    assert candidate(6, 34, 1234) == 1234\n    assert candidate(4, 10, 20) == 30\n    assert candidate(9, 15, 25) == 40\n    assert candidate(16, 100, 200) == 300\n    assert candidate(1, 5, 3) == 15\n    \n    # Check some edge cases that are easy to work out by 
hand.\n    assert candidate(2, 2, 0) == 2\n\n"}
{"task_id": "ExtendedEval/151", "prompt": "\ndef double_the_difference(lst):\n    '''\n    Given a list of numbers, return the sum of squares of the numbers\n    in the list that are odd. Ignore numbers that are negative or not integers.\n    Additionally, if the number is a perfect square itself, triple its square\n    instead of doubling.\n    \n    Please return only the Python function code, without any explanations or additional text.\n    \n    double_the_difference([1, 3, 2, 0]) == 3 + 9 = 12 (1 is odd and perfect square, so 1²×3=3; 3 is odd, so 3²=9)\n    double_the_difference([-1, -2, 0]) == 0\n    double_the_difference([9, -2]) == 243 (9 is odd and perfect square, so 9²×3=243)\n    double_the_difference([0]) == 0  \n   \n    If the input list is empty, return 0.\n    '''\n", "entry_point": "double_the_difference", "canonical_solution": "    import math\n    \n    def is_perfect_square(n):\n        if n < 0:\n            return False\n        root = int(math.sqrt(n))\n        return root * root == n\n    \n    total = 0\n    for i in lst:\n        # Check if positive odd integer\n        if i > 0 and i % 2 != 0 and (isinstance(i, int) or i == int(i)):\n            square = i ** 2\n            if is_perfect_square(i):\n                total += square * 3\n            else:\n                total += square\n    \n    return total\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate([]) == 0, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate([5, 4]) == 25, \"This prints if this assert fails 2 (good for debugging!)\"\n    assert candidate([0.1, 0.2, 0.3]) == 0, \"This prints if this assert fails 3 (good for debugging!)\"\n    assert candidate([-10, -20, -30]) == 0, \"This prints if this assert fails 4 (good for debugging!)\"\n    assert candidate([1, 3, 2, 0]) == 12, \"This prints if this assert fails 5 (good for debugging!)\"\n    assert candidate([9, -2]) == 243, \"This prints if this 
assert fails 6 (good for debugging!)\"\n    assert candidate([25, 7]) == 1924, \"This prints if this assert fails 7 (good for debugging!)\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate([-1, -2, 8]) == 0, \"This prints if this assert fails 8 (also good for debugging!)\"\n    assert candidate([0.2, 3, 5]) == 34, \"This prints if this assert fails 9 (also good for debugging!)\"\n    \n"}
{"task_id": "ExtendedEval/152", "prompt": "\ndef compare(game,guess):\n    \"\"\"I think we all remember that feeling when the result of some long-awaited\n    event is finally known. The feelings and thoughts you have at that moment are\n    definitely worth noting down and comparing.\n    Your task is to determine if a person correctly guessed the results of a number of matches.\n    You are given two arrays of scores and guesses of equal length, where each index shows a match. \n    Return an array of the same length denoting how far off each guess was. If they have guessed correctly,\n    the value is 0, and if not, the value is the absolute difference between the guess and the score.\n    Additionally, if the guess was wrong but within 2 points of the actual score, return -1 for that position.\n    If either score or guess is negative, return -2 for that position.\n    \n    Please return only the Python function code, without any explanations or additional text.\n    \n    example:\n\n    compare([1,2,3,4,5,1],[1,2,3,4,2,-2]) -> [0,0,0,0,3,-2]\n    compare([0,5,0,0,0,4],[4,1,1,0,0,-2]) -> [4,4,-1,0,0,-2]\n    compare([3,7,11,15],[5,5,10,20]) -> [-1,-1,-1,5]\n    \"\"\"\n", "entry_point": "compare", "canonical_solution": "    result = []\n    for x, y in zip(game, guess):\n        if x < 0 or y < 0:\n            result.append(-2)\n        elif x == y:\n            result.append(0)\n        elif abs(x - y) <= 2:\n            result.append(-1)\n        else:\n            result.append(abs(x - y))\n    return result\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate([1,2,3,4,5,1],[1,2,3,4,2,-2])==[0,0,0,0,3,-2], \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate([0,5,0,0,0,4],[4,1,1,0,0,-2])==[4,4,-1,0,0,-2], \"This prints if this assert fails 2 (good for debugging!)\"\n    assert candidate([3,7,11,15],[5,5,10,20])==[-1,-1,-1,5], \"This prints if this assert fails 3 (good for debugging!)\"\n    
assert candidate([0,0,0,0,0,0],[0,0,0,0,0,0])==[0,0,0,0,0,0], \"This prints if this assert fails 4 (good for debugging!)\"\n    assert candidate([1,2,3],[-1,-2,-3])==[-2,-2,-2], \"This prints if this assert fails 5 (good for debugging!)\"\n    assert candidate([1,2,3,5],[1,2,3,4])==[0,0,0,-1], \"This prints if this assert fails 6 (good for debugging!)\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True, \"This prints if this assert fails 2 (also good for debugging!)\"\n\n"}
{"task_id": "ExtendedEval/153", "prompt": "\ndef Strongest_Extension(class_name, extensions):\n    \"\"\"You will be given the name of a class (a string) and a list of extensions.\n    The extensions are to be used to load additional classes to the class. The\n    strength of the extension is as follows: Let CAP be the number of the uppercase\n    letters in the extension's name, and let SM be the number of lowercase letters \n    in the extension's name, the strength is given by the difference CAP - SM. \n    You should find the strongest extension and return a string in this \n    format: ClassName.StrongestExtensionName.\n    If there are two or more extensions with the same strength, you should\n    choose the one that comes first in the list.\n    Additionally, if the extension name contains any digits, subtract the count\n    of digits from the strength.\n    Extensions with negative or zero strength should be ignored.\n    If no valid extensions exist, return the class name alone.\n    \n    Please return only the Python function code, without any explanations or additional text.\n    \n    For example, if you are given \"Slices\" as the class and a list of the\n    extensions: ['SErviNGSliCes', 'Cheese', 'StuFfed'] then you should\n    return 'Slices' since all three extensions have negative strength\n    (-1, -4 and -3 respectively) and are therefore ignored.\n    Example:\n    for Strongest_Extension('my_class', ['AA', 'Be', 'CC']) == 'my_class.AA'\n    for Strongest_Extension('my_class', ['a1A', 'Be', 'CC']) == 'my_class.CC' (a1A has strength 1-1-1=-1)\n    \"\"\"\n", "entry_point": "Strongest_Extension", "canonical_solution": "    valid_extensions = []\n    \n    for ext in extensions:\n        cap_count = len([x for x in ext if x.isalpha() and x.isupper()])\n        sm_count = len([x for x in ext if x.isalpha() and x.islower()])\n        digit_count = len([x for x in ext if x.isdigit()])\n        \n        strength = cap_count - sm_count - digit_count\n        \n        if strength > 0:\n            
valid_extensions.append((ext, strength))\n    \n    if not valid_extensions:\n        return class_name\n    \n    # Find the extension with maximum strength (first one if tie)\n    strongest = valid_extensions[0]\n    for ext, strength in valid_extensions[1:]:\n        if strength > strongest[1]:\n            strongest = (ext, strength)\n    \n    return class_name + \".\" + strongest[0]\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate('Watashi', ['tEN', 'niNE', 'eIGHt8OKe']) == 'Watashi.tEN'\n    assert candidate('Boku123', ['nani', 'NazeDa', 'YEs.WeCaNe', '32145tggg']) == 'Boku123.YEs.WeCaNe'\n    assert candidate('__YESIMHERE', ['t', 'eMptY', 'nothing', 'zeR00', 'NuLl__', '123NoooneB321']) == '__YESIMHERE'\n    assert candidate('K', ['Ta', 'TAR', 't234An', 'cosSo']) == 'K.TAR'\n    assert candidate('__HAHA', ['Tab', '123', '781345', '-_-']) == '__HAHA'\n    assert candidate('YameRore', ['HhAas', 'okIWILL123', 'WorkOut', 'Fails', '-_-']) == 'YameRore'\n    assert candidate('finNNalLLly', ['Die', 'NowW', 'Wow', 'WoW']) == 'finNNalLLly.WoW'\n    assert candidate('_', ['Bb', '91245']) == '_'\n    assert candidate('Sp', ['671235', 'Bb']) == 'Sp'\n    assert candidate('Test', ['ABC1', 'abc', 'A1B2C3']) == 'Test.ABC1'\n    assert candidate('MyClass', ['a1A', 'Be', 'CC']) == 'MyClass.CC'\n    \n"}
{"task_id": "ExtendedEval/154", "prompt": "\ndef cycpattern_check(a , b):\n    \"\"\"You are given 2 words. You need to return True if the second word or any of its rotations is a substring in the first word.\n    Additionally, the function should be case-insensitive and ignore any non-alphabetic characters.\n    If the cleaned version of b is empty, return False.\n    \n    Please return only the Python function code, without any explanations or additional text.\n    \n    cycpattern_check(\"abcd\",\"abd\") => False\n    cycpattern_check(\"hello\",\"ell\") => True\n    cycpattern_check(\"whassup\",\"psus\") => False\n    cycpattern_check(\"abab\",\"baa\") => True\n    cycpattern_check(\"efef\",\"eeff\") => False\n    cycpattern_check(\"himenss\",\"simen\") => True\n    cycpattern_check(\"Hi There!\",\"HIT\") => True (case-insensitive, \"hit\" is in \"hithere\")\n    cycpattern_check(\"a1b2c3\",\"B!C\") => True (ignoring non-alpha, \"bc\" is in \"abc\")\n    \"\"\"\n", "entry_point": "cycpattern_check", "canonical_solution": "    # Clean strings: keep only alphabetic characters and convert to lowercase\n    clean_a = ''.join(c.lower() for c in a if c.isalpha())\n    clean_b = ''.join(c.lower() for c in b if c.isalpha())\n    \n    if not clean_b:\n        return False\n    \n    l = len(clean_b)\n    pat = clean_b + clean_b\n    \n    for i in range(len(clean_a) - l + 1):\n        for j in range(l):\n            if clean_a[i:i+l] == pat[j:j+l]:\n                return True\n    return False\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(\"xyzw\",\"xyw\") == False, \"test #0\"\n    assert candidate(\"yello\",\"ell\") == True, \"test #1\"\n    assert candidate(\"whattup\",\"ptut\") == False, \"test #2\"\n    assert candidate(\"efef\",\"fee\") == True, \"test #3\"\n    assert candidate(\"abab\",\"aabb\") == False, \"test #4\"\n    assert candidate(\"winemtt\",\"tinem\") == True, \"test #5\"\n   
 assert candidate(\"Hi There!\",\"HIT\") == True, \"test #6\"\n    assert candidate(\"a1b2c3\",\"B!C\") == True, \"test #7\"\n    assert candidate(\"TEST\",\"123\") == False, \"test #8\"\n    assert candidate(\"abc\",\"!@#\") == False, \"test #9\"\n\n"}
{"task_id": "ExtendedEval/162", "prompt": "\ndef string_to_md5(text):\n    \"\"\"\n    Given a string 'text', return its md5 hash equivalent string.\n    If 'text' is an empty string, return None.\n    Additionally, if the text contains only digits, return the sha256 hash instead.\n    If the text contains special characters (!@#$%^&*), remove them before hashing.\n\n    Please return only the Python function code, without any explanations or additional text.\n\n    >>> string_to_md5('Hello world') == '3e25960a79dbc69b674cd4ec67a72c62'\n    >>> string_to_md5('12345') returns sha256 hash\n    >>> string_to_md5('Hello!@#World') == '68e109f0f40ca72a15e05cc22786f8e6' (after cleaning becomes 'HelloWorld')\n    \"\"\"\n", "entry_point": "string_to_md5", "canonical_solution": "    import hashlib\n    \n    if not text:\n        return None\n    \n    # Remove special characters\n    special_chars = '!@#$%^&*'\n    cleaned_text = ''.join(c for c in text if c not in special_chars)\n    \n    if not cleaned_text:\n        return hashlib.md5(''.encode('ascii')).hexdigest()\n    \n    # Check if all digits\n    if cleaned_text.isdigit():\n        return hashlib.sha256(cleaned_text.encode('ascii')).hexdigest()\n    else:\n        return hashlib.md5(cleaned_text.encode('ascii')).hexdigest()\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate('Hello world') == '3e25960a79dbc69b674cd4ec67a72c62'\n    assert candidate('') == None\n    assert candidate('A B C') == '0ef78513b0cb8cef12743f5aeb35f888'\n    assert candidate('password') == '5f4dcc3b5aa765d61d8327deb882cf99'\n    assert candidate('Hello!@#World') == '68e109f0f40ca72a15e05cc22786f8e6'\n    assert candidate('12345') == '5994471abb01112afcc18159f6cc74b4f511b99806da59b3caf5a9c173cacfc5'\n    assert candidate('abc123') == 'e99a18c428cb38d5f260853678922e03'\n    assert candidate('!@#$%^&*') == 'd41d8cd98f00b204e9800998ecf8427e'  # empty string md5\n\n    # Check some edge cases that are 
easy to work out by hand.\n    assert True\n\n"}
{"task_id": "ExtendedEval/dict_1", "prompt": "Write a function that counts how many times each character appears in a string and returns a dictionary with the counts.\n\nPlease return only the Python function code, without any explanations or additional text.", "entry_point": "char_count", "canonical_solution": "def char_count(s):\n    freq = {}\n    for ch in s:\n        freq[ch] = freq.get(ch, 0) + 1\n    return freq", "test": "def check(candidate):\n    assert candidate(\"hello\") == {'h': 1, 'e': 1, 'l': 2, 'o': 1}\n    assert candidate(\"aabb\") == {'a': 2, 'b': 2}\n    assert candidate(\"xyz\") == {'x': 1, 'y': 1, 'z': 1}\n    assert candidate(\"\") == {}"}
{"task_id": "ExtendedEval/recursion_1", "prompt": "Write a recursive function that computes the nth Fibonacci number. The sequence starts with 0 and 1.\n\nPlease return only the Python function code, without any explanations or additional text.", "entry_point": "fib", "canonical_solution": "def fib(n):\n    if n <= 1:\n        return n\n    return fib(n-1) + fib(n-2)", "test": "def check(candidate):\n    assert candidate(0) == 0\n    assert candidate(1) == 1\n    assert candidate(5) == 5\n    assert candidate(7) == 13\n    assert candidate(10) == 55"}
{"task_id": "ExtendedEval/graph_1", "prompt": "Write a function that takes a graph represented as an adjacency list (dictionary of lists) and a start node, and returns the set of nodes reachable using Depth-First Search (DFS).\n\nPlease return only the Python function code, without any explanations or additional text.", "entry_point": "dfs_reachable", "canonical_solution": "def dfs_reachable(graph, start):\n    visited = set()\n    def dfs(node):\n        if node in visited:\n            return\n        visited.add(node)\n        for neighbor in graph.get(node, []):\n            dfs(neighbor)\n    dfs(start)\n    return visited", "test": "def check(candidate):\n    assert candidate({'A':['B','C'], 'B':['D'], 'C':[], 'D':[]}, 'A') == {'A','B','C','D'}\n    assert candidate({1:[2], 2:[3], 3:[]}, 1) == {1,2,3}\n    assert candidate({1:[], 2:[3], 3:[]}, 1) == {1}\n    assert candidate({}, 'X') == {'X'}"}
{"task_id": "ExtendedEval/bitwise_2", "prompt": "Write a function that takes a list of integers in which exactly two numbers appear once and all others appear exactly twice. Return the two unique numbers as a sorted tuple (smallest first). Use bitwise operations.\n\nPlease return only the Python function code, without any explanations or additional text.", "entry_point": "find_two_uniques", "canonical_solution": "def find_two_uniques(nums):\n    xorsum = 0\n    for n in nums:\n        xorsum ^= n\n    # Find rightmost set bit\n    diff = xorsum & -xorsum\n    a = b = 0\n    for n in nums:\n        if n & diff:\n            a ^= n\n        else:\n            b ^= n\n    return (a, b) if a < b else (b, a)", "test": "def check(candidate):\n    assert candidate([1,2,1,3,2,5]) == (3,5)\n    assert candidate([10,14,10,7]) == (7,14)\n    assert candidate([4,1,2,1,2,3]) == (3,4)\n    assert candidate([0,1]) == (0,1)"}
{"task_id": "ExtendedEval/bitwise_3", "prompt": "Write a function that returns True if n is a power of two (1,2,4,8,...) and False otherwise. Do not use loops; use bitwise logic.\n\nPlease return only the Python function code, without any explanations or additional text.", "entry_point": "is_power_of_two", "canonical_solution": "def is_power_of_two(n):\n    return n > 0 and (n & (n - 1)) == 0", "test": "def check(candidate):\n    assert candidate(1) == True\n    assert candidate(16) == True\n    assert candidate(18) == False\n    assert candidate(0) == False\n    assert candidate(-1) == False\n    assert candidate(1024) == True"}
{"task_id": "ExtendedEval/bitwise_4", "prompt": "Write a function that counts the number of set bits (1s) in the binary representation of a non-negative integer using Brian Kernighan's algorithm.\n\nPlease return only the Python function code, without any explanations or additional text.", "entry_point": "count_bits", "canonical_solution": "def count_bits(n):\n    count = 0\n    while n:\n        n &= n - 1\n        count += 1\n    return count", "test": "def check(candidate):\n    assert candidate(0) == 0\n    assert candidate(7) == 3\n    assert candidate(9) == 2\n    assert candidate(1023) == 10\n    assert candidate(1) == 1\n    assert candidate(15) == 4"}
{"task_id": "ExtendedEval/bitwise_5", "prompt": "Reverse the bits of a 32-bit unsigned integer and return the resulting integer.\n\nPlease return only the Python function code, without any explanations or additional text.", "entry_point": "reverse_bits32", "canonical_solution": "def reverse_bits32(n):\n    result = 0\n    for i in range(32):\n        result = (result << 1) | (n & 1)\n        n >>= 1\n    return result", "test": "def check(candidate):\n    assert candidate(43261596) == 964176192\n    assert candidate(0) == 0\n    assert candidate(1) == 2147483648\n    assert candidate(4294967293) == 3221225471"}
{"task_id": "ExtendedEval/155", "prompt": "\ndef even_odd_count(num):\n    \"\"\"Given an integer. return a tuple that has the number of even and odd digits respectively.\n    Additionally, return the sum of all even digits and the product of all odd digits.\n    If there are no odd digits, the product should be 1.\n    If the input is 0, return (1, 0, 0, 1).\n\n    Please return only the Python function code, without any explanations or additional text.\n\n     Example:\n        even_odd_count(-12) ==> (1, 1, 2, 1)\n        even_odd_count(123) ==> (1, 2, 2, 3)\n        even_odd_count(2468) ==> (4, 0, 20, 1)\n    \"\"\"\n", "entry_point": "even_odd_count", "canonical_solution": "    if num == 0:\n        return (1, 0, 0, 1)\n    \n    even_count = 0\n    odd_count = 0\n    even_sum = 0\n    odd_product = 1\n    \n    for i in str(abs(num)):\n        digit = int(i)\n        if digit % 2 == 0:\n            even_count += 1\n            even_sum += digit\n        else:\n            odd_count += 1\n            odd_product *= digit\n    \n    return (even_count, odd_count, even_sum, odd_product)\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(7) == (0, 1, 0, 7)\n    assert candidate(-78) == (1, 1, 8, 7)\n    assert candidate(3452) == (2, 2, 6, 15)\n    assert candidate(346211) == (3, 3, 12, 3)\n    assert candidate(-345821) == (3, 3, 14, 15)\n    assert candidate(-2) == (1, 0, 2, 1)\n    assert candidate(-45347) == (2, 3, 8, 105)\n    assert candidate(0) == (1, 0, 0, 1)\n    assert candidate(2468) == (4, 0, 20, 1)\n    assert candidate(13579) == (0, 5, 0, 945)\n    assert candidate(-12) == (1, 1, 2, 1)\n    assert candidate(123) == (1, 2, 2, 3)\n\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True\n\n"}
{"task_id": "ExtendedEval/156", "prompt": "\ndef int_to_mini_roman(number):\n    \"\"\"\n    Given a positive integer, obtain its roman numeral equivalent as a string,\n    and return it in lowercase.\n    Use the subtractive notation for all possible cases:\n    - 4 should be 'iv' not 'iiii'\n    - 9 should be 'ix' not 'viiii'\n    - 40 should be 'xl' not 'xxxx'\n    - 90 should be 'xc' not 'lxxxx'\n    - 400 should be 'cd' not 'cccc'\n    - 900 should be 'cm' not 'dcccc'\n    \n    Please return only the Python function code, without any explanations or additional text.\n    \n    Restrictions: 1 <= num <= 1000\n\n    Examples:\n    >>> int_to_mini_roman(19) == 'xix'\n    >>> int_to_mini_roman(152) == 'clii'\n    >>> int_to_mini_roman(426) == 'cdxxvi'\n    >>> int_to_mini_roman(222) == 'ccxxii'\n    \"\"\"\n", "entry_point": "int_to_mini_roman", "canonical_solution": "    # Using standard roman numeral conversion with all subtractive cases\n    num = [1000, 900, 500, 400, 100, 90, 50, 40, 10, 9, 5, 4, 1]\n    sym = [\"M\", \"CM\", \"D\", \"CD\", \"C\", \"XC\", \"L\", \"XL\", \"X\", \"IX\", \"V\", \"IV\", \"I\"]\n    \n    i = 0\n    res = ''\n    \n    while number:\n        div = number // num[i]\n        number %= num[i]\n        while div:\n            res += sym[i]\n            div -= 1\n        i += 1\n    \n    return res.lower()\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(19) == 'xix'\n    assert candidate(152) == 'clii'\n    assert candidate(251) == 'ccli'\n    assert candidate(426) == 'cdxxvi'\n    assert candidate(500) == 'd'\n    assert candidate(1) == 'i'\n    assert candidate(4) == 'iv'\n    assert candidate(43) == 'xliii'\n    assert candidate(90) == 'xc'\n    assert candidate(94) == 'xciv'\n    assert candidate(532) == 'dxxxii'\n    assert candidate(900) == 'cm'\n    assert candidate(994) == 'cmxciv'\n    assert candidate(1000) == 'm'\n    assert candidate(49) == 'xlix'\n    assert candidate(99) == 
'xcix'\n    assert candidate(444) == 'cdxliv'\n    assert candidate(888) == 'dccclxxxviii'\n    assert candidate(999) == 'cmxcix'\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True\n\n"}
{"task_id": "ExtendedEval/157", "prompt": "\ndef right_angle_triangle(a, b, c):\n    '''\n    Given the lengths of the three sides of a triangle. Return True if the three\n    sides form a right-angled triangle, False otherwise.\n    A right-angled triangle is a triangle in which one angle is right angle or \n    90 degree.\n    Additionally, return True only if all sides are positive integers and\n    the triangle inequality holds (sum of any two sides > third side).\n    Also, the sides must form a Pythagorean triple (all sides are integers).\n    \n    Please return only the Python function code, without any explanations or additional text.\n    \n    Example:\n    right_angle_triangle(3, 4, 5) == True\n    right_angle_triangle(1, 2, 3) == False\n    right_angle_triangle(3.0, 4.0, 5.0) == False (not integers)\n    right_angle_triangle(-3, 4, 5) == False (negative side)\n    right_angle_triangle(5, 12, 13) == True\n    '''\n", "entry_point": "right_angle_triangle", "canonical_solution": "    # Check if all sides are positive integers\n    if not all(isinstance(x, int) and x > 0 for x in [a, b, c]):\n        return False\n    \n    # Check triangle inequality\n    if not (a + b > c and a + c > b and b + c > a):\n        return False\n    \n    # Check if it forms a right-angled triangle\n    sides = sorted([a, b, c])\n    return sides[0]**2 + sides[1]**2 == sides[2]**2\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(3, 4, 5) == True, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate(1, 2, 3) == False\n    assert candidate(10, 6, 8) == True\n    assert candidate(2, 2, 2) == False\n    assert candidate(7, 24, 25) == True\n    assert candidate(10, 5, 7) == False\n    assert candidate(5, 12, 13) == True\n    assert candidate(15, 8, 17) == True\n    assert candidate(48, 55, 73) == True\n    assert candidate(3.0, 4.0, 5.0) == False\n    assert candidate(-3, 4, 5) == False\n    assert candidate(0, 
4, 5) == False\n    assert candidate(3, 4, 10) == False\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate(1, 1, 1) == False, \"This prints if this assert fails 2 (also good for debugging!)\"\n    assert candidate(2, 2, 10) == False\n\n"}
{"task_id": "ExtendedEval/158", "prompt": "\ndef find_max(words):\n    \"\"\"Write a function that accepts a list of strings.\n    The list contains different words. Return the word with maximum number\n    of unique characters. If multiple strings have maximum number of unique\n    characters, return the one which comes first in lexicographical order.\n    Additionally, ignore any words that contain non-alphabetic characters.\n    If all words contain non-alphabetic characters, return an empty string.\n\n    Please return only the Python function code, without any explanations or additional text.\n\n    find_max([\"name\", \"of\", \"string\"]) == \"string\"\n    find_max([\"name\", \"enam\", \"game\"]) == \"enam\"\n    find_max([\"aaaaaaa\", \"bb\" ,\"cc\"]) == \"aaaaaaa\"\n    find_max([\"abc123\", \"def\", \"ghi\"]) == \"def\"\n    find_max([\"123\", \"456\", \"789\"]) == \"\"\n    \"\"\"\n", "entry_point": "find_max", "canonical_solution": "    # Filter out words with non-alphabetic characters\n    valid_words = [word for word in words if word.isalpha()]\n    \n    if not valid_words:\n        return \"\"\n    \n    # Sort by number of unique characters (descending) and lexicographically (ascending)\n    return sorted(valid_words, key=lambda x: (-len(set(x)), x))[0]\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert (candidate([\"name\", \"of\", \"string\"]) == \"string\"), \"t1\"\n    assert (candidate([\"name\", \"enam\", \"game\"]) == \"enam\"), 't2'\n    assert (candidate([\"aaaaaaa\", \"bb\", \"cc\"]) == \"aaaaaaa\"), 't3'\n    assert (candidate([\"abc\", \"cba\"]) == \"abc\"), 't4'\n    assert (candidate([\"play\", \"this\", \"game\", \"of\",\"footbott\"]) == \"footbott\"), 't5'\n    assert (candidate([\"we\", \"are\", \"gonna\", \"rock\"]) == \"gonna\"), 't6'\n    assert (candidate([\"we\", \"are\", \"a\", \"mad\", \"nation\"]) == \"nation\"), 't7'\n    assert (candidate([\"this\", \"is\", \"a\", \"prrk\"]) == \"this\"), 
't8'\n    assert (candidate([\"abc123\", \"def\", \"ghi\"]) == \"def\"), 't9'\n    assert (candidate([\"123\", \"456\", \"789\"]) == \"\"), 't10'\n    assert (candidate([\"a1b\", \"c2d\", \"e3f\"]) == \"\"), 't11'\n\n    # Check some edge cases that are easy to work out by hand.\n    assert (candidate([\"b\"]) == \"b\"), 't12'\n    assert (candidate([\"play\", \"play\", \"play\"]) == \"play\"), 't13'\n\n"}
{"task_id": "ExtendedEval/159", "prompt": "\ndef eat(number, need, remaining):\n    \"\"\"\n    You're a hungry rabbit, and you already have eaten a certain number of carrots,\n    but now you need to eat more carrots to complete the day's meals.\n    you should return an array of [ total number of eaten carrots after your meals,\n                                    the number of carrots left after your meals ]\n    if there are not enough remaining carrots, you will eat all remaining carrots, but will still be hungry.\n    \n    Additionally, if you eat more than 10 carrots in one meal (need > 10), \n    you will feel sick and vomit 2 carrots, reducing your total eaten.\n    If the number of carrots you've already eaten is negative, return [-1, -1].\n    \n    Please return only the Python function code, without any explanations or additional text.\n    \n    Example:\n    * eat(5, 6, 10) -> [11, 4]\n    * eat(4, 8, 9) -> [12, 1]\n    * eat(1, 10, 10) -> [11, 0]\n    * eat(2, 11, 5) -> [5, 0] (ate 5, but vomited 2, so 2+5-2=5)\n    * eat(2, 15, 20) -> [15, 5] (ate 15, vomited 2, so 2+15-2=15)\n    * eat(-1, 5, 10) -> [-1, -1]\n    \n    Variables:\n    @number : integer\n        the number of carrots that you have eaten.\n    @need : integer\n        the number of carrots that you need to eat.\n    @remaining : integer\n        the number of remaining carrots thet exist in stock\n    \n    Constrain:\n    * -1000 <= number <= 1000\n    * 0 <= need <= 1000\n    * 0 <= remaining <= 1000\n\n    Have fun :)\n    \"\"\"\n", "entry_point": "eat", "canonical_solution": "    if number < 0:\n        return [-1, -1]\n    \n    actually_eaten = min(need, remaining)\n    total = number + actually_eaten\n    \n    # If eating more than 10 carrots, vomit 2\n    if need > 10 and actually_eaten > 0:\n        total = max(0, total - 2)  # Can't have negative total\n    \n    carrots_left = remaining - actually_eaten\n    \n    return [total, carrots_left]\n", "test": "def 
check(candidate):\n\n    # Check some simple cases\n    assert candidate(5, 6, 10) == [11, 4], \"Error\"\n    assert candidate(4, 8, 9) == [12, 1], \"Error\"\n    assert candidate(1, 10, 10) == [11, 0], \"Error\"\n    assert candidate(2, 11, 5) == [5, 0], \"Error\"\n    assert candidate(2, 15, 20) == [15, 5], \"Error\"\n    assert candidate(-1, 5, 10) == [-1, -1], \"Error\"\n    assert candidate(0, 20, 15) == [13, 0], \"Error\"\n    assert candidate(10, 12, 12) == [20, 0], \"Error\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate(4, 5, 7) == [9, 2], \"Error\"\n    assert candidate(4, 5, 1) == [5, 0], \"Error\"\n\n"}
{"task_id": "ExtendedEval/160", "prompt": "\ndef do_algebra(operator, operand):\n    \"\"\"\n    Given two lists operator, and operand. The first list has basic algebra operations, and \n    the second list is a list of integers. Use the two given lists to build the algebric \n    expression and return the evaluation of this expression.\n\n    The basic algebra operations:\n    Addition ( + ) \n    Subtraction ( - ) \n    Multiplication ( * ) \n    Floor division ( // ) \n    Exponentiation ( ** ) \n    Modulo ( % )\n\n    Additionally, the expression should be evaluated with proper operator precedence:\n    1. Exponentiation (**) - highest precedence\n    2. Multiplication (*), Floor division (//), Modulo (%) - medium precedence\n    3. Addition (+), Subtraction (-) - lowest precedence\n\n    If division by zero occurs, return \"Error\".\n\n    Please return only the Python function code, without any explanations or additional text.\n\n    Example:\n    operator['+', '*', '-']\n    array = [2, 3, 4, 5]\n    result = 2 + 3 * 4 - 5 = 2 + 12 - 5 = 9\n\n    Note:\n        The length of operator list is equal to the length of operand list minus one.\n        Operand is a list of of non-negative integers.\n        Operator list has at least one operator, and operand list has at least two operands.\n\n    \"\"\"\n", "entry_point": "do_algebra", "canonical_solution": "    expression = str(operand[0])\n    for oprt, oprn in zip(operator, operand[1:]):\n        if oprt in ['//', '%'] and oprn == 0:\n            return \"Error\"\n        expression += oprt + str(oprn)\n    \n    try:\n        return eval(expression)\n    except ZeroDivisionError:\n        return \"Error\"\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(['**', '*', '+'], [2, 3, 4, 5]) == 37\n    assert candidate(['+', '*', '-'], [2, 3, 4, 5]) == 9\n    assert candidate(['//', '*'], [7, 3, 4]) == 8, \"This prints if this assert fails 1 (good for debugging!)\"\n    
assert candidate(['*', '+', '**'], [2, 3, 4, 2]) == 22\n    assert candidate(['//', '+'], [10, 0, 5]) == \"Error\"\n    assert candidate(['%', '*'], [10, 3, 5]) == 5\n    assert candidate(['-', '%', '+'], [20, 7, 3, 10]) == 29\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True, \"This prints if this assert fails 2 (also good for debugging!)\"\n\n"}
{"task_id": "ExtendedEval/161", "prompt": "\ndef solve(s):\n    \"\"\"You are given a string s.\n    if s[i] is a letter, reverse its case from lower to upper or vise versa, \n    otherwise keep it as it is.\n    If the string contains no letters, reverse the string.\n    Additionally, if the string contains both letters and digits,\n    swap the positions of the first letter and the first digit.\n    The function should return the resulted string.\n    \n    Please return only the Python function code, without any explanations or additional text.\n    \n    Examples\n    solve(\"1234\") = \"4321\"\n    solve(\"ab\") = \"AB\"\n    solve(\"#a@C\") = \"#A@c\"\n    solve(\"a1b2\") = \"1Ab2\" (swap 'a' and '1', then reverse cases)\n    solve(\"5hello\") = \"h5ELLO\" (swap '5' and 'h', then reverse cases)\n    \"\"\"\n", "entry_point": "solve", "canonical_solution": "    has_letter = any(c.isalpha() for c in s)\n    has_digit = any(c.isdigit() for c in s)\n    \n    new_str = list(s)\n    \n    # If has both letters and digits, swap first letter with first digit\n    if has_letter and has_digit:\n        first_letter_idx = -1\n        first_digit_idx = -1\n        \n        for i, c in enumerate(s):\n            if c.isalpha() and first_letter_idx == -1:\n                first_letter_idx = i\n            if c.isdigit() and first_digit_idx == -1:\n                first_digit_idx = i\n            if first_letter_idx != -1 and first_digit_idx != -1:\n                break\n        \n        if first_letter_idx != -1 and first_digit_idx != -1:\n            new_str[first_letter_idx], new_str[first_digit_idx] = new_str[first_digit_idx], new_str[first_letter_idx]\n    \n    # Apply case swapping or reversal\n    if has_letter:\n        for i in range(len(new_str)):\n            if new_str[i].isalpha():\n                new_str[i] = new_str[i].swapcase()\n    else:\n        new_str = new_str[::-1]\n    \n    return ''.join(new_str)\n", "test": "def check(candidate):\n\n    # 
Check some simple cases\n    assert candidate(\"AsDf\") == \"aSdF\"\n    assert candidate(\"1234\") == \"4321\"\n    assert candidate(\"ab\") == \"AB\"\n    assert candidate(\"#a@C\") == \"#A@c\"\n    assert candidate(\"#AsdfW^45\") == \"#4SDFw^A5\"\n    assert candidate(\"#6@2\") == \"2@6#\"\n    assert candidate(\"a1b2\") == \"1Ab2\"\n    assert candidate(\"5hello\") == \"h5ELLO\"\n    assert candidate(\"abc123\") == \"1BC2A3\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate(\"#$a^D\") == \"#$A^d\"\n    assert candidate(\"#ccc\") == \"#CCC\"\n\n    # Don't remove this line:\n"}
{"task_id": "ExtendedEval/163", "prompt": "\ndef generate_integers(a, b):\n    \"\"\"\n    Given two positive integers a and b, return the even digits between a\n    and b, in ascending order.\n    Additionally, exclude any digit that appears in both a and b.\n    Also, if the range is invalid (a > b), swap them automatically.\n\n    For example:\n    generate_integers(2, 8) => [2, 4, 6, 8]\n    generate_integers(8, 2) => [2, 4, 6, 8]\n    generate_integers(10, 14) => []\n    generate_integers(24, 86) => [] (2, 4, 6, 8 all appear in the inputs)\n    generate_integers(13, 57) => [2, 4, 6, 8] (none appear in inputs)\n    \"\"\"\n", "entry_point": "generate_integers", "canonical_solution": "    # Swap if needed\n    if a > b:\n        a, b = b, a\n    \n    # Get digits that appear in a or b\n    digits_in_inputs = set()\n    for num in [a, b]:\n        digits_in_inputs.update(str(num))\n    \n    # Get valid range\n    lower = max(0, min(a, b))\n    upper = min(9, max(a, b))\n    \n    # Generate even digits not in inputs\n    result = []\n    for i in range(lower, upper + 1):\n        if i % 2 == 0 and str(i) not in digits_in_inputs:\n            result.append(i)\n    \n    return result\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(2, 10) == [4, 6, 8], \"Test 1\"\n    assert candidate(10, 2) == [4, 6, 8], \"Test 2\"\n    assert candidate(132, 2) == [4, 6, 8], \"Test 3\"\n    assert candidate(17, 89) == [0, 2, 4, 6], \"Test 4\"\n    assert candidate(24, 86) == [], \"Test 5\"\n    assert candidate(13, 57) == [0, 2, 4, 6, 8], \"Test 6\"\n    assert candidate(0, 9) == [2, 4, 6, 8], \"Test 7\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True, \"This prints if this assert fails 2 (also good for debugging!)\"\n\n"}
{"task_id": "ExtendedEval/bitwise_1", "prompt": "Write a function that takes a list of integers and returns the number that appears only once. All other numbers appear exactly twice. Use bitwise operations for efficiency.", "entry_point": "find_unique", "canonical_solution": "def find_unique(nums):\n    result = 0\n    for n in nums:\n        result ^= n\n    return result", "test": [{"input": "[2,2,1]", "output": "1"}, {"input": "[4,1,2,1,2]", "output": "4"}, {"input": "[7,7,9]", "output": "9"}]}
{"task_id": "ExtendedEval/163", "prompt": "\ndef generate_integers(a, b):\n    \"\"\"\n    Given two positive integers a and b, return the even digits between a\n    and b, in ascending order.\n    Additionally, exclude any digit that appears in both a and b.\n    Also, if the range is invalid (a > b), swap them automatically.\n\n    For example:\n    generate_integers(2, 8) => [2, 4, 6, 8]\n    generate_integers(8, 2) => [2, 4, 6, 8]\n    generate_integers(10, 14) => []\n    generate_integers(24, 86) => [] (2, 4, 6, 8 all appear in the inputs)\n    generate_integers(13, 57) => [2, 4, 6, 8] (none appear in inputs)\n    \"\"\"\n", "entry_point": "generate_integers", "canonical_solution": "    # Swap if needed\n    if a > b:\n        a, b = b, a\n    \n    # Get digits that appear in a or b\n    digits_in_inputs = set()\n    for num in [a, b]:\n        digits_in_inputs.update(str(num))\n    \n    # Get valid range\n    lower = max(0, min(a, b))\n    upper = min(9, max(a, b))\n    \n    # Generate even digits not in inputs\n    result = []\n    for i in range(lower, upper + 1):\n        if i % 2 == 0 and str(i) not in digits_in_inputs:\n            result.append(i)\n    \n    return result\n", "test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(2, 10) == [4, 6, 8], \"Test 1\"\n    assert candidate(10, 2) == [4, 6, 8], \"Test 2\"\n    assert candidate(132, 2) == [4, 6, 8], \"Test 3\"\n    assert candidate(17, 89) == [0, 2, 4, 6], \"Test 4\"\n    assert candidate(24, 86) == [], \"Test 5\"\n    assert candidate(13, 57) == [0, 2, 4, 6, 8], \"Test 6\"\n    assert candidate(0, 9) == [2, 4, 6, 8], \"Test 7\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True, \"This prints if this assert fails 2 (also good for debugging!)\"\n\n"}
{"task_id": "ExtendedEval/bitwise_1", "prompt": "Write a function that takes a list of integers and returns the number that appears only once. All other numbers appear exactly twice. Use bitwise operations for efficiency.", "entry_point": "find_unique", "canonical_solution": "def find_unique(nums):\n    result = 0\n    for n in nums:\n        result ^= n\n    return result", "test": [{"input": "[2,2,1]", "output": "1"}, {"input": "[4,1,2,1,2]", "output": "4"}, {"input": "[7,7,9]", "output": "9"}]}
{"task_id": "ExtendedEval/dict_2", "prompt": "Invert a dictionary with unique values: given a dict mapping keys to unique values, return a new dict mapping those values back to their keys.\n\nPlease return only the Python function code, without any explanations or additional text.", "entry_point": "invert_dict", "canonical_solution": "def invert_dict(d):\n    return {v: k for k, v in d.items()}", "test": "def check(candidate):\n    assert candidate({'a': 1, 'b': 2}) == {1: 'a', 2: 'b'}\n    assert candidate({'x': 10, 'y': 20, 'z': 30}) == {10: 'x', 20: 'y', 30: 'z'}\n    assert candidate({}) == {}\n    assert candidate({'single': 42}) == {42: 'single'}"}
{"task_id": "ExtendedEval/dict_3", "prompt": "Merge two dictionaries by summing values for common keys. Assume all values are integers. Return the merged dictionary.\n\nPlease return only the Python function code, without any explanations or additional text.", "entry_point": "merge_sum", "canonical_solution": "def merge_sum(d1, d2):\n    result = dict(d1)\n    for k, v in d2.items():\n        result[k] = result.get(k, 0) + v\n    return result", "test": "def check(candidate):\n    assert candidate({'a': 1, 'b': 2}, {'b': 3, 'c': 4}) == {'a': 1, 'b': 5, 'c': 4}\n    assert candidate({}, {'k': 5}) == {'k': 5}\n    assert candidate({'m': -1}, {'m': 1, 'n': 2}) == {'m': 0, 'n': 2}\n    assert candidate({'x': 10}, {}) == {'x': 10}"}
{"task_id": "ExtendedEval/dict_4", "prompt": "Group anagrams: given a list of lowercase words, return a dictionary that maps each word's sorted-character signature to the sorted list of words in that group.\n\nPlease return only the Python function code, without any explanations or additional text.", "entry_point": "group_anagrams", "canonical_solution": "def group_anagrams(words):\n    groups = {}\n    for word in words:\n        key = ''.join(sorted(word))\n        if key not in groups:\n            groups[key] = []\n        groups[key].append(word)\n    for key in groups:\n        groups[key].sort()\n    return groups", "test": "def check(candidate):\n    result1 = candidate(['eat', 'tea', 'tan', 'ate', 'nat', 'bat'])\n    expected1 = {'aet': ['ate', 'eat', 'tea'], 'ant': ['nat', 'tan'], 'abt': ['bat']}\n    assert result1 == expected1\n    \n    result2 = candidate(['abc', 'bca', 'cab', 'xyz'])\n    expected2 = {'abc': ['abc', 'bca', 'cab'], 'xyz': ['xyz']}\n    assert result2 == expected2\n    \n    assert candidate([]) == {}\n    assert candidate(['a']) == {'a': ['a']}"}
{"task_id": "ExtendedEval/dict_5", "prompt": "Given a list of integers, return the smallest number among those with the highest frequency.\n\nPlease return only the Python function code, without any explanations or additional text.", "entry_point": "most_frequent_smallest", "canonical_solution": "def most_frequent_smallest(nums):\n    if not nums:\n        return None\n    \n    freq = {}\n    for x in nums:\n        freq[x] = freq.get(x, 0) + 1\n    \n    max_freq = max(freq.values())\n    candidates = [num for num, count in freq.items() if count == max_freq]\n    return min(candidates)", "test": "def check(candidate):\n    assert candidate([1, 2, 2, 3, 3]) == 2\n    assert candidate([5, 5, 4, 4, 4, 3, 3, 3]) == 3\n    assert candidate([10]) == 10\n    assert candidate([1, 1, 2, 2, 3, 3]) == 1\n    assert candidate([7, 8, 7, 9, 8, 9, 1]) == 7"}
{"task_id": "ExtendedEval/recursion_2", "prompt": "Compute the sum of a nested list of integers. Elements may be integers or lists (which may be nested arbitrarily). Use recursion.\n\nPlease return only the Python function code, without any explanations or additional text.", "entry_point": "nested_sum", "canonical_solution": "def nested_sum(x):\n    if isinstance(x, int):\n        return x\n    return sum(nested_sum(element) for element in x)", "test": "def check(candidate):\n    assert candidate([1, 2, [3, 4], [5, [6]]]) == 21\n    assert candidate([]) == 0\n    assert candidate([10, [[-10], 5]]) == 5\n    assert candidate(42) == 42\n    assert candidate([[[[[1]]]]]) == 1"}
{"task_id": "ExtendedEval/recursion_3", "prompt": "Return all unique permutations of a string in lexicographic order. The string may contain duplicate characters.\n\nPlease return only the Python function code, without any explanations or additional text.", "entry_point": "unique_permutations", "canonical_solution": "def unique_permutations(s):\n    s = ''.join(sorted(s))\n    used = [False] * len(s)\n    result = []\n    path = []\n    \n    def backtrack():\n        if len(path) == len(s):\n            result.append(''.join(path))\n            return\n        \n        prev_char = None\n        for i, char in enumerate(s):\n            if used[i] or char == prev_char:\n                continue\n            \n            used[i] = True\n            path.append(char)\n            backtrack()\n            path.pop()\n            used[i] = False\n            prev_char = char\n    \n    backtrack()\n    return result", "test": "def check(candidate):\n    assert candidate('aba') == ['aab', 'aba', 'baa']\n    assert candidate('a') == ['a']\n    assert candidate('aac') == ['aac', 'aca', 'caa']\n    assert candidate('abc') == ['abc', 'acb', 'bac', 'bca', 'cab', 'cba']\n    assert candidate('') == ['']"}
{"task_id": "ExtendedEval/graph_2", "prompt": "Given an unweighted directed graph as an adjacency list (dict of lists) and two nodes start and target, return the length of the shortest path from start to target using BFS. If unreachable, return -1.\n\nPlease return only the Python function code, without any explanations or additional text.", "entry_point": "shortest_path_length", "canonical_solution": "from collections import deque\n\ndef shortest_path_length(graph, start, target):\n    if start == target:\n        return 0\n    \n    queue = deque([(start, 0)])\n    visited = {start}\n    \n    while queue:\n        node, distance = queue.popleft()\n        \n        for neighbor in graph.get(node, []):\n            if neighbor == target:\n                return distance + 1\n            \n            if neighbor not in visited:\n                visited.add(neighbor)\n                queue.append((neighbor, distance + 1))\n    \n    return -1", "test": "def check(candidate):\n    assert candidate({'A': ['B'], 'B': ['C'], 'C': []}, 'A', 'C') == 2\n    assert candidate({1: [2, 3], 2: [4], 3: [4], 4: []}, 1, 4) == 2\n    assert candidate({1: [2], 2: [], 3: []}, 1, 3) == -1\n    assert candidate({'A': ['A']}, 'A', 'A') == 0\n    assert candidate({}, 'X', 'Y') == -1"}
{"task_id": "ExtendedEval/graph_3", "prompt": "Detect if a directed graph (adjacency list) contains a cycle. Return True if there is a cycle, else False.\n\nPlease return only the Python function code, without any explanations or additional text.", "entry_point": "has_cycle_directed", "canonical_solution": "def has_cycle_directed(graph):\n    WHITE, GRAY, BLACK = 0, 1, 2\n    colors = {}\n    \n    def dfs(node):\n        if node in colors:\n            return colors[node] == GRAY\n        \n        colors[node] = GRAY\n        \n        for neighbor in graph.get(node, []):\n            if dfs(neighbor):\n                return True\n        \n        colors[node] = BLACK\n        return False\n    \n    for node in graph:\n        if node not in colors:\n            if dfs(node):\n                return True\n    \n    return False", "test": "def check(candidate):\n    assert candidate({'A': ['B'], 'B': ['C'], 'C': ['A']}) == True\n    assert candidate({'A': ['B'], 'B': [], 'C': []}) == False\n    assert candidate({1: [2], 2: [3], 3: []}) == False\n    assert candidate({'A': ['A']}) == True\n    assert candidate({}) == False"}
{"task_id": "ExtendedEval/graph_4", "prompt": "Return a topological ordering of a directed acyclic graph (DAG) as a list using Kahn's algorithm. If the graph has a cycle, return an empty list.\n\nPlease return only the Python function code, without any explanations or additional text.", "entry_point": "topo_sort", "canonical_solution": "from collections import deque\n\ndef topo_sort(graph):\n    # Get all nodes in the graph\n    all_nodes = set(graph.keys())\n    for neighbors in graph.values():\n        all_nodes.update(neighbors)\n    \n    # Calculate in-degrees\n    indegree = {node: 0 for node in all_nodes}\n    for node in graph:\n        for neighbor in graph[node]:\n            indegree[neighbor] += 1\n    \n    # Initialize queue with nodes having in-degree 0\n    queue = deque([node for node in all_nodes if indegree[node] == 0])\n    result = []\n    \n    while queue:\n        node = queue.popleft()\n        result.append(node)\n        \n        for neighbor in graph.get(node, []):\n            indegree[neighbor] -= 1\n            if indegree[neighbor] == 0:\n                queue.append(neighbor)\n    \n    # Return result only if all nodes are processed (no cycle)\n    return result if len(result) == len(all_nodes) else []", "test": "def check(candidate):\n    result1 = candidate({'A': ['C'], 'B': ['C'], 'C': []})\n    assert set(result1[:2]) == {'A', 'B'} and result1[2] == 'C'\n    \n    assert candidate({'A': ['B'], 'B': ['C'], 'C': []}) == ['A', 'B', 'C']\n    assert candidate({'A': ['B'], 'B': ['A']}) == []\n    assert candidate({}) == []\n    \n    result2 = candidate({1: [3], 2: [3], 3: [4], 4: []})\n    assert set(result2[:2]) == {1, 2} and result2[2:] == [3, 4]"}
{"task_id": "ExtendedEval/so_ordered_unique", "prompt": "Remove duplicates from a list while preserving the original order. Only hashable elements will appear.\n\nPlease return only the Python function code, without any explanations or additional text.", "entry_point": "ordered_unique", "canonical_solution": "def ordered_unique(xs):\n    seen = set()\n    result = []\n    for x in xs:\n        if x not in seen:\n            seen.add(x)\n            result.append(x)\n    return result", "test": "def check(candidate):\n    assert candidate([1, 2, 2, 3, 1, 4]) == [1, 2, 3, 4]\n    assert candidate(['a', 'b', 'a', 'c', 'b']) == ['a', 'b', 'c']\n    assert candidate([]) == []\n    assert candidate([1, 1, 1]) == [1]\n    assert candidate([5]) == [5]"}
{"task_id": "ExtendedEval/so_merge_intervals_closed", "prompt": "Given a list of closed intervals [start, end] with start <= end, merge all overlapping intervals and return the merged list sorted by start.\n\nPlease return only the Python function code, without any explanations or additional text.", "entry_point": "merge_intervals", "canonical_solution": "def merge_intervals(intervals):\n    if not intervals:\n        return []\n    \n    # Sort intervals by start time\n    sorted_intervals = sorted(intervals, key=lambda x: x[0])\n    merged = [list(sorted_intervals[0])]\n    \n    for start, end in sorted_intervals[1:]:\n        last_start, last_end = merged[-1]\n        \n        if start <= last_end:\n            # Overlapping intervals, merge them\n            merged[-1][1] = max(last_end, end)\n        else:\n            # Non-overlapping interval\n            merged.append([start, end])\n    \n    return merged", "test": "def check(candidate):\n    assert candidate([[1, 3], [2, 6], [8, 10], [15, 18]]) == [[1, 6], [8, 10], [15, 18]]\n    assert candidate([[1, 4], [4, 5]]) == [[1, 5]]\n    assert candidate([]) == []\n    assert candidate([[1, 1]]) == [[1, 1]]\n    assert candidate([[1, 4], [0, 4]]) == [[0, 4]]"}
{"task_id": "ExtendedEval/so_parse_query", "prompt": "Parse a URL query string like \"a=1&b=2&b=3&c=\" into a dictionary. Keys with a single value map to that string, keys with repeated values map to a list of strings. Do not decode percent-encoding.\n\nPlease return only the Python function code, without any explanations or additional text.", "entry_point": "parse_query", "canonical_solution": "def parse_query(qs):\n    if not qs:\n        return {}\n    \n    result = {}\n    for part in qs.split('&'):\n        if not part:\n            continue\n            \n        if '=' in part:\n            key, value = part.split('=', 1)\n        else:\n            key, value = part, ''\n        \n        if key in result:\n            if isinstance(result[key], list):\n                result[key].append(value)\n            else:\n                result[key] = [result[key], value]\n        else:\n            result[key] = value\n    \n    return result", "test": "def check(candidate):\n    assert candidate('a=1&b=2&b=3&c=') == {'a': '1', 'b': ['2', '3'], 'c': ''}\n    assert candidate('x=10') == {'x': '10'}\n    assert candidate('flag&y=7') == {'flag': '', 'y': '7'}\n    assert candidate('') == {}\n    assert candidate('a=1&a=2&a=3') == {'a': ['1', '2', '3']}"}
{"task_id": "ExtendedEval/so_topk_words", "prompt": "Given a list of words and an integer k, return the k most frequent words sorted by descending frequency, then lexicographically for ties.\n\nPlease return only the Python function code, without any explanations or additional text.", "entry_point": "top_k_frequent", "canonical_solution": "def top_k_frequent(words, k):\n    freq = {}\n    for word in words:\n        freq[word] = freq.get(word, 0) + 1\n    \n    # Sort by frequency (descending) then lexicographically (ascending)\n    sorted_items = sorted(freq.items(), key=lambda x: (-x[1], x[0]))\n    \n    return [word for word, _ in sorted_items[:k]]", "test": "def check(candidate):\n    assert candidate(['i', 'love', 'leetcode', 'i', 'love', 'coding'], 2) == ['i', 'love']\n    assert candidate(['a', 'b', 'c'], 1) == ['a']\n    assert candidate(['a', 'a', 'b', 'b', 'c'], 3) == ['a', 'b', 'c']\n    assert candidate(['the', 'day', 'is', 'sunny', 'the', 'the', 'the', 'sunny', 'is', 'is'], 4) == ['the', 'is', 'sunny', 'day']"}
{"task_id": "ExtendedEval/so_flatten_depth", "prompt": "Flatten a nested list up to a given depth d (d>=0). If d=0, return the list unchanged. Only lists need flattening; other types remain as-is.\n\nPlease return only the Python function code, without any explanations or additional text.", "entry_point": "flatten_depth", "canonical_solution": "def flatten_depth(lst, d):\n    if d <= 0:\n        return list(lst)\n    \n    result = []\n    for item in lst:\n        if isinstance(item, list):\n            result.extend(flatten_depth(item, d - 1))\n        else:\n            result.append(item)\n    \n    return result", "test": "def check(candidate):\n    assert candidate([1, [2, [3, [4]]]], 1) == [1, 2, [3, [4]]]\n    assert candidate([1, [2, [3, 4]], 5], 2) == [1, 2, 3, 4, 5]\n    assert candidate([], 3) == []\n    assert candidate([1, 2, 3], 0) == [1, 2, 3]\n    assert candidate([1, [2, 3]], 0) == [1, [2, 3]]"}
{"task_id": "ExtendedEval/so_chunk", "prompt": "Split a list into consecutive chunks of size n>0. The final chunk may be shorter. Return a list of chunks.\n\nPlease return only the Python function code, without any explanations or additional text.", "entry_point": "chunk", "canonical_solution": "def chunk(lst, n):\n    if n <= 0:\n        raise ValueError('n must be > 0')\n    return [lst[i:i + n] for i in range(0, len(lst), n)]", "test": "def check(candidate):\n    assert candidate([1, 2, 3, 4, 5], 2) == [[1, 2], [3, 4], [5]]\n    assert candidate([1, 2, 3], 3) == [[1, 2, 3]]\n    assert candidate([], 1) == []\n    assert candidate([1, 2, 3, 4, 5, 6], 2) == [[1, 2], [3, 4], [5, 6]]\n    \n    try:\n        candidate([1, 2, 3], 0)\n        assert False, 'Should raise ValueError'\n    except ValueError:\n        pass"}
{"task_id": "ExtendedEval/so_roman_to_int", "prompt": "Convert a Roman numeral (I,V,X,L,C,D,M) to integer. Assume a valid numeral using standard subtractive notation.\n\nPlease return only the Python function code, without any explanations or additional text.", "entry_point": "roman_to_int", "canonical_solution": "def roman_to_int(s):\n    values = {'I': 1, 'V': 5, 'X': 10, 'L': 50, 'C': 100, 'D': 500, 'M': 1000}\n    total = 0\n    i = 0\n    \n    while i < len(s):\n        current_val = values[s[i]]\n        \n        if i + 1 < len(s) and values[s[i + 1]] > current_val:\n            # Subtractive case (e.g., IV, IX, XL, etc.)\n            total += values[s[i + 1]] - current_val\n            i += 2\n        else:\n            # Regular case\n            total += current_val\n            i += 1\n    \n    return total", "test": "def check(candidate):\n    assert candidate('III') == 3\n    assert candidate('IV') == 4\n    assert candidate('MCMXCIV') == 1994\n    assert candidate('IX') == 9\n    assert candidate('LVIII') == 58\n    assert candidate('CD') == 400\n    assert candidate('CM') == 900"}
{"task_id": "ExtendedEval/so_normalize_path", "prompt": "Normalize a Unix-style path by resolving '.' and '..' components and removing redundant slashes. Do not access the filesystem. Keep leading '/' for absolute paths.\n\nPlease return only the Python function code, without any explanations or additional text.", "entry_point": "normalize_path", "canonical_solution": "def normalize_path(path):\n    parts = []\n    is_absolute = path.startswith('/')\n    \n    for component in path.split('/'):\n        if component == '' or component == '.':\n            continue\n        elif component == '..':\n            if parts and parts[-1] != '..':\n                parts.pop()\n            elif not is_absolute:\n                parts.append('..')\n        else:\n            parts.append(component)\n    \n    result = '/'.join(parts)\n    \n    if is_absolute:\n        result = '/' + result\n    elif not result:\n        result = '.'\n    \n    return result", "test": "def check(candidate):\n    assert candidate('/a//b/./c/../d/') == '/a/b/d'\n    assert candidate('a/b/../../c') == 'c'\n    assert candidate('./././') == '.'\n    assert candidate('/') == '/'\n    assert candidate('../../a/b') == '../../a/b'\n    assert candidate('/a/b/../..') == '/'\n    assert candidate('a/b/c/../../..') == '.'}"}
{"task_id": "ExtendedEval/so_split_csv", "prompt": "Split a CSV line by commas, respecting double quotes. Quotes may enclose commas, and doubled quotes \"\" represent a literal quote inside a quoted field. Return a list of fields.\n\nPlease return only the Python function code, without any explanations or additional text.", "entry_point": "split_csv", "canonical_solution": "def split_csv(line):\n    result = []\n    field = []\n    i = 0\n    in_quotes = False\n    \n    while i < len(line):\n        char = line[i]\n        \n        if in_quotes:\n            if char == '\"':\n                if i + 1 < len(line) and line[i + 1] == '\"':\n                    # Escaped quote\n                    field.append('\"')\n                    i += 2\n                    continue\n                else:\n                    # End of quoted field\n                    in_quotes = False\n            else:\n                field.append(char)\n            i += 1\n        else:\n            if char == ',':\n                result.append(''.join(field))\n                field = []\n                i += 1\n            elif char == '\"':\n                in_quotes = True\n                i += 1\n            else:\n                field.append(char)\n                i += 1\n    \n    result.append(''.join(field))\n    return result", "test": "def check(candidate):\n    assert candidate('a,b,c') == ['a', 'b', 'c']\n    assert candidate('\"a,1\",b,\"c\"') == ['a,1', 'b', 'c']\n    assert candidate('a,\"b, c\",d') == ['a', 'b, c', 'd']\n    assert candidate('\"hello \"\"world\"\"\",test') == ['hello \"world\"', 'test']\n    assert candidate('') == ['']"}
{"task_id": "ExtendedEval/so_is_balanced", "prompt": "Check if a string of brackets is balanced. Supports (), {}, []. Return True if balanced, False otherwise.\n\nPlease return only the Python function code, without any explanations or additional text.", "entry_point": "is_balanced", "canonical_solution": "def is_balanced(s):\n    pairs = {')': '(', ']': '[', '}': '{'}\n    stack = []\n    \n    for char in s:\n        if char in '([{':\n            stack.append(char)\n        elif char in pairs:\n            if not stack or stack.pop() != pairs[char]:\n                return False\n    \n    return len(stack) == 0", "test": "def check(candidate):\n    assert candidate('()[]{}') == True\n    assert candidate('(]') == False\n    assert candidate('([{}])') == True\n    assert candidate('') == True\n    assert candidate('((()))') == True\n    assert candidate('([)]') == False\n    assert candidate('{[()]}') == True"}
{"task_id": "ExtendedEval/so_regex_email", "prompt": "Validate if a string is a valid email address (simple version). A valid email has the form name@domain.tld where name and domain contain letters, digits, dots, underscores, and hyphens, and tld is 2-6 letters.\n\nPlease return only the Python function code, without any explanations or additional text.", "entry_point": "is_valid_email", "canonical_solution": "import re\n\ndef is_valid_email(email):\n    pattern = r'^[A-Za-z0-9._-]+@[A-Za-z0-9._-]+\\.[A-Za-z]{2,6}$'\n    return bool(re.match(pattern, email))", "test": "def check(candidate):\n    assert candidate('test@example.com') == True\n    assert candidate('bad@domain') == False\n    assert candidate('hello@world.org') == True\n    assert candidate('user.name@sub.domain.co') == True\n    assert candidate('test_123@example-site.info') == True\n    assert candidate('@domain.com') == False\n    assert candidate('test@.com') == False\n    assert candidate('test@domain.') == False"}
{"task_id": "ExtendedEval/so_json_flatten", "prompt": "Flatten a nested JSON/dict into a single-level dict with dot-separated keys.\n\nPlease return only the Python function code, without any explanations or additional text.", "entry_point": "flatten_json", "canonical_solution": "def flatten_json(data, parent_key='', separator='.'):\n    items = {}\n    \n    if not isinstance(data, dict):\n        return {parent_key: data} if parent_key else {}\n    \n    for key, value in data.items():\n        new_key = parent_key + separator + key if parent_key else key\n        \n        if isinstance(value, dict):\n            items.update(flatten_json(value, new_key, separator))\n        else:\n            items[new_key] = value\n    \n    return items", "test": "def check(candidate):\n    assert candidate({'a': 1, 'b': {'c': 2, 'd': 3}}) == {'a': 1, 'b.c': 2, 'b.d': 3}\n    assert candidate({'x': {'y': {'z': 10}}}) == {'x.y.z': 10}\n    assert candidate({}) == {}\n    assert candidate({'a': 1}) == {'a': 1}\n    assert candidate({'a': {'b': {'c': {'d': 1}}}}) == {'a.b.c.d': 1}"}
{"task_id": "ExtendedEval/so_datetime_diff", "prompt": "Given two date strings in format YYYY-MM-DD, return the absolute difference in days.\n\nPlease return only the Python function code, without any explanations or additional text.", "entry_point": "days_diff", "canonical_solution": "from datetime import datetime\n\ndef days_diff(date1, date2):\n    date_format = '%Y-%m-%d'\n    d1 = datetime.strptime(date1, date_format)\n    d2 = datetime.strptime(date2, date_format)\n    return abs((d1 - d2).days)", "test": "def check(candidate):\n    assert candidate('2023-01-01', '2023-01-10') == 9\n    assert candidate('2020-02-28', '2020-03-01') == 2\n    assert candidate('2023-12-31', '2024-01-01') == 1\n    assert candidate('2023-01-01', '2023-01-01') == 0\n    assert candidate('2024-02-29', '2024-03-01') == 1"}
{"task_id": "ExtendedEval/so_matrix_rotate", "prompt": "Rotate an n x n matrix 90 degrees clockwise in-place. Return the modified matrix.\n\nPlease return only the Python function code, without any explanations or additional text.", "entry_point": "rotate_matrix", "canonical_solution": "def rotate_matrix(matrix):\n    n = len(matrix)\n    \n    # Transpose the matrix\n    for i in range(n):\n        for j in range(i, n):\n            matrix[i][j], matrix[j][i] = matrix[j][i], matrix[i][j]\n    \n    # Reverse each row\n    for row in matrix:\n        row.reverse()\n    \n    return matrix", "test": "def check(candidate):\n    assert candidate([[1, 2], [3, 4]]) == [[3, 1], [4, 2]]\n    assert candidate([[1]]) == [[1]]\n    \n    matrix1 = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]\n    expected1 = [[7, 4, 1], [8, 5, 2], [9, 6, 3]]\n    assert candidate(matrix1) == expected1\n    \n    matrix2 = [[5, 1, 9, 11], [2, 4, 8, 10], [13, 3, 6, 7], [15, 14, 12, 16]]\n    expected2 = [[15, 13, 2, 5], [14, 3, 4, 1], [12, 6, 8, 9], [16, 7, 10, 11]]\n    assert candidate(matrix2) == expected2"}
{"task_id":"ExtendedEval/so_lru_cache","prompt":"Implement a simple LRU (least recently used) cache class with get(key) and put(key,value) methods. Capacity is fixed; evict least recently used item on overflow. Return only the Python function/class code, no explanations or test cases.","entry_point":"LRUCache","canonical_solution":"from collections import OrderedDict\n\nclass LRUCache:\n    def __init__(self, capacity):\n        self.cap = capacity\n        self.od = OrderedDict()\n    def get(self, key):\n        if key not in self.od:\n            return -1\n        val = self.od.pop(key)\n        self.od[key] = val\n        return val\n    def put(self, key, val):\n        if key in self.od:\n            self.od.pop(key)\n        elif len(self.od) >= self.cap:\n            self.od.popitem(last=False)\n        self.od[key] = val","test":[{"input":"[('put',1,1),('put',2,2),('get',1),('put',3,3),('get',2)]","output":"[None,None,1,None,-1]"}]}
{"task_id":"ExtendedEval/so_word_wrap","prompt":"Implement word wrap: given a string and max width, break into lines not exceeding width, breaking at spaces if possible. Return only the Python function code, no explanations or test cases.","entry_point":"word_wrap","canonical_solution":"def word_wrap(s, width):\n    words = s.split()\n    lines = []\n    cur = []\n    cur_len = 0\n    for w in words:\n        if cur_len + len(w) + (1 if cur else 0) <= width:\n            cur_len += len(w) + (1 if cur else 0)\n            cur.append(w)\n        else:\n            if cur:\n                lines.append(' '.join(cur))\n            cur = [w]\n            cur_len = len(w)\n    if cur:\n        lines.append(' '.join(cur))\n    return lines","test":[{"input":"\"This is a test of wrapping\", 7","output":"['This is','a test','of','wrapping']"}]}
{"task_id":"ExtendedEval/so_csv_to_dict","prompt":"Parse a CSV header and one row into a dictionary mapping column names to values. Assume no quotes or commas inside values. Return only the Python function code, no explanations or test cases.","entry_point":"csv_to_dict","canonical_solution":"def csv_to_dict(header, row):\n    cols = header.split(',')\n    vals = row.split(',')\n    return dict(zip(cols, vals))","test":[{"input":"\"a,b,c\", \"1,2,3\"","output":"{'a':'1','b':'2','c':'3'}"}]}
{"task_id":"ExtendedEval/so_pascal_triangle","prompt":"Generate the first n rows of Pascal's Triangle as a list of lists. Return only the Python function code, no explanations or test cases.","entry_point":"pascal_triangle","canonical_solution":"def pascal_triangle(n):\n    res = []\n    for i in range(n):\n        row = [1]*(i+1)\n        for j in range(1,i):\n            row[j] = res[i-1][j-1] + res[i-1][j]\n        res.append(row)\n    return res","test":[{"input":"5","output":"[[1],[1,1],[1,2,1],[1,3,3,1],[1,4,6,4,1]]"}]}
{"task_id":"ExtendedEval/so_int_to_roman","prompt":"Convert an integer 1 <= n <= 3999 to Roman numeral string. Return only the Python function code, no explanations or test cases.","entry_point":"int_to_roman","canonical_solution":"def int_to_roman(num):\n    vals = [1000,900,500,400,100,90,50,40,10,9,5,4,1]\n    syms = [\"M\",\"CM\",\"D\",\"CD\",\"C\",\"XC\",\"L\",\"XL\",\"X\",\"IX\",\"V\",\"IV\",\"I\"]\n    res = []\n    for v,s in zip(vals,syms):\n        while num >= v:\n            res.append(s)\n            num -= v\n    return ''.join(res)","test":[{"input":"3","output":"\"III\""},{"input":"58","output":"\"LVIII\""},{"input":"1994","output":"\"MCMXCIV\""}]}
{"task_id":"ExtendedEval/so_prime_sieve","prompt":"Implement the Sieve of Eratosthenes: given n, return all primes <= n. Return only the Python function code, no explanations or test cases.","entry_point":"sieve_primes","canonical_solution":"def sieve_primes(n):\n    if n < 2:\n        return []\n    sieve = [True]*(n+1)\n    sieve[0]=sieve[1]=False\n    for i in range(2,int(n**0.5)+1):\n        if sieve[i]:\n            for j in range(i*i,n+1,i):\n                sieve[j]=False\n    return [i for i in range(2,n+1) if sieve[i]]","test":[{"input":"10","output":"[2,3,5,7]"},{"input":"1","output":"[]"}]}
{"task_id":"ExtendedEval/gh_env_parse","prompt":".env parser: Given a string with .env file contents, parse KEY=VALUE pairs. Ignore blank lines and lines starting with '#'. Trim surrounding spaces around keys and values. Values may be quoted with single or double quotes; if quoted, unquote without interpreting escapes. Return only the Python function code, no explanations or test cases.","entry_point":"parse_env","canonical_solution":"def parse_env(text):\n    def unquote(v):\n        v=v.strip()\n        if len(v)>=2 and ((v[0]==v[-1]==\"'\") or (v[0]==v[-1]=='\"')):\n            return v[1:-1]\n        return v\n    out={}\n    for line in text.splitlines():\n        line=line.strip()\n        if not line or line.startswith('#'):\n            continue\n        if '=' not in line:\n            continue\n        k,v=line.split('=',1)\n        k=k.strip()\n        v=unquote(v)\n        out[k]=v\n    return out","test":[{"input":"\"\"\"\n# comment\nUSER = 'alice'\nPORT=8080\nDEBUG = true\nEMPTY=\n\"\"\"","output":"{'USER':'alice','PORT':'8080','DEBUG':'true','EMPTY':''}"},{"input":"\"KEY=value\\nNAME=Bob\\n#X=1\"","output":"{'KEY':'value','NAME':'Bob'}"}]}
{"task_id":"ExtendedEval/gh_semver_compare","prompt":"Compare two semantic version strings MAJOR.MINOR.PATCH optionally with a prerelease suffix after '-'. Return 1 if v1>v2, -1 if v1<v2, else 0. Numeric parts compared numerically; a release without prerelease is greater than the same version with prerelease. Prerelease strings are compared lexicographically if needed. Return only the Python function code, no explanations or test cases.","entry_point":"semver_cmp","canonical_solution":"def semver_cmp(v1, v2):\n    def split(v):\n        core, *pre = v.split('-',1)\n        a=list(map(int, core.split('.')))\n        while len(a)<3: a.append(0)\n        p=pre[0] if pre else None\n        return a,p\n    a1,p1=split(v1); a2,p2=split(v2)\n    if a1!=a2:\n        return 1 if a1>a2 else -1\n    if p1==p2:\n        return 0\n    if p1 is None: return 1\n    if p2 is None: return -1\n    return (1 if p1>p2 else -1) if p1!=p2 else 0","test":[{"input":"\"1.2.3\",\"1.2.3\"","output":"0"},{"input":"\"1.2.3\",\"1.2.3-alpha\"","output":"1"},{"input":"\"1.10.0\",\"1.2.99\"","output":"1"},{"input":"\"2.0.0-alpha\",\"2.0.0-beta\"","output":"-1"}]}
{"task_id":"ExtendedEval/gh_markdown_toc","prompt":"Extract Markdown headings from text. Return a list of tuples (level, title) for lines starting with 1..6 '#' followed by a space. Do not strip inline hashes inside the title. Return only the Python function code, no explanations or test cases.","entry_point":"extract_headings","canonical_solution":"def extract_headings(md):\n    out=[]\n    for line in md.splitlines():\n        i=0\n        while i < len(line) and i<6 and line[i]=='#':\n            i+=1\n        if i and i<=6 and len(line)>i and line[i]==' ':\n            out.append((i, line[i+1:]))\n    return out","test":[{"input":"\"# Title\\ntext\\n## Section 1\\n### Sub ## part\\n####### not heading\"","output":"[(1,'Title'),(2,'Section 1'),(3,'Sub ## part')]"},{"input":"\"no headings\"","output":"[]"}]}
{"task_id":"ExtendedEval/gh_gitignore_match","prompt":"Implement a simple .gitignore matcher. Given a path and a list of glob patterns, return True if any pattern matches the path. Use Unix-style matching; treat patterns as matching anywhere (no directory anchoring). Return only the Python function code, no explanations or test cases.","entry_point":"is_ignored","canonical_solution":"import fnmatch\n\ndef is_ignored(path, patterns):\n    for pat in patterns:\n        if fnmatch.fnmatch(path, pat):\n            return True\n    return False","test":[{"input":"\"src/main.py\", ['*.py']","output":"True"},{"input":"\"build/output.bin\", ['build/*','*.log']","output":"True"},{"input":"\"docs/readme.md\", ['*.rst']","output":"False"}]}
{"task_id":"ExtendedEval/gh_url_join","prompt":"Join a base URL and a relative path like browsers do. Use Python's standard behavior to resolve '..' and absolute paths. Return only the Python function code, no explanations or test cases.","entry_point":"url_join","canonical_solution":"from urllib.parse import urljoin\n\ndef url_join(base, path):\n    return urljoin(base, path)","test":[{"input":"\"https://example.com/a/b/\",\"c\"","output":"\"https://example.com/a/b/c\""},{"input":"\"https://example.com/a/b/\",\"/x/y\"","output":"\"https://example.com/x/y\""},{"input":"\"https://example.com/a/b\",\"../c\"","output":"\"https://example.com/c\""}]}
{"task_id":"ExtendedEval/gh_json_minify","prompt":"Minify JSON-with-comments: remove // line comments and /* ... */ block comments, then strip whitespace around the result. Do not alter content inside string literals (double-quoted). Return only the Python function code, no explanations or test cases.","entry_point":"json_minify","canonical_solution":"def json_minify(s):\n    out=[]; i=0; n=len(s); in_str=False\n    while i<n:\n        ch=s[i]\n        if in_str:\n            out.append(ch)\n            if ch=='\\\\':\n                if i+1<n: out.append(s[i+1]); i+=2; continue\n            if ch=='\"': in_str=False\n            i+=1; continue\n        if ch=='\"':\n            in_str=True; out.append(ch); i+=1; continue\n        if ch=='/' and i+1<n and s[i+1]=='/':\n            i+=2\n            while i<n and s[i]!='\\n': i+=1\n            continue\n        if ch=='/' and i+1<n and s[i+1]=='*':\n            i+=2\n            while i+1<n and not (s[i]=='*' and s[i+1]=='/'): i+=1\n            i+=2; continue\n        out.append(ch); i+=1\n    return ''.join(out).strip()","test":[{"input":"\"{\\n  // comment\\n  \\\"a\\\": 1, /* block */ \\n  \\\"b\\\": \\\"/not comment/* ok */\\\"\\n}\"","output":"\"{\\n  \\n  \\\"a\\\": 1,  \\n  \\\"b\\\": \\\"/not comment/* ok */\\\"\\n}\""}]}
{"task_id":"ExtendedEval/gh_dedupe_paths_ci","prompt":"Given a list of file paths, remove duplicates case-insensitively (Windows-style), preserving the first occurrence order. Return only the Python function code, no explanations or test cases.","entry_point":"dedupe_paths_ci","canonical_solution":"def dedupe_paths_ci(paths):\n    seen=set(); out=[]\n    for p in paths:\n        k=p.lower()\n        if k not in seen:\n            seen.add(k); out.append(p)\n    return out","test":[{"input":"['Readme.md','README.md','src/app.py','Src/App.py']","output":"['Readme.md','src/app.py']"},{"input":"[]","output":"[]"}]}
{"task_id":"ExtendedEval/gh_license_detect_simple","prompt":"Detect a common license name in a text blob. Return 'MIT', 'Apache-2.0', 'GPL', or 'Unknown'. Heuristics: contains 'mit license' -> MIT; 'apache license' and '2.0' -> Apache-2.0; 'gnu general public license' -> GPL. Return only the Python function code, no explanations or test cases.","entry_point":"detect_license","canonical_solution":"def detect_license(text):\n    t=text.lower()\n    if 'mit license' in t:\n        return 'MIT'\n    if 'apache license' in t and '2.0' in t:\n        return 'Apache-2.0'\n    if 'gnu general public license' in t:\n        return 'GPL'\n    return 'Unknown'","test":[{"input":"\"Permission is hereby granted... MIT License\"","output":"\"MIT\""},{"input":"\"Apache License Version 2.0\"","output":"\"Apache-2.0\""},{"input":"\"Some random text\"","output":"\"Unknown\""}]}
{"task_id":"ExtendedEval/gh_changelog_latest","prompt":"Given a CHANGELOG string with lines like '## [X.Y.Z] - YYYY-MM-DD', return the latest version by semantic version precedence (ignore prerelease). If none found, return ''. Return only the Python function code, no explanations or test cases.","entry_point":"latest_changelog_version","canonical_solution":"def latest_changelog_version(text):\n    vers=[]\n    for line in text.splitlines():\n        line=line.strip()\n        if line.startswith('## [') and ']' in line:\n            v=line[4: line.find(']')]\n            parts=list(map(int,(v.split('.')+['0','0','0'])[:3]))\n            vers.append((parts, v))\n    if not vers:\n        return ''\n    return max(vers)[1]","test":[{"input":"\"## [1.0.0] - 2020-01-01\\n## [1.2.0] - 2021-01-01\\n## [1.1.9] - 2020-06-01\"","output":"\"1.2.0\""},{"input":"\"No versions here\"","output":"\"\""}]}
{"task_id":"ExtendedEval/gh_parse_github_slug","prompt":"Parse a GitHub URL of a repository, issue, or pull request and return a tuple (owner, repo, kind, id_or_None) where kind is 'repo', 'issues', or 'pull' (the URL path segment, or 'repo' when there is none). Accept https URLs with or without trailing slashes. Return only the Python function code, no explanations or test cases.","entry_point":"parse_github_url","canonical_solution":"import re\n\ndef parse_github_url(url):\n    m=re.match(r'^https?://github\\.com/([^/]+)/([^/]+)(?:/(issues|pull)/?(\\d+)?)?/?$', url)\n    if not m:\n        return ('','','',None)\n    owner, repo, kind, num = m.group(1), m.group(2), m.group(3) or 'repo', m.group(4)\n    return (owner, repo, kind, int(num) if num else None)","test":[{"input":"\"https://github.com/pallets/flask\"","output":"('pallets','flask','repo',None)"},{"input":"\"https://github.com/python/cpython/issues/12345\"","output":"('python','cpython','issues',12345)"},{"input":"\"https://github.com/psf/requests/pull/1/\"","output":"('psf','requests','pull',1)"}]}
{"task_id": "ExtendedEval/164", "prompt": "def is_valid_ipv4(ip: str) -> bool:\n    \"\"\"\n    Check if a string is a valid IPv4 address.\n    Valid format: four integers 0-255 separated by dots, no leading zeros.\n    \n    Examples:\n    is_valid_ipv4('192.168.1.1') => True\n    is_valid_ipv4('256.1.1.1') => False  \n    is_valid_ipv4('01.1.1.1') => False\n    is_valid_ipv4('1.1.1') => False\n    \n    Return only the Python function code, no explanations.\n    \"\"\"\n", "entry_point": "is_valid_ipv4", "canonical_solution": "def is_valid_ipv4(ip: str) -> bool:\n    parts = ip.split('.')\n    if len(parts) != 4:\n        return False\n    \n    for part in parts:\n        if not part or (part[0] == '0' and len(part) > 1):\n            return False\n        if not part.isdigit():\n            return False\n        if int(part) > 255:\n            return False\n    \n    return True", "test": "def check(candidate):\n    assert candidate('192.168.1.1') == True\n    assert candidate('0.0.0.0') == True\n    assert candidate('255.255.255.255') == True\n    assert candidate('256.1.1.1') == False\n    assert candidate('01.1.1.1') == False\n    assert candidate('1.1.1') == False\n    assert candidate('1.1.1.1.1') == False\n    assert candidate('a.1.1.1') == False\n    assert candidate('1..1.1') == False\n    assert candidate('') == False"}
{"task_id": "ExtendedEval/166", "prompt": "def longest_common_subsequence(text1: str, text2: str) -> int:\n    \"\"\"\n    Find the length of the longest common subsequence between two strings.\n    A subsequence is derived by deleting some or no elements without changing\n    the order of remaining elements.\n    \n    Examples:\n    longest_common_subsequence('abcde', 'ace') => 3\n    longest_common_subsequence('abc', 'abc') => 3\n    longest_common_subsequence('abc', 'def') => 0\n    \n    Return only the Python function code, no explanations.\n    \"\"\"\n", "entry_point": "longest_common_subsequence", "canonical_solution": "def longest_common_subsequence(text1: str, text2: str) -> int:\n    m, n = len(text1), len(text2)\n    dp = [[0] * (n + 1) for _ in range(m + 1)]\n    \n    for i in range(1, m + 1):\n        for j in range(1, n + 1):\n            if text1[i-1] == text2[j-1]:\n                dp[i][j] = dp[i-1][j-1] + 1\n            else:\n                dp[i][j] = max(dp[i-1][j], dp[i][j-1])\n    \n    return dp[m][n]", "test": "def check(candidate):\n    assert candidate('abcde', 'ace') == 3\n    assert candidate('abc', 'abc') == 3\n    assert candidate('abc', 'def') == 0\n    assert candidate('', 'abc') == 0\n    assert candidate('abc', '') == 0\n    assert candidate('bl', 'yby') == 1\n    assert candidate('ABCDGH', 'AEDFHR') == 3"}
{"task_id": "ExtendedEval/167", "prompt": "def edit_distance(word1: str, word2: str) -> int:\n    \"\"\"\n    Calculate the minimum edit distance (Levenshtein distance) between two words.\n    Operations allowed: insert, delete, replace a character.\n    \n    Examples:\n    edit_distance('horse', 'ros') => 3\n    edit_distance('intention', 'execution') => 5\n    edit_distance('', 'abc') => 3\n    \n    Return only the Python function code, no explanations.\n    \"\"\"\n", "entry_point": "edit_distance", "canonical_solution": "def edit_distance(word1: str, word2: str) -> int:\n    m, n = len(word1), len(word2)\n    dp = [[0] * (n + 1) for _ in range(m + 1)]\n    \n    # Initialize base cases\n    for i in range(m + 1):\n        dp[i][0] = i\n    for j in range(n + 1):\n        dp[0][j] = j\n    \n    for i in range(1, m + 1):\n        for j in range(1, n + 1):\n            if word1[i-1] == word2[j-1]:\n                dp[i][j] = dp[i-1][j-1]\n            else:\n                dp[i][j] = 1 + min(\n                    dp[i-1][j],    # delete\n                    dp[i][j-1],    # insert\n                    dp[i-1][j-1]   # replace\n                )\n    \n    return dp[m][n]", "test": "def check(candidate):\n    assert candidate('horse', 'ros') == 3\n    assert candidate('intention', 'execution') == 5\n    assert candidate('', 'abc') == 3\n    assert candidate('abc', '') == 3\n    assert candidate('same', 'same') == 0\n    assert candidate('a', 'b') == 1\n    assert candidate('abc', 'yabd') == 2"}
{"task_id": "ExtendedEval/168", "prompt": "def count_islands(grid: list) -> int:\n    \"\"\"\n    Count the number of islands in a 2D binary grid.\n    An island is surrounded by water and formed by connecting adjacent\n    lands horizontally or vertically (not diagonally).\n    \n    Examples:\n    count_islands([['1','1','0','0','0'],\n                   ['1','1','0','0','0'],\n                   ['0','0','1','0','0'],\n                   ['0','0','0','1','1']]) => 3\n    \n    Return only the Python function code, no explanations.\n    \"\"\"\n", "entry_point": "count_islands", "canonical_solution": "def count_islands(grid: list) -> int:\n    if not grid or not grid[0]:\n        return 0\n    \n    rows, cols = len(grid), len(grid[0])\n    islands = 0\n    \n    def dfs(i, j):\n        if i < 0 or i >= rows or j < 0 or j >= cols or grid[i][j] == '0':\n            return\n        \n        grid[i][j] = '0'  # mark as visited\n        \n        # Check all 4 directions\n        dfs(i + 1, j)\n        dfs(i - 1, j)\n        dfs(i, j + 1)\n        dfs(i, j - 1)\n    \n    for i in range(rows):\n        for j in range(cols):\n            if grid[i][j] == '1':\n                islands += 1\n                dfs(i, j)\n    \n    return islands", "test": "def check(candidate):\n    grid1 = [['1','1','1','1','0'],\n             ['1','1','0','1','0'],\n             ['1','1','0','0','0'],\n             ['0','0','0','0','0']]\n    assert candidate(grid1) == 1\n    \n    grid2 = [['1','1','0','0','0'],\n             ['1','1','0','0','0'],\n             ['0','0','1','0','0'],\n             ['0','0','0','1','1']]\n    assert candidate(grid2) == 3\n    \n    assert candidate([]) == 0\n    assert candidate([['0','0'],['0','0']]) == 0\n    assert candidate([['1']]) == 1"}
{"task_id": "ExtendedEval/170", "prompt": "def binary_search_insert(nums: list, target: int) -> int:\n    \"\"\"\n    Find the index where target should be inserted in sorted array to maintain order.\n    If target exists, return the index of first occurrence.\n    \n    Examples:\n    binary_search_insert([1,3,5,6], 5) => 2\n    binary_search_insert([1,3,5,6], 2) => 1\n    binary_search_insert([1,3,5,6], 7) => 4\n    binary_search_insert([1,3,5,6], 0) => 0\n    \n    Return only the Python function code, no explanations.\n    \"\"\"\n", "entry_point": "binary_search_insert", "canonical_solution": "def binary_search_insert(nums: list, target: int) -> int:\n    left, right = 0, len(nums)\n    \n    while left < right:\n        mid = (left + right) // 2\n        if nums[mid] < target:\n            left = mid + 1\n        else:\n            right = mid\n    \n    return left", "test": "def check(candidate):\n    assert candidate([1,3,5,6], 5) == 2\n    assert candidate([1,3,5,6], 2) == 1\n    assert candidate([1,3,5,6], 7) == 4\n    assert candidate([1,3,5,6], 0) == 0\n    assert candidate([], 1) == 0\n    assert candidate([1], 0) == 0\n    assert candidate([1], 2) == 1\n    assert candidate([1,1,1], 1) == 0"}
{"task_id": "ExtendedEval/171", "prompt": "def merge_k_sorted_lists(lists: list) -> list:\n    \"\"\"\n    Merge k sorted lists into one sorted list.\n    Each list is sorted in ascending order.\n    \n    Examples:\n    merge_k_sorted_lists([[1,4,5],[1,3,4],[2,6]]) => [1,1,2,3,4,4,5,6]\n    merge_k_sorted_lists([]) => []\n    merge_k_sorted_lists([[]]) => []\n    \n    Return only the Python function code, no explanations.\n    \"\"\"\n", "entry_point": "merge_k_sorted_lists", "canonical_solution": "def merge_k_sorted_lists(lists: list) -> list:\n    import heapq\n    \n    if not lists:\n        return []\n    \n    heap = []\n    \n    # Initialize heap with first element from each list\n    for i, lst in enumerate(lists):\n        if lst:\n            heapq.heappush(heap, (lst[0], i, 0))  # (value, list_index, element_index)\n    \n    result = []\n    \n    while heap:\n        val, list_idx, elem_idx = heapq.heappop(heap)\n        result.append(val)\n        \n        # Add next element from the same list\n        if elem_idx + 1 < len(lists[list_idx]):\n            next_val = lists[list_idx][elem_idx + 1]\n            heapq.heappush(heap, (next_val, list_idx, elem_idx + 1))\n    \n    return result", "test": "def check(candidate):\n    assert candidate([[1,4,5],[1,3,4],[2,6]]) == [1,1,2,3,4,4,5,6]\n    assert candidate([]) == []\n    assert candidate([[]]) == []\n    assert candidate([[1],[2],[3]]) == [1,2,3]\n    assert candidate([[1,2,3]]) == [1,2,3]\n    assert candidate([[-1,0,1],[2,3,4]]) == [-1,0,1,2,3,4]"}
{"task_id": "ExtendedEval/172", "prompt": "def sliding_window_maximum(nums: list, k: int) -> list:\n    \"\"\"\n    Find the maximum in each sliding window of size k.\n    \n    Examples:\n    sliding_window_maximum([1,3,-1,-3,5,3,6,7], 3) => [3,3,5,5,6,7]\n    sliding_window_maximum([1], 1) => [1]\n    sliding_window_maximum([1,-1], 1) => [1,-1]\n    \n    Return only the Python function code, no explanations.\n    \"\"\"\n", "entry_point": "sliding_window_maximum", "canonical_solution": "def sliding_window_maximum(nums: list, k: int) -> list:\n    from collections import deque\n    \n    if not nums or k == 0:\n        return []\n    \n    if k == 1:\n        return nums\n    \n    dq = deque()  # stores indices\n    result = []\n    \n    for i in range(len(nums)):\n        # Remove indices outside current window\n        while dq and dq[0] <= i - k:\n            dq.popleft()\n        \n        # Remove indices of smaller elements\n        while dq and nums[dq[-1]] < nums[i]:\n            dq.pop()\n        \n        dq.append(i)\n        \n        # Add maximum to result if window is complete\n        if i >= k - 1:\n            result.append(nums[dq[0]])\n    \n    return result", "test": "def check(candidate):\n    assert candidate([1,3,-1,-3,5,3,6,7], 3) == [3,3,5,5,6,7]\n    assert candidate([1], 1) == [1]\n    assert candidate([1,-1], 1) == [1,-1]\n    assert candidate([9,11], 2) == [11]\n    assert candidate([4,-2], 2) == [4]\n    assert candidate([1,2,3,4], 2) == [2,3,4]"}
{"task_id": "ExtendedEval/173", "prompt": "def longest_increasing_subsequence(nums: list) -> int:\n    \"\"\"\n    Find the length of the longest strictly increasing subsequence.\n    \n    Examples:\n    longest_increasing_subsequence([10,9,2,5,3,7,101,18]) => 4\n    longest_increasing_subsequence([0,1,0,3,2,3]) => 4\n    longest_increasing_subsequence([7,7,7,7,7,7,7]) => 1\n    \n    Return only the Python function code, no explanations.\n    \"\"\"\n", "entry_point": "longest_increasing_subsequence", "canonical_solution": "def longest_increasing_subsequence(nums: list) -> int:\n    if not nums:\n        return 0\n    \n    # Binary search approach - O(n log n)\n    import bisect\n    \n    tails = []\n    \n    for num in nums:\n        pos = bisect.bisect_left(tails, num)\n        if pos == len(tails):\n            tails.append(num)\n        else:\n            tails[pos] = num\n    \n    return len(tails)", "test": "def check(candidate):\n    assert candidate([10,9,2,5,3,7,101,18]) == 4\n    assert candidate([0,1,0,3,2,3]) == 4\n    assert candidate([7,7,7,7,7,7,7]) == 1\n    assert candidate([]) == 0\n    assert candidate([1,3,6,7,9,4,10,5,6]) == 6\n    assert candidate([10,22,9,33,21,50,41,60]) == 5"}
{"task_id": "ExtendedEval/174", "prompt": "def find_median_sorted_arrays(nums1: list, nums2: list) -> float:\n    \"\"\"\n    Find the median of two sorted arrays in O(log(min(m,n))) time.\n    \n    Examples:\n    find_median_sorted_arrays([1,3], [2]) => 2.0\n    find_median_sorted_arrays([1,2], [3,4]) => 2.5\n    find_median_sorted_arrays([0,0], [0,0]) => 0.0\n    \n    Return only the Python function code, no explanations.\n    \"\"\"\n", "entry_point": "find_median_sorted_arrays", "canonical_solution": "def find_median_sorted_arrays(nums1: list, nums2: list) -> float:\n    # Ensure nums1 is the smaller array\n    if len(nums1) > len(nums2):\n        nums1, nums2 = nums2, nums1\n    \n    m, n = len(nums1), len(nums2)\n    \n    left, right = 0, m\n    \n    while left <= right:\n        partition1 = (left + right) // 2\n        partition2 = (m + n + 1) // 2 - partition1\n        \n        maxLeft1 = float('-inf') if partition1 == 0 else nums1[partition1 - 1]\n        maxLeft2 = float('-inf') if partition2 == 0 else nums2[partition2 - 1]\n        \n        minRight1 = float('inf') if partition1 == m else nums1[partition1]\n        minRight2 = float('inf') if partition2 == n else nums2[partition2]\n        \n        if maxLeft1 <= minRight2 and maxLeft2 <= minRight1:\n            if (m + n) % 2 == 0:\n                return (max(maxLeft1, maxLeft2) + min(minRight1, minRight2)) / 2.0\n            else:\n                return max(maxLeft1, maxLeft2)\n        \n        elif maxLeft1 > minRight2:\n            right = partition1 - 1\n        else:\n            left = partition1 + 1", "test": "def check(candidate):\n    assert candidate([1,3], [2]) == 2.0\n    assert candidate([1,2], [3,4]) == 2.5\n    assert candidate([0,0], [0,0]) == 0.0\n    assert candidate([], [1]) == 1.0\n    assert candidate([2], []) == 2.0\n    assert candidate([1,3], [2,4]) == 2.5\n    assert candidate([1,2,3], [4,5,6]) == 3.5"}
{"task_id": "ExtendedEval/175", "prompt": "def regular_expression_match(s: str, p: str) -> bool:\n    \"\"\"\n    Implement regular expression matching with '.' and '*'.\n    '.' matches any single character\n    '*' matches zero or more of the preceding element\n    \n    Examples:\n    regular_expression_match('aa', 'a') => False\n    regular_expression_match('aa', 'a*') => True\n    regular_expression_match('ab', '.*') => True\n    regular_expression_match('aab', 'c*a*b') => True\n    \n    Return only the Python function code, no explanations.\n    \"\"\"\n", "entry_point": "regular_expression_match", "canonical_solution": "def regular_expression_match(s: str, p: str) -> bool:\n    dp = {}\n    \n    def dfs(i, j):\n        if (i, j) in dp:\n            return dp[(i, j)]\n        \n        if j == len(p):\n            return i == len(s)\n        \n        first_match = i < len(s) and (p[j] == s[i] or p[j] == '.')\n        \n        if j + 1 < len(p) and p[j + 1] == '*':\n            result = (dfs(i, j + 2) or  # match 0 occurrences\n                     (first_match and dfs(i + 1, j)))  # match 1+ occurrences\n        else:\n            result = first_match and dfs(i + 1, j + 1)\n        \n        dp[(i, j)] = result\n        return result\n    \n    return dfs(0, 0)", "test": "def check(candidate):\n    assert candidate('aa', 'a') == False\n    assert candidate('aa', 'a*') == True\n    assert candidate('ab', '.*') == True\n    assert candidate('aab', 'c*a*b') == True\n    assert candidate('mississippi', 'mis*is*p*.') == False\n    assert candidate('', '') == True\n    assert candidate('', 'a*') == True\n    assert candidate('a', 'ab*') == True"}
{"task_id": "ExtendedEval/164", "prompt": "def is_valid_ipv4(ip: str) -> bool:\n    \"\"\"\n    Check if a string is a valid IPv4 address.\n    Valid format: four integers 0-255 separated by dots, no leading zeros.\n    \n    Examples:\n    is_valid_ipv4('192.168.1.1') => True\n    is_valid_ipv4('256.1.1.1') => False  \n    is_valid_ipv4('01.1.1.1') => False\n    is_valid_ipv4('1.1.1') => False\n    \n    Return only the Python function code, no explanations.\n    \"\"\"\n", "entry_point": "is_valid_ipv4", "canonical_solution": "def is_valid_ipv4(ip: str) -> bool:\n    parts = ip.split('.')\n    if len(parts) != 4:\n        return False\n    \n    for part in parts:\n        if not part or (part[0] == '0' and len(part) > 1):\n            return False\n        if not part.isdigit():\n            return False\n        if int(part) > 255:\n            return False\n    \n    return True", "test": "def check(candidate):\n    assert candidate('192.168.1.1') == True\n    assert candidate('0.0.0.0') == True\n    assert candidate('255.255.255.255') == True\n    assert candidate('256.1.1.1') == False\n    assert candidate('01.1.1.1') == False\n    assert candidate('1.1.1') == False\n    assert candidate('1.1.1.1.1') == False\n    assert candidate('a.1.1.1') == False\n    assert candidate('1..1.1') == False\n    assert candidate('') == False"}
{"task_id": "ExtendedEval/165", "prompt": "def knapsack_01(weights: list, values: list, capacity: int) -> int:\n    \"\"\"\n    Solve the 0/1 knapsack problem using dynamic programming.\n    Return the maximum value that can be obtained.\n    \n    Args:\n    - weights: list of item weights\n    - values: list of item values  \n    - capacity: knapsack capacity\n    \n    Examples:\n    knapsack_01([1, 2, 3], [60, 100, 120], 5) => 220\n    knapsack_01([10, 20, 30], [60, 100, 120], 50) => 220\n    \n    Return only the Python function code, no explanations.\n    \"\"\"\n", "entry_point": "knapsack_01", "canonical_solution": "def knapsack_01(weights: list, values: list, capacity: int) -> int:\n    n = len(weights)\n    dp = [[0] * (capacity + 1) for _ in range(n + 1)]\n    \n    for i in range(1, n + 1):\n        for w in range(capacity + 1):\n            if weights[i-1] <= w:\n                dp[i][w] = max(\n                    dp[i-1][w],  # don't take item\n                    dp[i-1][w - weights[i-1]] + values[i-1]  # take item\n                )\n            else:\n                dp[i][w] = dp[i-1][w]\n    \n    return dp[n][capacity]", "test": "def check(candidate):\n    assert candidate([1, 2, 3], [60, 100, 120], 5) == 220\n    assert candidate([10, 20, 30], [60, 100, 120], 50) == 220\n    assert candidate([2, 3, 4, 5], [3, 4, 5, 6], 5) == 7\n    assert candidate([1], [10], 1) == 10\n    assert candidate([1], [10], 0) == 0\n    assert candidate([], [], 10) == 0"}
{"task_id": "ExtendedEval/166", "prompt": "def longest_common_subsequence(text1: str, text2: str) -> int:\n    \"\"\"\n    Find the length of the longest common subsequence between two strings.\n    A subsequence is derived by deleting some or no elements without changing\n    the order of remaining elements.\n    \n    Examples:\n    longest_common_subsequence('abcde', 'ace') => 3\n    longest_common_subsequence('abc', 'abc') => 3\n    longest_common_subsequence('abc', 'def') => 0\n    \n    Return only the Python function code, no explanations.\n    \"\"\"\n", "entry_point": "longest_common_subsequence", "canonical_solution": "def longest_common_subsequence(text1: str, text2: str) -> int:\n    m, n = len(text1), len(text2)\n    dp = [[0] * (n + 1) for _ in range(m + 1)]\n    \n    for i in range(1, m + 1):\n        for j in range(1, n + 1):\n            if text1[i-1] == text2[j-1]:\n                dp[i][j] = dp[i-1][j-1] + 1\n            else:\n                dp[i][j] = max(dp[i-1][j], dp[i][j-1])\n    \n    return dp[m][n]", "test": "def check(candidate):\n    assert candidate('abcde', 'ace') == 3\n    assert candidate('abc', 'abc') == 3\n    assert candidate('abc', 'def') == 0\n    assert candidate('', 'abc') == 0\n    assert candidate('abc', '') == 0\n    assert candidate('bl', 'yby') == 1\n    assert candidate('ABCDGH', 'AEDFHR') == 3"}
{"task_id": "ExtendedEval/167", "prompt": "def edit_distance(word1: str, word2: str) -> int:\n    \"\"\"\n    Calculate the minimum edit distance (Levenshtein distance) between two words.\n    Operations allowed: insert, delete, replace a character.\n    \n    Examples:\n    edit_distance('horse', 'ros') => 3\n    edit_distance('intention', 'execution') => 5\n    edit_distance('', 'abc') => 3\n    \n    Return only the Python function code, no explanations.\n    \"\"\"\n", "entry_point": "edit_distance", "canonical_solution": "def edit_distance(word1: str, word2: str) -> int:\n    m, n = len(word1), len(word2)\n    dp = [[0] * (n + 1) for _ in range(m + 1)]\n    \n    # Initialize base cases\n    for i in range(m + 1):\n        dp[i][0] = i\n    for j in range(n + 1):\n        dp[0][j] = j\n    \n    for i in range(1, m + 1):\n        for j in range(1, n + 1):\n            if word1[i-1] == word2[j-1]:\n                dp[i][j] = dp[i-1][j-1]\n            else:\n                dp[i][j] = 1 + min(\n                    dp[i-1][j],    # delete\n                    dp[i][j-1],    # insert\n                    dp[i-1][j-1]   # replace\n                )\n    \n    return dp[m][n]", "test": "def check(candidate):\n    assert candidate('horse', 'ros') == 3\n    assert candidate('intention', 'execution') == 5\n    assert candidate('', 'abc') == 3\n    assert candidate('abc', '') == 3\n    assert candidate('same', 'same') == 0\n    assert candidate('a', 'b') == 1\n    assert candidate('abc', 'yabd') == 2"}
{"task_id": "ExtendedEval/168", "prompt": "def count_islands(grid: list) -> int:\n    \"\"\"\n    Count the number of islands in a 2D binary grid.\n    An island is surrounded by water and formed by connecting adjacent\n    lands horizontally or vertically (not diagonally).\n    \n    Examples:\n    count_islands([['1','1','0','0','0'],\n                   ['1','1','0','0','0'],\n                   ['0','0','1','0','0'],\n                   ['0','0','0','1','1']]) => 3\n    \n    Return only the Python function code, no explanations.\n    \"\"\"\n", "entry_point": "count_islands", "canonical_solution": "def count_islands(grid: list) -> int:\n    if not grid or not grid[0]:\n        return 0\n    \n    rows, cols = len(grid), len(grid[0])\n    islands = 0\n    \n    def dfs(i, j):\n        if i < 0 or i >= rows or j < 0 or j >= cols or grid[i][j] == '0':\n            return\n        \n        grid[i][j] = '0'  # mark as visited\n        \n        # Check all 4 directions\n        dfs(i + 1, j)\n        dfs(i - 1, j)\n        dfs(i, j + 1)\n        dfs(i, j - 1)\n    \n    for i in range(rows):\n        for j in range(cols):\n            if grid[i][j] == '1':\n                islands += 1\n                dfs(i, j)\n    \n    return islands", "test": "def check(candidate):\n    grid1 = [['1','1','1','1','0'],\n             ['1','1','0','1','0'],\n             ['1','1','0','0','0'],\n             ['0','0','0','0','0']]\n    assert candidate(grid1) == 1\n    \n    grid2 = [['1','1','0','0','0'],\n             ['1','1','0','0','0'],\n             ['0','0','1','0','0'],\n             ['0','0','0','1','1']]\n    assert candidate(grid2) == 3\n    \n    assert candidate([]) == 0\n    assert candidate([['0','0'],['0','0']]) == 0\n    assert candidate([['1']]) == 1"}
{"task_id": "ExtendedEval/169", "prompt": "def valid_parentheses_stack(s: str) -> bool:\n    \"\"\"\n    Check if string of parentheses is valid using stack approach.\n    Valid means: (), [], {} are properly nested and matched.\n    \n    Examples:\n    valid_parentheses_stack('()[]{}') => True\n    valid_parentheses_stack('([)]') => False\n    valid_parentheses_stack('{[()]}') => True\n    \n    Return only the Python function code, no explanations.\n    \"\"\"\n", "entry_point": "valid_parentheses_stack", "canonical_solution": "def valid_parentheses_stack(s: str) -> bool:\n    stack = []\n    mapping = {')': '(', ']': '[', '}': '{'}\n    \n    for char in s:\n        if char in mapping:\n            if not stack or stack.pop() != mapping[char]:\n                return False\n        else:\n            stack.append(char)\n    \n    return len(stack) == 0", "test": "def check(candidate):\n    assert candidate('()[]{}') == True\n    assert candidate('([)]') == False\n    assert candidate('{[()]}') == True\n    assert candidate('') == True\n    assert candidate('(') == False\n    assert candidate(')') == False\n    assert candidate('((()))') == True\n    assert candidate('({[]})') == True"}
{"task_id": "ExtendedEval/170", "prompt": "def binary_search_insert(nums: list, target: int) -> int:\n    \"\"\"\n    Find the index where target should be inserted in sorted array to maintain order.\n    If target exists, return the index of first occurrence.\n    \n    Examples:\n    binary_search_insert([1,3,5,6], 5) => 2\n    binary_search_insert([1,3,5,6], 2) => 1\n    binary_search_insert([1,3,5,6], 7) => 4\n    binary_search_insert([1,3,5,6], 0) => 0\n    \n    Return only the Python function code, no explanations.\n    \"\"\"\n", "entry_point": "binary_search_insert", "canonical_solution": "def binary_search_insert(nums: list, target: int) -> int:\n    left, right = 0, len(nums)\n    \n    while left < right:\n        mid = (left + right) // 2\n        if nums[mid] < target:\n            left = mid + 1\n        else:\n            right = mid\n    \n    return left", "test": "def check(candidate):\n    assert candidate([1,3,5,6], 5) == 2\n    assert candidate([1,3,5,6], 2) == 1\n    assert candidate([1,3,5,6], 7) == 4\n    assert candidate([1,3,5,6], 0) == 0\n    assert candidate([], 1) == 0\n    assert candidate([1], 0) == 0\n    assert candidate([1], 2) == 1\n    assert candidate([1,1,1], 1) == 0"}
{"task_id": "ExtendedEval/171", "prompt": "def merge_k_sorted_lists(lists: list) -> list:\n    \"\"\"\n    Merge k sorted lists into one sorted list.\n    Each list is sorted in ascending order.\n    \n    Examples:\n    merge_k_sorted_lists([[1,4,5],[1,3,4],[2,6]]) => [1,1,2,3,4,4,5,6]\n    merge_k_sorted_lists([]) => []\n    merge_k_sorted_lists([[]]) => []\n    \n    Return only the Python function code, no explanations.\n    \"\"\"\n", "entry_point": "merge_k_sorted_lists", "canonical_solution": "def merge_k_sorted_lists(lists: list) -> list:\n    import heapq\n    \n    if not lists:\n        return []\n    \n    heap = []\n    \n    # Initialize heap with first element from each list\n    for i, lst in enumerate(lists):\n        if lst:\n            heapq.heappush(heap, (lst[0], i, 0))  # (value, list_index, element_index)\n    \n    result = []\n    \n    while heap:\n        val, list_idx, elem_idx = heapq.heappop(heap)\n        result.append(val)\n        \n        # Add next element from the same list\n        if elem_idx + 1 < len(lists[list_idx]):\n            next_val = lists[list_idx][elem_idx + 1]\n            heapq.heappush(heap, (next_val, list_idx, elem_idx + 1))\n    \n    return result", "test": "def check(candidate):\n    assert candidate([[1,4,5],[1,3,4],[2,6]]) == [1,1,2,3,4,4,5,6]\n    assert candidate([]) == []\n    assert candidate([[]]) == []\n    assert candidate([[1],[2],[3]]) == [1,2,3]\n    assert candidate([[1,2,3]]) == [1,2,3]\n    assert candidate([[-1,0,1],[2,3,4]]) == [-1,0,1,2,3,4]"}
{"task_id": "ExtendedEval/172", "prompt": "def sliding_window_maximum(nums: list, k: int) -> list:\n    \"\"\"\n    Find the maximum in each sliding window of size k.\n    \n    Examples:\n    sliding_window_maximum([1,3,-1,-3,5,3,6,7], 3) => [3,3,5,5,6,7]\n    sliding_window_maximum([1], 1) => [1]\n    sliding_window_maximum([1,-1], 1) => [1,-1]\n    \n    Return only the Python function code, no explanations.\n    \"\"\"\n", "entry_point": "sliding_window_maximum", "canonical_solution": "def sliding_window_maximum(nums: list, k: int) -> list:\n    from collections import deque\n    \n    if not nums or k == 0:\n        return []\n    \n    if k == 1:\n        return nums\n    \n    dq = deque()  # stores indices\n    result = []\n    \n    for i in range(len(nums)):\n        # Remove indices outside current window\n        while dq and dq[0] <= i - k:\n            dq.popleft()\n        \n        # Remove indices of smaller elements\n        while dq and nums[dq[-1]] < nums[i]:\n            dq.pop()\n        \n        dq.append(i)\n        \n        # Add maximum to result if window is complete\n        if i >= k - 1:\n            result.append(nums[dq[0]])\n    \n    return result", "test": "def check(candidate):\n    assert candidate([1,3,-1,-3,5,3,6,7], 3) == [3,3,5,5,6,7]\n    assert candidate([1], 1) == [1]\n    assert candidate([1,-1], 1) == [1,-1]\n    assert candidate([9,11], 2) == [11]\n    assert candidate([4,-2], 2) == [4]\n    assert candidate([1,2,3,4], 2) == [2,3,4]"}
{"task_id": "ExtendedEval/173", "prompt": "def longest_increasing_subsequence(nums: list) -> int:\n    \"\"\"\n    Find the length of the longest strictly increasing subsequence.\n    \n    Examples:\n    longest_increasing_subsequence([10,9,2,5,3,7,101,18]) => 4\n    longest_increasing_subsequence([0,1,0,3,2,3]) => 4\n    longest_increasing_subsequence([7,7,7,7,7,7,7]) => 1\n    \n    Return only the Python function code, no explanations.\n    \"\"\"\n", "entry_point": "longest_increasing_subsequence", "canonical_solution": "def longest_increasing_subsequence(nums: list) -> int:\n    if not nums:\n        return 0\n    \n    # Binary search approach - O(n log n)\n    import bisect\n    \n    tails = []\n    \n    for num in nums:\n        pos = bisect.bisect_left(tails, num)\n        if pos == len(tails):\n            tails.append(num)\n        else:\n            tails[pos] = num\n    \n    return len(tails)", "test": "def check(candidate):\n    assert candidate([10,9,2,5,3,7,101,18]) == 4\n    assert candidate([0,1,0,3,2,3]) == 4\n    assert candidate([7,7,7,7,7,7,7]) == 1\n    assert candidate([]) == 0\n    assert candidate([1,3,6,7,9,4,10,5,6]) == 6\n    assert candidate([10,22,9,33,21,50,41,60]) == 5"}
{"task_id": "ExtendedEval/174", "prompt": "def find_median_sorted_arrays(nums1: list, nums2: list) -> float:\n    \"\"\"\n    Find the median of two sorted arrays in O(log(min(m,n))) time.\n    \n    Examples:\n    find_median_sorted_arrays([1,3], [2]) => 2.0\n    find_median_sorted_arrays([1,2], [3,4]) => 2.5\n    find_median_sorted_arrays([0,0], [0,0]) => 0.0\n    \n    Return only the Python function code, no explanations.\n    \"\"\"\n", "entry_point": "find_median_sorted_arrays", "canonical_solution": "def find_median_sorted_arrays(nums1: list, nums2: list) -> float:\n    # Ensure nums1 is the smaller array\n    if len(nums1) > len(nums2):\n        nums1, nums2 = nums2, nums1\n    \n    m, n = len(nums1), len(nums2)\n    \n    left, right = 0, m\n    \n    while left <= right:\n        partition1 = (left + right) // 2\n        partition2 = (m + n + 1) // 2 - partition1\n        \n        maxLeft1 = float('-inf') if partition1 == 0 else nums1[partition1 - 1]\n        maxLeft2 = float('-inf') if partition2 == 0 else nums2[partition2 - 1]\n        \n        minRight1 = float('inf') if partition1 == m else nums1[partition1]\n        minRight2 = float('inf') if partition2 == n else nums2[partition2]\n        \n        if maxLeft1 <= minRight2 and maxLeft2 <= minRight1:\n            if (m + n) % 2 == 0:\n                return (max(maxLeft1, maxLeft2) + min(minRight1, minRight2)) / 2.0\n            else:\n                return max(maxLeft1, maxLeft2)\n        \n        elif maxLeft1 > minRight2:\n            right = partition1 - 1\n        else:\n            left = partition1 + 1", "test": "def check(candidate):\n    assert candidate([1,3], [2]) == 2.0\n    assert candidate([1,2], [3,4]) == 2.5\n    assert candidate([0,0], [0,0]) == 0.0\n    assert candidate([], [1]) == 1.0\n    assert candidate([2], []) == 2.0\n    assert candidate([1,3], [2,4]) == 2.5\n    assert candidate([1,2,3], [4,5,6]) == 3.5"}
{"task_id": "ExtendedEval/175", "prompt": "def regular_expression_match(s: str, p: str) -> bool:\n    \"\"\"\n    Implement regular expression matching with '.' and '*'.\n    '.' matches any single character\n    '*' matches zero or more of the preceding element\n    \n    Examples:\n    regular_expression_match('aa', 'a') => False\n    regular_expression_match('aa', 'a*') => True\n    regular_expression_match('ab', '.*') => True\n    regular_expression_match('aab', 'c*a*b') => True\n    \n    Return only the Python function code, no explanations.\n    \"\"\"\n", "entry_point": "regular_expression_match", "canonical_solution": "def regular_expression_match(s: str, p: str) -> bool:\n    dp = {}\n    \n    def dfs(i, j):\n        if (i, j) in dp:\n            return dp[(i, j)]\n        \n        if j == len(p):\n            return i == len(s)\n        \n        first_match = i < len(s) and (p[j] == s[i] or p[j] == '.')\n        \n        if j + 1 < len(p) and p[j + 1] == '*':\n            result = (dfs(i, j + 2) or  # match 0 occurrences\n                     (first_match and dfs(i + 1, j)))  # match 1+ occurrences\n        else:\n            result = first_match and dfs(i + 1, j + 1)\n        \n        dp[(i, j)] = result\n        return result\n    \n    return dfs(0, 0)", "test": "def check(candidate):\n    assert candidate('aa', 'a') == False\n    assert candidate('aa', 'a*') == True\n    assert candidate('ab', '.*') == True\n    assert candidate('aab', 'c*a*b') == True\n    assert candidate('mississippi', 'mis*is*p*.') == False\n    assert candidate('', '') == True\n    assert candidate('', 'a*') == True\n    assert candidate('a', 'ab*') == True"}
{"task_id": "ExtendedEval/176", "prompt": "def serialize_deserialize_tree(root) -> str:\n    \"\"\"\n    Design an algorithm to serialize and deserialize a binary tree.\n    Represent tree as: TreeNode with val, left, right attributes.\n    Return serialized string that can uniquely reconstruct the tree.\n    \n    Use format: \"1,2,None,None,3,4,None,None,5,None,None\"\n    None represents null nodes in preorder traversal.\n    \n    Return only the Python function code, no explanations.\n    \"\"\"\n", "entry_point": "serialize_deserialize_tree", "canonical_solution": "class TreeNode:\n    def __init__(self, val=0, left=None, right=None):\n        self.val = val\n        self.left = left\n        self.right = right\n\ndef serialize(root):\n    def preorder(node):\n        if not node:\n            vals.append(\"None\")\n        else:\n            vals.append(str(node.val))\n            preorder(node.left)\n            preorder(node.right)\n    \n    vals = []\n    preorder(root)\n    return ','.join(vals)\n\ndef deserialize(data):\n    def build_tree():\n        val = next(vals)\n        if val == \"None\":\n            return None\n        \n        node = TreeNode(int(val))\n        node.left = build_tree()\n        node.right = build_tree()\n        return node\n    \n    vals = iter(data.split(','))\n    return build_tree()\n\ndef serialize_deserialize_tree(root):\n    serialized = serialize(root)\n    deserialized = deserialize(serialized)\n    return serialized", "test": "def check(candidate):\n    # Test with simple tree\n    root = TreeNode(1)\n    root.left = TreeNode(2)\n    root.right = TreeNode(3)\n    \n    result = candidate(root)\n    assert '1,2,None,None,3,None,None' == result\n    \n    # Test with None\n    assert candidate(None) == 'None'\n    \n    # Test single node\n    single = TreeNode(5)\n    assert candidate(single) == '5,None,None'"}
{"task_id": "ExtendedEval/177", "prompt": "def word_break(s: str, word_dict: list) -> bool:\n    \"\"\"\n    Determine if string can be segmented into dictionary words.\n    \n    Examples:\n    word_break('leetcode', ['leet','code']) => True\n    word_break('applepenapple', ['apple','pen']) => True\n    word_break('catsandog', ['cats','dog','sand','and','cat']) => False\n    \n    Return only the Python function code, no explanations.\n    \"\"\"\n", "entry_point": "word_break", "canonical_solution": "def word_break(s: str, word_dict: list) -> bool:\n    word_set = set(word_dict)\n    n = len(s)\n    dp = [False] * (n + 1)\n    dp[0] = True\n    \n    for i in range(1, n + 1):\n        for j in range(i):\n            if dp[j] and s[j:i] in word_set:\n                dp[i] = True\n                break\n    \n    return dp[n]", "test": "def check(candidate):\n    assert candidate('leetcode', ['leet','code']) == True\n    assert candidate('applepenapple', ['apple','pen']) == True\n    assert candidate('catsandog', ['cats','dog','sand','and','cat']) == False\n    assert candidate('', []) == True\n    assert candidate('a', ['a']) == True\n    assert candidate('ab', ['a','b']) == True\n    assert candidate('abc', ['ab','bc']) == False"}
{"task_id": "ExtendedEval/178", "prompt": "def n_queens(n: int) -> int:\n    \"\"\"\n    Count the number of solutions to the N-Queens problem.\n    Place n queens on n×n chessboard so no two attack each other.\n    \n    Examples:\n    n_queens(4) => 2\n    n_queens(1) => 1\n    n_queens(8) => 92\n    \n    Return only the Python function code, no explanations.\n    \"\"\"\n", "entry_point": "n_queens", "canonical_solution": "def n_queens(n: int) -> int:\n    def is_safe(board, row, col):\n        # Check column\n        for i in range(row):\n            if board[i] == col:\n                return False\n        \n        # Check diagonals\n        for i in range(row):\n            if abs(board[i] - col) == abs(i - row):\n                return False\n        \n        return True\n    \n    def solve(board, row):\n        if row == n:\n            return 1\n        \n        count = 0\n        for col in range(n):\n            if is_safe(board, row, col):\n                board[row] = col\n                count += solve(board, row + 1)\n                board[row] = -1\n        \n        return count\n    \n    board = [-1] * n\n    return solve(board, 0)", "test": "def check(candidate):\n    assert candidate(1) == 1\n    assert candidate(4) == 2\n    assert candidate(8) == 92\n    assert candidate(0) == 1\n    assert candidate(2) == 0\n    assert candidate(3) == 0"}