Skip to content

Commit 9bbc1d6

Browse files
committed
Fix cache issue
1 parent b4aee2d commit 9bbc1d6

3 files changed

Lines changed: 62 additions & 14 deletions

File tree

.github/scripts/check_gtfs_route_lines_picto.py

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,18 @@
1010
from collections import defaultdict
1111

1212

13-
def download_gtfs_zip(url, timeout=30):
13+
def with_cache_bust(url, cache_bust_token=None):
    """Return *url* with a cache-busting ``cb=<token>`` query parameter appended.

    Args:
        url: The download URL to decorate.
        cache_bust_token: Opaque token (e.g. CI run id) that varies per run so
            intermediary caches treat each run's request as a distinct resource.
            If falsy, *url* is returned unchanged.

    Returns:
        The original URL, or the URL with ``cb=<token>`` appended using ``?``
        or ``&`` depending on whether a query string is already present.
    """
    if not cache_bust_token:
        return url
    # Local import keeps this stdlib-only without touching top-level imports.
    from urllib.parse import quote
    sep = '&' if '?' in url else '?'
    # Percent-encode the token: characters such as '&', '#', '=' or spaces in
    # the token would otherwise corrupt the query string.
    return f'{url}{sep}cb={quote(str(cache_bust_token), safe="")}'
18+
19+
20+
def download_gtfs_zip(url, timeout=30, cache_bust_token=None):
1421
"""Download GTFS zip bytes using explicit headers.
1522
Some hosts block default Python urllib user agents in CI environments.
1623
"""
24+
final_url = with_cache_bust(url, cache_bust_token=cache_bust_token)
1725
headers = {
1826
'User-Agent': (
1927
'Mozilla/5.0 (X11; Linux x86_64) '
@@ -23,8 +31,10 @@ def download_gtfs_zip(url, timeout=30):
2331
'Accept': 'application/zip,application/octet-stream,*/*;q=0.8',
2432
'Accept-Language': 'en-US,en;q=0.9',
2533
'Referer': 'https://hexatransit.fr/',
34+
'Cache-Control': 'no-cache',
35+
'Pragma': 'no-cache',
2636
}
27-
req = urllib.request.Request(url, headers=headers, method='GET')
37+
req = urllib.request.Request(final_url, headers=headers, method='GET')
2838
with urllib.request.urlopen(req, timeout=timeout) as resp:
2939
return resp.read()
3040

@@ -58,16 +68,19 @@ def gather_lines_picto(root_dir):
5868
return agencies, files_read
5969

6070

61-
def check_gtfs_for_agencies(agencies, timeout=30):
71+
def check_gtfs_for_agencies(agencies, timeout=30, cache_bust_token=None):
6272
errors = []
6373
total_agencies = len(agencies)
6474
count = 0
6575
for agency, line_ids in agencies.items():
6676
count += 1
6777
url = f'https://hexatransit.fr/datasets/gtfs/{agency}.zip'
68-
print(f'[{count}/{total_agencies}] Checking GTFS for agency "{agency}" -> {url}')
78+
if cache_bust_token:
79+
print(f'[{count}/{total_agencies}] Checking GTFS for agency "{agency}" -> {url} (cache-bust enabled)')
80+
else:
81+
print(f'[{count}/{total_agencies}] Checking GTFS for agency "{agency}" -> {url}')
6982
try:
70-
data = download_gtfs_zip(url, timeout=timeout)
83+
data = download_gtfs_zip(url, timeout=timeout, cache_bust_token=cache_bust_token)
7184
except urllib.error.HTTPError as e:
7285
if e.code == 403:
7386
msg = (
@@ -143,6 +156,7 @@ def main():
143156
parser = argparse.ArgumentParser(description='Check GTFS routes for line IDs listed in lines_picto.csv files under a logo directory.')
144157
parser.add_argument('--logo-dir', default='logo', help='Path to the logo directory to search (default: logo)')
145158
parser.add_argument('--timeout', type=int, default=30, help='Network timeout seconds when downloading GTFS (default: 30)')
159+
parser.add_argument('--cache-bust', action='store_true', help='Append cache-busting query parameter and no-cache headers to GTFS downloads')
146160
args = parser.parse_args()
147161

148162
if not os.path.isdir(args.logo_dir):
@@ -158,7 +172,17 @@ def main():
158172
for p in files_read:
159173
print(' -', p)
160174

161-
errors = check_gtfs_for_agencies(agencies, timeout=args.timeout)
175+
cache_bust_token = None
176+
if args.cache_bust:
177+
run_id = os.getenv('GITHUB_RUN_ID', '').strip()
178+
run_attempt = os.getenv('GITHUB_RUN_ATTEMPT', '').strip()
179+
sha = os.getenv('GITHUB_SHA', '').strip()
180+
if run_id:
181+
cache_bust_token = f'{run_id}-{run_attempt or "1"}-{sha[:7] if sha else "local"}'
182+
else:
183+
cache_bust_token = 'local-run'
184+
185+
errors = check_gtfs_for_agencies(agencies, timeout=args.timeout, cache_bust_token=cache_bust_token)
162186

163187
if errors:
164188
print('\nGTFS verification errors:')

.github/scripts/check_gtfs_route_trafic.py

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,18 @@
1111
from collections import defaultdict
1212

1313

14-
def download_gtfs_zip(url, timeout=30):
14+
def with_cache_bust(url, cache_bust_token=None):
    """Append a ``cb=<token>`` cache-busting query parameter to *url*.

    When *cache_bust_token* is falsy the URL is returned untouched; otherwise
    the token is appended with ``?`` or ``&`` according to whether *url*
    already carries a query string.
    """
    if cache_bust_token:
        separator = '?' if '?' not in url else '&'
        return f'{url}{separator}cb={cache_bust_token}'
    return url
19+
20+
21+
def download_gtfs_zip(url, timeout=30, cache_bust_token=None):
1522
"""Download GTFS zip bytes using explicit headers.
1623
Some hosts block default Python urllib user agents in CI environments.
1724
"""
25+
final_url = with_cache_bust(url, cache_bust_token=cache_bust_token)
1826
headers = {
1927
'User-Agent': (
2028
'Mozilla/5.0 (X11; Linux x86_64) '
@@ -24,8 +32,10 @@ def download_gtfs_zip(url, timeout=30):
2432
'Accept': 'application/zip,application/octet-stream,*/*;q=0.8',
2533
'Accept-Language': 'en-US,en;q=0.9',
2634
'Referer': 'https://hexatransit.fr/',
35+
'Cache-Control': 'no-cache',
36+
'Pragma': 'no-cache',
2737
}
28-
req = urllib.request.Request(url, headers=headers, method='GET')
38+
req = urllib.request.Request(final_url, headers=headers, method='GET')
2939
with urllib.request.urlopen(req, timeout=timeout) as resp:
3040
return resp.read()
3141

@@ -103,7 +113,7 @@ def process_company(company):
103113
return agencies, files_read
104114

105115

106-
def check_gtfs_for_agencies(agencies, timeout=30):
116+
def check_gtfs_for_agencies(agencies, timeout=30, cache_bust_token=None):
107117
errors = []
108118
total = len(agencies)
109119
idx = 0
@@ -113,9 +123,12 @@ def check_gtfs_for_agencies(agencies, timeout=30):
113123
print(f'[{idx}/{total}] Agency "{aid}": no lineIds to check, skipping')
114124
continue
115125
url = f'https://hexatransit.fr/datasets/gtfs/{aid}.zip'
116-
print(f'[{idx}/{total}] Checking GTFS for agency "{aid}" -> {url}')
126+
if cache_bust_token:
127+
print(f'[{idx}/{total}] Checking GTFS for agency "{aid}" -> {url} (cache-bust enabled)')
128+
else:
129+
print(f'[{idx}/{total}] Checking GTFS for agency "{aid}" -> {url}')
117130
try:
118-
dataz = download_gtfs_zip(url, timeout=timeout)
131+
dataz = download_gtfs_zip(url, timeout=timeout, cache_bust_token=cache_bust_token)
119132
except urllib.error.HTTPError as e:
120133
if e.code == 403:
121134
msg = (
@@ -192,6 +205,7 @@ def main():
192205
parser = argparse.ArgumentParser(description='Check GTFS routes for line IDs listed in trafic.json files under a logo directory.')
193206
parser.add_argument('--logo-dir', default='logo', help='Path to the logo directory to search (default: logo)')
194207
parser.add_argument('--timeout', type=int, default=30, help='Network timeout seconds when downloading GTFS (default: 30)')
208+
parser.add_argument('--cache-bust', action='store_true', help='Append cache-busting query parameter and no-cache headers to GTFS downloads')
195209
args = parser.parse_args()
196210

197211
if not os.path.isdir(args.logo_dir):
@@ -207,7 +221,17 @@ def main():
207221
for p in files_read:
208222
print(' -', p)
209223

210-
errors = check_gtfs_for_agencies(agencies, timeout=args.timeout)
224+
cache_bust_token = None
225+
if args.cache_bust:
226+
run_id = os.getenv('GITHUB_RUN_ID', '').strip()
227+
run_attempt = os.getenv('GITHUB_RUN_ATTEMPT', '').strip()
228+
sha = os.getenv('GITHUB_SHA', '').strip()
229+
if run_id:
230+
cache_bust_token = f'{run_id}-{run_attempt or "1"}-{sha[:7] if sha else "local"}'
231+
else:
232+
cache_bust_token = 'local-run'
233+
234+
errors = check_gtfs_for_agencies(agencies, timeout=args.timeout, cache_bust_token=cache_bust_token)
211235

212236
if errors:
213237
print('\nGTFS verification errors:')

.github/workflows/deploy.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,10 @@ jobs:
4040
uses: actions/checkout@v6
4141

4242
- name: Check GTFS routes match trafic.json
43-
run: python .github/scripts/check_gtfs_route_trafic.py
43+
run: python .github/scripts/check_gtfs_route_trafic.py --cache-bust
4444

4545
- name: Check GTFS routes match lines_picto
46-
run: python .github/scripts/check_gtfs_route_lines_picto.py
46+
run: python .github/scripts/check_gtfs_route_lines_picto.py --cache-bust
4747

4848
deploy:
4949
if: github.ref == 'refs/heads/main'

0 commit comments

Comments
 (0)