1111from collections import defaultdict
1212
1313
14- def download_gtfs_zip (url , timeout = 30 ):
def with_cache_bust(url, cache_bust_token=None):
    """Return *url* with a ``cb=<token>`` cache-busting query parameter.

    When *cache_bust_token* is falsy (``None`` or empty string) the URL is
    returned untouched. Otherwise ``cb=<token>`` is appended, using ``&`` if
    the URL already carries a query string and ``?`` otherwise. The token is
    interpolated verbatim (no URL-encoding) — acceptable here because callers
    build it from alphanumeric/hyphen CI environment values.
    """
    if cache_bust_token:
        joiner = '?' if '?' not in url else '&'
        return url + joiner + 'cb=' + str(cache_bust_token)
    return url
19+
20+
# download_gtfs_zip: fetches the GTFS zip at the (optionally cache-busted) URL
# with browser-like headers and returns the raw response bytes.
# NOTE(review): this is a rendered unified diff, not runnable source — the
# leading "NN+/NN-" digits are diff line numbers fused in by extraction, and
# the "@@" marker below ELIDES the continuation of the User-Agent string
# (old lines 21-23 / new lines 29-31), so this block is incomplete here.
21+ def download_gtfs_zip (url , timeout = 30 , cache_bust_token = None ):
1522 """Download GTFS zip bytes using explicit headers.
1623 Some hosts block default Python urllib user agents in CI environments.
1724 """
# New in this commit: optional cb= query parameter via with_cache_bust().
25+ final_url = with_cache_bust (url , cache_bust_token = cache_bust_token )
1826 headers = {
1927 'User-Agent' : (
2028 'Mozilla/5.0 (X11; Linux x86_64) '
@@ -24,8 +32,10 @@ def download_gtfs_zip(url, timeout=30):
2432 'Accept' : 'application/zip,application/octet-stream,*/*;q=0.8' ,
2533 'Accept-Language' : 'en-US,en;q=0.9' ,
2634 'Referer' : 'https://hexatransit.fr/' ,
# New in this commit: request headers asking intermediaries not to serve a
# cached copy (paired with the cb= query parameter above).
35+ 'Cache-Control' : 'no-cache' ,
36+ 'Pragma' : 'no-cache' ,
2737 }
# Changed line: the request now targets final_url (cache-busted) instead of
# the caller-supplied url.
28- req = urllib .request .Request (url , headers = headers , method = 'GET' )
38+ req = urllib .request .Request (final_url , headers = headers , method = 'GET' )
2939 with urllib .request .urlopen (req , timeout = timeout ) as resp :
3040 return resp .read ()
3141
@@ -103,7 +113,7 @@ def process_company(company):
103113 return agencies , files_read
104114
105115
# check_gtfs_for_agencies: iterates agencies, downloads each GTFS zip from
# hexatransit.fr, and collects errors. This commit threads the new
# cache_bust_token parameter through to download_gtfs_zip.
# NOTE(review): rendered diff fragment — "@@" markers elide interior lines
# (old 110-112 / new 120-122) and the function is cut off after `msg = (`,
# so the block is incomplete in this view.
106- def check_gtfs_for_agencies (agencies , timeout = 30 ):
116+ def check_gtfs_for_agencies (agencies , timeout = 30 , cache_bust_token = None ):
107117 errors = []
108118 total = len (agencies )
109119 idx = 0
@@ -113,9 +123,12 @@ def check_gtfs_for_agencies(agencies, timeout=30):
113123 print (f'[{ idx } /{ total } ] Agency "{ aid } ": no lineIds to check, skipping' )
114124 continue
115125 url = f'https://hexatransit.fr/datasets/gtfs/{ aid } .zip'
# Changed lines: the progress message now notes when cache-busting is active.
116- print (f'[{ idx } /{ total } ] Checking GTFS for agency "{ aid } " -> { url } ' )
126+ if cache_bust_token :
127+ print (f'[{ idx } /{ total } ] Checking GTFS for agency "{ aid } " -> { url } (cache-bust enabled)' )
128+ else :
129+ print (f'[{ idx } /{ total } ] Checking GTFS for agency "{ aid } " -> { url } ' )
117130 try :
# Changed line: token is forwarded to download_gtfs_zip.
118- dataz = download_gtfs_zip (url , timeout = timeout )
131+ dataz = download_gtfs_zip (url , timeout = timeout , cache_bust_token = cache_bust_token )
119132 except urllib .error .HTTPError as e :
120133 if e .code == 403 :
121134 msg = (
# main (fragment): CLI entry point. This commit adds a --cache-bust flag and
# builds a run-unique token from GitHub Actions environment variables
# (GITHUB_RUN_ID / GITHUB_RUN_ATTEMPT / GITHUB_SHA), falling back to
# 'local-run' outside CI.
# NOTE(review): rendered diff fragment — the def line appears only as "@@"
# hunk context, two elisions hide interior lines, and the function continues
# past the end of this view.
@@ -192,6 +205,7 @@ def main():
192205 parser = argparse .ArgumentParser (description = 'Check GTFS routes for line IDs listed in trafic.json files under a logo directory.' )
193206 parser .add_argument ('--logo-dir' , default = 'logo' , help = 'Path to the logo directory to search (default: logo)' )
194207 parser .add_argument ('--timeout' , type = int , default = 30 , help = 'Network timeout seconds when downloading GTFS (default: 30)' )
# New in this commit: opt-in cache busting.
208+ parser .add_argument ('--cache-bust' , action = 'store_true' , help = 'Append cache-busting query parameter and no-cache headers to GTFS downloads' )
195209 args = parser .parse_args ()
196210
197211 if not os .path .isdir (args .logo_dir ):
@@ -207,7 +221,17 @@ def main():
207221 for p in files_read :
208222 print (' -' , p )
209223
# New in this commit: derive a token unique per CI run/attempt/commit so
# repeated workflow runs never hit a stale cached zip.
210- errors = check_gtfs_for_agencies (agencies , timeout = args .timeout )
224+ cache_bust_token = None
225+ if args .cache_bust :
226+ run_id = os .getenv ('GITHUB_RUN_ID' , '' ).strip ()
227+ run_attempt = os .getenv ('GITHUB_RUN_ATTEMPT' , '' ).strip ()
228+ sha = os .getenv ('GITHUB_SHA' , '' ).strip ()
229+ if run_id :
230+ cache_bust_token = f'{ run_id } -{ run_attempt or "1" } -{ sha [:7 ] if sha else "local" } '
231+ else :
232+ cache_bust_token = 'local-run'
233+
234+ errors = check_gtfs_for_agencies (agencies , timeout = args .timeout , cache_bust_token = cache_bust_token )
211235
212236 if errors :
213237 print ('\n GTFS verification errors:' )
0 commit comments