Skip to content

Commit 25a1f44

Browse files
Copilot and mwang87 authored
fix: use curl with 429-aware retry backoff in get_data.sh to prevent 0-byte test data files
Agent-Logs-Url: https://github.com/mwang87/MassQueryLanguage/sessions/b86a5a85-c2cb-4848-beee-f29e1e1d10c8 Co-authored-by: mwang87 <96528+mwang87@users.noreply.github.com>
1 parent afb5fa1 commit 25a1f44

1 file changed

Lines changed: 33 additions & 3 deletions

File tree

tests/get_data.sh

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,40 @@
11
mkdir -p data
22
cd data
33

4-
# Rate limit: at most 10 files/minute => sleep 6s between downloads.
4+
# Download with retry on HTTP 429 rate limiting.
5+
# Uses curl to properly detect the HTTP status and avoid leaving 0-byte files.
56
# download FILENAME URL
#
# Fetch URL into FILENAME with curl, retrying on failure.
#   - HTTP 429 (rate limited): back off with a growing wait
#     (60s, 90s, 120s, ...) before retrying.
#   - Any other failure (non-200, transport error, empty file):
#     retry after a fixed 15s.
# A failed attempt's partial/error body is deleted so the script never
# leaves a 0-byte file behind for the tests to trip over.
# Diagnostics go to stderr; only the success summary goes to stdout.
# Returns 0 on success, 1 after max_retries failed attempts.
download() {
    filename="$1"
    url="$2"
    max_retries=8
    retry=0

    while [ "$retry" -lt "$max_retries" ]; do
        # -w '%{http_code}' prints the status code to stdout, which the
        # command substitution captures; the response body goes to $filename.
        # The assignment's exit status is curl's own exit status.
        http_code=$(curl -L -o "$filename" -w "%{http_code}" --silent --show-error "$url")
        curl_exit=$?

        # Success requires a clean curl exit, an HTTP 200, and a non-empty file.
        if [ "$curl_exit" -eq 0 ] && [ "$http_code" = "200" ] && [ -s "$filename" ]; then
            echo "OK: $filename ($(wc -c < "$filename") bytes)"
            # Pace successive downloads to stay under the server's rate limit.
            sleep 7
            return 0
        fi

        # Remove whatever curl wrote (error page, partial body) so a
        # failed attempt cannot masquerade as downloaded test data.
        rm -f "$filename"

        if [ "$http_code" = "429" ]; then
            # Rate limited: wait longer on each successive attempt.
            wait_time=$((60 + retry * 30))
            echo "Rate limited (429) for $filename, waiting ${wait_time}s before retry $((retry+1))/$max_retries ..." >&2
            sleep "$wait_time"
        else
            echo "Download failed (HTTP $http_code, curl exit $curl_exit) for $filename, retrying in 15s ..." >&2
            sleep 15
        fi

        retry=$((retry + 1))
    done

    echo "ERROR: Failed to download $filename from $url after $max_retries attempts" >&2
    return 1
}
939

1040
# Fetch an mzML peak-list test fixture from MassIVE dataset MSV000084494
# via the GNPS2 massive proxy, using the rate-limit-aware download helper.
download GNPS00002_A3_p.mzML "https://massiveproxy.gnps2.org/massiveproxy/MSV000084494/ccms_peak/raw/GNPS00002_A3_p.mzML"

0 commit comments

Comments
 (0)