-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcheckurl
More file actions
executable file
·73 lines (64 loc) · 2.57 KB
/
checkurl
File metadata and controls
executable file
·73 lines (64 loc) · 2.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/usr/bin/env bash
#USAGE: checkurl 'http://url-to-check.org'
#
#EXAMPLE: checkurl 'https://xkcd.com/1319/'
#
#REQUIRES:
# bashlogging
# curl
#
#DESCRIPTION:
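# Check if a web page is accessible. Useful as a utility function for
# identifying dead links. Exit status is 0 if the web page is accessible.
# If the page is inaccessible the script prints a warning message before
# exiting with status 1. It also exits with status 1, after an error
# message, when called with the wrong number of arguments or when the
# known-good URL used to test web access cannot be reached.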
#
# Author: Stuart A. Knock
# Originally Written: 2015-11-28
# https://github.com/stuart-knock/bash-tools
#
#TODO: make it capable of handling multiple urls in an efficient way.
#TODO: probably won't call this at a command line very often, so,
# maybe make it just a function checkurl() that's importable into
# other scripts with a 'source urlchecker'
#A URL you know is good as a proxy for web access, by default it's the Goog.
knowngoodurl='http://www.google.com'

#Time, in seconds, that curl may spend connecting before we decide the
#internet or a webpage isn't reachable. It is applied to every probe below.
#You might want to make this shorter if called as part of a script,
#particularly if it is called multiple times. Default: 2
timetowait=2

#Import some simple logging functionality. The debug/inform/warn/err helpers
#used below are presumably defined there, and LOGFILE/LOGLEVEL are presumably
#read by them -- confirm against bashlogging.
# shellcheck source=/dev/null
source 'bashlogging'
# shellcheck disable=SC2034
LOGFILE='linkchecker.log'
# shellcheck disable=SC2034
LOGLEVEL=1 #Default to just showing warning and error messages.

#If a logging helper is still missing (e.g. bashlogging could not be sourced),
#fall back to a minimal stand-in so that warnings and errors still reach the
#user instead of turning into "command not found" noise. The stand-ins mirror
#the default LOGLEVEL: debug and informational messages are dropped.
command -v debug  >/dev/null 2>&1 || debug()  { :; }
command -v inform >/dev/null 2>&1 || inform() { :; }
command -v warn   >/dev/null 2>&1 || warn()   { printf 'WARNING: %s\n' "$*" >&2; }
command -v err    >/dev/null 2>&1 || err()    { printf 'ERROR: %s\n' "$*" >&2; }

#-----------------------------------------------------------------------------#

#Fail early, fail often:
if (( $# != 1 )); then
  err " Usage: $0 'http://url-to-check.org'"
  exit 1
fi
url="$1"

#Options shared by every probe: bounded connect time, body thrown away, no
#progress output, and a non-zero exit status on HTTP errors.
curlopts=(--connect-timeout "$timetowait" --output /dev/null --silent --fail)

#Use a known-to-be-good URL as a proxy for a functional interweb connection.
#The URL is always handed over via --url so that a value starting with '-'
#can never be mistaken for a curl option.
debug "Checking access to $knowngoodurl as a proxy for web access."
if ! curl "${curlopts[@]}" --head --url "$knowngoodurl"; then
  err "The interwebs are broken..."
  exit 1
fi
debug "The web is alive... it's alive I tell you."

#The interwebs are working, so check for the actual page.
inform "Checking for accessibility of $url"

#Request just the HEAD of the webpage.
curl "${curlopts[@]}" --head --url "$url"
headaccess=$?
debug "headaccess=$headaccess"

#Some servers, apparently, deny HEAD requests, so when HEAD failed we double
#check by asking for only the first byte of the page. When HEAD succeeded the
#fallback is skipped and firstbyteaccess keeps its non-zero default.
firstbyteaccess=1
if [[ "$headaccess" != "0" ]]; then
  curl "${curlopts[@]}" -r 0-0 --url "$url"
  firstbyteaccess=$?
fi
debug "firstbyteaccess=$firstbyteaccess"

#If either method was successful, the page is accessible.
#(Message spelling is kept as-is in case existing logs are searched for it.)
if [[ "$headaccess" = "0" || "$firstbyteaccess" = "0" ]]; then
  inform "URL is ACCESIBLE: $url"
  exit 0
fi

#Otherwise the page is, at least currently, inaccessible.
warn "URL is INACCESIBLE: $url"
exit 1