txt_crawler.py
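txt_crawler.py is a small single-file crawler: it fetches a page, pretty-prints the HTML, flags every line that references a script, stylesheet, page, or embedded comment, and can optionally dump the site's robots.txt.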
#!/usr/bin/env python3
# ASCII banner ("txt_crawler" in a small slant style); the raw string
# keeps the backslashes literal.
print(r'''
  __        __                                  __
 / /___ __ / /_      ____  ____ ___ _ _    __  / / ___   ____
/ __/\ \ // __/     / __/ / __// _ `/| |/|/ / / / / -_) / __/
\__/ /_\_\\__/  ____\__/ /_/   \_,_/ |__,__/ /_/  \__/ /_/
               /___/ ''')
import sys

import requests
from bs4 import BeautifulSoup

# The target is the only argument; bare hostnames are accepted.
if len(sys.argv) != 2:
    sys.exit(f'usage: {sys.argv[0]} <url>')
url = sys.argv[1]
# Default to http:// and make sure the URL ends with a slash.
if not url.startswith(('http://', 'https://')):
    url = 'http://' + url
if not url.endswith('/'):
    url += '/'

try:
    raw = requests.get(url, timeout=10)
except requests.exceptions.RequestException:
    # Plain HTTP failed; retry the same host over HTTPS.
    url = url.replace('http://', 'https://', 1)
    raw = requests.get(url, timeout=10)

# prettify() renders one tag per line, so interesting references can be
# found with a plain line scan.
soup = BeautifulSoup(raw.content, 'html.parser')
tag_list = soup.prettify().split('\n')
print()
# Report every line that mentions a script, stylesheet, page, or comment.
markers = ('.php', '.js', '.css', '.aspx', '.html', '/*', '<!--')
for i, line in enumerate(tag_list):
    if any(marker in line for marker in markers):
        print(f'{i} : {line.strip()}')

if input('\n[?] get robots.txt? [Y/n] : ') in ('Y', 'y', ''):
    print('\n' + requests.get(url + 'robots.txt', timeout=10).text)