-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDRTpv
More file actions
55 lines (50 loc) · 2.19 KB
/
DRTpv
File metadata and controls
55 lines (50 loc) · 2.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import csv
import datetime
import time
# An IP is a potential robot when it makes more than this many requests in the log.
num_per_day = 1000
# ...and is "bursty" when it makes more than this many requests within 60 seconds.
num_per_minute = 5


def clock_to_seconds(clock):
    """Convert an ``H:M:S`` string into seconds since midnight.

    Args:
        clock: Text such as ``"13:05:59"``; only the first three
            colon-separated fields are read.

    Returns:
        The number of seconds as a float.

    Raises:
        IndexError: If fewer than three fields are present.
        ValueError: If a field is not an integer.
    """
    parts = clock.split(":")
    return datetime.timedelta(
        hours=int(parts[0]),
        minutes=int(parts[1]),
        seconds=int(parts[2]),
    ).total_seconds()


def find_daily_robots(rows, day_limit=num_per_day, report_every=0):
    """Find IPs that appear in more than ``day_limit`` rows.

    Each row is a sequence whose first item is the IP and whose third item is
    an ``H:M:S`` time. Rows whose first item is the literal ``"ip"`` (the
    header) and empty rows are skipped.

    Args:
        rows: Iterable of rows, e.g. a ``csv.reader``.
        day_limit: An IP is flagged once it has been seen more than this
            many times.
        report_every: When non-zero, print a progress line every
            ``report_every`` processed rows.

    Returns:
        A ``(robot_ips, robot_times)`` tuple. ``robot_ips`` lists the flagged
        IPs in the order they crossed the limit; ``robot_times`` maps each
        flagged IP to *all* of its request times (seconds), in file order.
    """
    # One pass, O(1) work per row: keeping every IP's times avoids both the
    # repeated list.count() scan and the backwards "backfill" scan, which
    # only recovered a hard-coded 99 earlier requests.
    times_by_ip = {}
    robot_ips = []
    processed = 0
    for row in rows:
        # csv.reader yields [] for blank lines; row[0] would raise on those.
        if not row or row[0] == "ip":
            continue
        ip = row[0]
        seen = times_by_ip.setdefault(ip, [])
        seen.append(clock_to_seconds(row[2]))
        # Flag exactly once, on the first request past the limit.
        if len(seen) == day_limit + 1:
            robot_ips.append(ip)
        if report_every:
            processed += 1
            if processed % report_every == 0:
                print(processed, "rows processed")
    robot_times = {ip: times_by_ip[ip] for ip in robot_ips}
    return robot_ips, robot_times


def find_burst_robots(times_by_ip, per_minute=num_per_minute, window_seconds=60):
    """Return the IPs that made more than ``per_minute`` requests in a window.

    Args:
        times_by_ip: Mapping of IP to its request times in seconds.
            Assumes each list is in chronological order within a single
            day -- TODO confirm against the log format.
        per_minute: Number of requests that must be exceeded.
        window_seconds: Length of the window in seconds.

    Returns:
        List of matching IPs, in the mapping's iteration order.
    """
    bursty = []
    for ip, times in times_by_ip.items():
        # times[i] and times[i + per_minute] bracket per_minute + 1 requests,
        # i.e. "more than per_minute". (Pairing with i + per_minute + 1, as
        # before, required one request more than the stated threshold.)
        if any(
            later - earlier <= window_seconds
            for earlier, later in zip(times, times[per_minute:])
        ):
            bursty.append(ip)
    return bursty


def main():
    """Prompt for a CSV log file, detect potential robots and print a report."""
    file_loc = input("Enter file directory: ")
    file_name = input("Enter file name: ")
    status = input("Would you like to know the status every 100000 rows? (slightly slower)(y/n) ")
    t1 = time.time()
    # newline="" is what the csv module expects for files handed to csv.reader.
    with open(file_loc+"/"+file_name, newline="") as csvfile:
        potential_robots_ips_1, all_potential_robots_1 = find_daily_robots(
            csv.reader(csvfile),
            report_every=100000 if status == "y" else 0,
        )
    # check the more than 5 per minute condition for each ip address
    # NOTE(review): this burst-filtered list is computed but never reported;
    # the report below lists every IP over the daily limit, exactly as the
    # original script did -- confirm which list is meant to be printed.
    potential_robots_ips_2 = find_burst_robots(all_potential_robots_1)
    t2 = time.time()
    print("DRTpv robots:", potential_robots_ips_1)
    print("There are",len(potential_robots_ips_1),"DRTpv robots in",file_name)
    print("Finished in",t2-t1,"seconds")


if __name__ == "__main__":
    main()