-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathdata_generator.py
More file actions
executable file
·70 lines (57 loc) · 1.86 KB
/
data_generator.py
File metadata and controls
executable file
·70 lines (57 loc) · 1.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/python2.6 -W ignore
import sys
import numpy
import random
# Largest value that fits in 32 bits when read as unsigned (2**32 - 1).
# Despite the name this is the unsigned maximum, not the signed one;
# write_votes wraps any vote above it back into the 32-bit range.
MAX_INT32 = 0xffffffff
def error():
    """Print the command-line usage to stderr and terminate with status 1.

    The argument list shown matches what ``opts`` reads from
    ``argv[1]`` through ``argv[5]``.

    Raises:
        SystemExit: always, with exit code 1.
    """
    sys.stderr.write(
        "usage: %s <zipf_alpha> <vote_number> <file_number> "
        "<file_zipf_alpha> <file_prefix>\n" % sys.argv[0]
    )
    # Non-zero status lets shells and wrapper scripts detect the bad
    # invocation; sys.exit is used because the bare exit() helper is only
    # installed by the site module and is meant for interactive sessions.
    sys.exit(1)
def opts(argv):
    """Turn the positional command-line arguments into a settings dict.

    Args:
        argv: argument list whose entries 1..5 are, in order, the vote
            Zipf exponent, the number of votes, the number of output
            files, the file-assignment Zipf exponent and the output
            file-name prefix. Entry 0 is ignored.

    Returns:
        dict with keys ``zipf_alpha`` (float), ``vote_number`` (int),
        ``file_number`` (int), ``file_zipf_alpha`` (float) and
        ``file_prefix`` (taken from argv unchanged).

    Raises:
        IndexError: if argv has fewer than six entries.
        ValueError: if a numeric argument cannot be parsed.
    """
    return {
        'zipf_alpha': float(argv[1]),
        'vote_number': int(argv[2]),
        'file_number': int(argv[3]),
        'file_zipf_alpha': float(argv[4]),
        'file_prefix': argv[5],
    }
def generate_votes(alpha, total_number):
    """Draw ``total_number`` random vote values.

    Args:
        alpha: distribution selector. A value equal to 1 requests
            uniformly random 32-bit integers; any other value is passed
            to ``numpy.random.zipf`` as the distribution parameter.
        total_number: how many values to draw.

    Returns:
        A Python list of ints when ``alpha == 1``, otherwise the array
        returned by ``numpy.random.zipf(alpha, total_number)``.
    """
    if alpha != 1:
        return numpy.random.zipf(alpha, total_number)
    # alpha == 1 is treated as the "uniform" case rather than handed to
    # the Zipf sampler: each vote is an independent random 32-bit integer.
    return [random.getrandbits(32) for _ in range(total_number)]
def get_file_id(alpha, file_number):
    """Choose a file index from a single Zipf-distributed sample.

    Args:
        alpha: parameter passed to ``numpy.random.zipf``.
        file_number: modulus applied to the sample.

    Returns:
        The sample reduced modulo ``file_number``.
    """
    sample = numpy.random.zipf(alpha)
    return sample % file_number
def write_votes(votes, file_number, alpha, prefix):
    """Distribute votes over ``file_number`` files and write them to disk.

    Each vote is assigned to a file by a value drawn from
    ``generate_votes(alpha, len(votes))`` reduced modulo ``file_number``.
    File ``i`` is written to the path ``prefix + str(i)``; every line has
    the form ``<random 32-bit integer>,<vote>``.

    Args:
        votes: sequence of integer vote values. Values above MAX_INT32
            are wrapped into the 32-bit range before being written; the
            sequence itself is not modified.
        file_number: number of output files to produce.
        alpha: distribution parameter used for the file assignment
            (forwarded to ``generate_votes``).
        prefix: path prefix for the output files.
    """
    data_list = [[] for _ in range(file_number)]
    print("divide votes into seperate bucket")
    bucket = generate_votes(alpha, len(votes))
    print("bucket initalized")
    for i, vote in enumerate(votes):
        if vote > MAX_INT32:
            # Wrap into the 32-bit range on a local copy so the caller's
            # list is left untouched.
            vote = vote % (MAX_INT32 + 1)
        # Assignment values are never negative (random bits or Zipf
        # samples), so an unconditional modulo matches reducing only the
        # values that are >= file_number.
        data_list[bucket[i] % file_number].append(vote)
    print("save votes into files")
    for i in range(file_number):
        # 'with' guarantees the handle is closed even if a write fails.
        with open(prefix + str(i), 'w') as f:
            for item in data_list[i]:
                f.write(str(random.getrandbits(32)) + ',' + str(item) + '\n')
def main(argv):
    """Generate a vote data set and write it to files.

    Args:
        argv: full argument list including the program name at index 0,
            followed by the five positional arguments read by ``opts``.

    If fewer than five positional arguments are supplied, ``error`` is
    invoked to report the usage and stop the program.
    """
    # Check the list that was actually passed in, and really call error():
    # a bare reference to the function name is a no-op, so a short argument
    # list used to fall through to an IndexError inside opts().
    if len(argv) < 6:
        error()
    print(argv)
    params = opts(argv)
    print("generate votes...\n")
    # list() normalises the result: generate_votes may return an array.
    votes = list(generate_votes(params['zipf_alpha'], params['vote_number']))
    print("vote generated, start writing\n")
    write_votes(votes, params['file_number'], params['file_zipf_alpha'], params['file_prefix'])
# Run the generator only when this file is executed as a script, so that
# importing the module does not trigger any data generation.
if __name__ == "__main__":
    main(sys.argv)