-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfbparser_daterange.py
More file actions
206 lines (162 loc) · 6.27 KB
/
fbparser_daterange.py
File metadata and controls
206 lines (162 loc) · 6.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
#!/usr/bin/python
# -*- coding: utf-8 -*-
# import sys, getopt
import urllib2
import json
import csv
import datetime
from dbconnect import connection
import gc
def delete_existing(company):
    """Delete every row of the ``facebook`` table stored for *company*.

    A new cursor/connection pair is obtained from ``connection()``, the
    session is switched to UTF-8 with an empty ``sql_mode``, and the DELETE
    is executed and committed.

    Args:
        company: value compared against the ``company`` column.

    Returns:
        Whatever ``cursor.execute`` returns for the DELETE statement
        (presumably the affected-row count -- depends on the DB driver).
    """
    c, conn = connection()
    try:
        conn.set_character_set('utf8')
        c.execute('SET NAMES utf8;')
        c.execute('SET CHARACTER SET utf8;')
        c.execute('SET character_set_connection=utf8;')
        c.execute("set session sql_mode='';")
        conn.commit()
        # first delete existing
        data = c.execute("delete from facebook where company = (%s)", [company])
        conn.commit()
        return data
    finally:
        # Release the cursor and the connection even when a statement fails,
        # so an error does not leave a dangling DB connection behind.
        try:
            c.close()
        finally:
            conn.close()
            gc.collect()
def _field_or_default(data, index, default):
    """Return ``data[index]`` when it exists and is not blank, else *default*."""
    try:
        value = data[index]
    except IndexError as e:
        # Short rows are tolerated: report the problem and use the default.
        print(e)
        return default
    # Truthiness of the stripped text replaces the former ``is not ''``
    # identity test, which was always true for unicode values.
    if value and value.strip():
        return value
    return default


def db_insert(company, data):
    """Insert one post row for *company* into the ``facebook`` table.

    Args:
        company: value stored in the ``company`` column.
        data: indexable row; positions 1..6 are read as message, type,
            created, likes, shares and comments (position 0 is not used).
            Missing or blank entries fall back to ``''`` for created,
            message and type, and to ``None`` for the three counters.

    Returns:
        None.
    """
    # Resolve every field before touching the database so that a malformed
    # row cannot leave an open connection behind.
    created = _field_or_default(data, 3, '')
    message = _field_or_default(data, 1, '')
    post_type = _field_or_default(data, 2, '')
    likes = _field_or_default(data, 4, None)
    shares = _field_or_default(data, 5, None)
    comments = _field_or_default(data, 6, None)

    query = "insert into facebook (company, created, message, type, likes, shares, comments) values (%s, %s, %s, %s, %s, %s, %s)"

    c, conn = connection()
    try:
        conn.set_character_set('utf8')
        c.execute('SET NAMES utf8;')
        c.execute('SET CHARACTER SET utf8;')
        c.execute('SET character_set_connection=utf8;')
        c.execute("set session sql_mode='';")
        conn.commit()
        # then insert new
        c.execute(query, (company, created, message.encode('utf-8'), post_type, likes, shares, comments))
        conn.commit()
    finally:
        # Always release the cursor and connection, also on failure.
        try:
            c.close()
        finally:
            conn.close()
            gc.collect()
# Year-first timestamp layout (YYYY-MM-DD HH:MM:SS); used with strptime when
# parsing a post's creation time.
datetimeformat = "%Y-%m-%d %H:%M:%S"
# Day-first timestamp layout (DD-MM-YYYY HH:MM:SS); the only visible reference
# to it is in commented-out code.
datetimegreekformat = "%d-%m-%Y %H:%M:%S"
def render_to_json(graph_url):
    """Fetch *graph_url* and return the response body parsed as JSON.

    The URL (followed by a blank line) is echoed to stdout before the
    request is made.

    Args:
        graph_url: complete URL to request.

    Returns:
        The object produced by ``json.loads`` for the response body.

    Raises:
        Any error raised by ``urllib2.urlopen`` or ``json.loads`` is
        propagated unchanged.
    """
    #render graph url call to JSON
    print(graph_url+"\n")
    web_response = urllib2.urlopen(graph_url)
    try:
        readable_page = web_response.read()
    finally:
        # Close the response explicitly (also when read() fails) rather than
        # relying on garbage collection to release the socket.
        web_response.close()
    return json.loads(readable_page)
def getFBposts(company, graphurl, since, until, limit=10, access_token=None):
    """Fetch one page of posts, either by date range or by a ready-made URL.

    Args:
        company: page identifier placed in the request path; must be truthy
            for any request to be made.
        graphurl: complete URL to fetch as-is (e.g. a paging "next" link);
            only used when *since*/*until* are not both given.
        since: lower date bound appended as the ``since`` query parameter.
        until: upper date bound appended as the ``until`` query parameter.
        limit: page size appended as the ``limit`` query parameter
            (defaults to the previously hard-coded 10).
        access_token: token appended to the date-range request; when
            ``None`` the token previously hard-coded here is used.

    Returns:
        The decoded JSON returned by ``render_to_json``, or ``None`` when the
        arguments describe neither a date-range request nor a URL request.
    """
    graph_url = "https://graph.facebook.com/v2.5"

    if company and since and until:
        if access_token is None:
            # NOTE(review): this credential is committed in source; it should
            # be supplied via the access_token parameter / configuration.
            access_token = "156677781373009|7303b3e525e283b63d026d817d96b3b6"
        fields = "message%2Ccreated_time%2Clikes.limit(1).summary(true)%2Cshares%2Ccomments.limit(1).summary(true)%2Ctype"
        request_url = (
            graph_url + "/" + company + "/posts?fields=" + fields
            + "&since=" + since + "&until=" + until
            + "&limit=" + str(limit) + "&access_token=" + access_token
        )
        return render_to_json(request_url)

    if company and graphurl:
        return render_to_json(graphurl)

    return None
# def FBParser(companies, population):
def _summary_total(post, key):
    """Return ``str(post[key]['summary']['total_count'])`` or ``''`` if absent."""
    if key in post and "summary" in post[key]:
        return str(post[key]["summary"]["total_count"])
    return ''


def _post_to_row(post):
    """Flatten one post dict into [id, message, type, created, likes, shares, comments]."""
    fb = []
    try:
        fb.append(post["id"])
        # Posts without text carry no "message" key; default to '' so the
        # remaining fields of the post are still collected.
        fb.append((post.get("message") or "").strip().replace("\n", " - "))
        fb.append(post["type"])
        created = post["created_time"].replace('+0000', '').replace('T', ' ')
        fb.append(str(datetime.datetime.strptime(created, datetimeformat)))
        fb.append(_summary_total(post, "likes"))
        if "shares" in post and "count" in post["shares"]:
            fb.append(str(post["shares"]["count"]))
        else:
            fb.append('')
        fb.append(_summary_total(post, "comments"))
    except Exception as e:
        # Best effort: keep whatever was collected so far for this post.
        print("Key error:"+str(e))
    return fb


def FBParser(companies, since, until):
    """Re-import the posts of each company for a date range.

    For every company the existing rows are deleted, all result pages are
    fetched through ``getFBposts`` (following ``paging.next`` links), and
    each post is flattened and written with ``db_insert``.

    Args:
        companies: iterable of page identifiers.
        since: start date as ``dd/mm/YYYY``.
        until: end date as ``dd/mm/YYYY``.

    Returns:
        A list with one row per post, each laid out as
        ``[id, message, type, created, likes, shares, comments]`` (a row may
        be shorter when a post could not be fully parsed).

    Raises:
        ValueError: when *since* or *until* does not match ``dd/mm/YYYY``.
    """
    since = datetime.datetime.strptime(since, '%d/%m/%Y').strftime('%Y-%m-%d')
    until = datetime.datetime.strptime(until, '%d/%m/%Y').strftime('%Y-%m-%d')

    fblist = []
    for company in companies:
        # NOTE(review): rows are removed before the fetch happens, so a
        # failed download leaves the company without data -- confirm intended.
        delete_existing(company)

        #extract post data
        pages = []
        json_postdata = getFBposts(company, None, since, until)
        while json_postdata:
            pages.append(json_postdata.get('data', []))
            nexturl = json_postdata.get('paging', {}).get('next')
            if not nexturl:
                break
            json_postdata = getFBposts(company, nexturl, None, None)

        for page in pages:
            for post in page:
                fb = _post_to_row(post)
                fblist.append(fb)
                db_insert(company, fb)
    return fblist