-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathpages.py
More file actions
186 lines (155 loc) · 6.38 KB
/
pages.py
File metadata and controls
186 lines (155 loc) · 6.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
import urllib, urllib2
import json
import re, htmlentitydefs #html escaping
def smart_truncate(content, length=100, suffix='...'):
    """Truncate `content` to at most `length` characters on a word boundary.

    Returns `content` unchanged when it already fits.  Otherwise cuts after
    the last complete word within the first `length` characters and appends
    `suffix`.  If the head contains no space at all, falls back to a hard
    character cut (the previous behavior returned only the suffix).
    """
    if len(content) <= length:
        return content
    # Take one extra character so a word ending exactly at `length` survives,
    # then drop the (possibly partial) final word.
    words = content[:length + 1].split(' ')[0:-1]
    if not words:
        # No space in the head: hard-truncate instead of emitting '' + suffix.
        return content[:length] + suffix
    return ' '.join(words) + suffix
def unescape(text):
    """Remove HTML/XML character references and entities from a text string.

    Handles numeric references in decimal ("&#65;") and hex ("&#x41;") form
    as well as named entities ("&amp;").  Anything that fails to decode is
    left in place unchanged.
    """
    def fixup(m):
        text = m.group(0)
        if text[:2] == "&#":
            # numeric character reference (decimal or hex)
            try:
                if text[:3] == "&#x":
                    return unichr(int(text[3:-1], 16))
                else:
                    return unichr(int(text[2:-1]))
            except ValueError:
                pass
        else:
            # named entity, e.g. "&amp;" -> name2codepoint["amp"]
            try:
                text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
            except KeyError:
                pass
        return text  # leave as is
    # raw string: "\w" in a plain literal is an invalid/deprecated escape
    return re.sub(r"&#?\w+;", fixup, text)
class Story:
    """Wraps the JSON dict of a single reddit story.

    Attribute access is delegated to the underlying dict, so e.g.
    ``story.title`` reads ``object['title']``.
    """

    def __init__(self, object):
        """Create a story from its dict (JSON object) representation."""
        assert isinstance(object, dict), "json object is not a dict: %s" % type(object)
        self.object = object

    def __getattr__(self, name):
        """Pull unknown attributes directly from the stored dict."""
        # `in` instead of dict.has_key(): has_key() was removed in Python 3
        # and `in` is the idiomatic spelling in Python 2 as well.
        if name in self.object:
            return self.object.get(name)
        # call form works on both Python 2 and 3 (the old
        # "raise AttributeError, name" is Python-2-only syntax)
        raise AttributeError(name)

    def format_lines(self, length):
        """Prepare the story as a two-string tuple of at most `length` chars."""
        # -3 leaves room for the '...' suffix added by smart_truncate
        line1 = "{0}".format(unescape(
            smart_truncate(self.title.encode('utf-8'), length=length-3)
        ))
        line2 = "{0} points {1} comments {2} {3}".format(
            self.score,
            self.num_comments,
            self.domain,
            "/r/" + self.subreddit,
        )
        return (line1, line2[:length])
class BadSubredditError(Exception):
    """Raised when reddit reports that the requested subreddit does not exist."""
class Navigation:
    """Paging state for a single reddit listing page."""

    def __init__(self, next, count, stack):
        # `stack` keeps the id of the last story on each visited page so
        # that paging backwards can walk the history; `count` tracks how
        # many stories precede the current page.
        self.next, self.count, self.stack = next, count, stack
# NOTE(review): the old comment here claimed a classmethod-based singleton;
# no such pattern is implemented below -- RedditHandler is a plain class.
class RedditHandler:
    """Handles user credentials and downloading reddit listing pages."""
    # Special thanks to PhillipTaylor's "reddit_monitor" for the login code

    def __init__(self):
        """Set up a cookie-aware urlopen.

        Reddit's login is an AJAX POST, so cookie support has to be in place
        before anything else.  Works against either the stdlib `cookielib`
        or the third-party `ClientCookie`, whichever imports; references to
        the matching Request/urlopen are stored so later code doesn't care
        which one was used.

        Raises ImportError when neither cookie library is available.
        """
        try:
            import cookielib
            self.Request = urllib2.Request
            self.urlopen = urllib2.urlopen
            cookie_jar = cookielib.LWPCookieJar()
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie_jar))
            urllib2.install_opener(opener)
        except ImportError:
            try:
                import ClientCookie
                self.Request = ClientCookie.Request
                self.urlopen = ClientCookie.urlopen
                cookie_jar = ClientCookie.LWPCookieJar()
                opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cookie_jar))
                # Mirror the cookielib branch: without installing the opener
                # the cookie jar built above would never actually be used.
                ClientCookie.install_opener(opener)
            except ImportError:
                # old message contained a stray "#and you have neither." line
                # leaked from a comment into the string literal
                raise ImportError("This code is dependent on either "
                                  "'cookielib' or 'ClientCookie' "
                                  "and you have neither.")
        self.user = None

    def login(self, user, passwd):
        """POST credentials to reddit; return True on success, False otherwise.

        On success the session cookie lands in the installed cookie jar and
        `self.user` is set.
        """
        params = urllib.urlencode({
            'id' : '#login_login-main',
            'op' : 'login-main',
            'passwd' : passwd,
            'user' : user
        })
        try:
            req = self.Request("http://www.reddit.com/post/login", params)
            result = self.urlopen(req).read()
            # reddit answers HTTP 200 even for bad credentials;
            # failure is reported in the response body
            if result.find("logged: false") != -1:
                return False
        except Exception as e:
            # was: print "Error: %s", e.message -- which never substituted
            # the format string (it printed two comma-separated values) and
            # relied on the deprecated .message attribute
            print("Error: %s" % e)
            return False
        self.user = user
        return True

    def download_stories(self, subreddit, nav=None, direction=None):
        """Download a listing page as JSON and return ``(stories, nav)``.

        subreddit -- subreddit name, or None for the reddit front page
        nav       -- Navigation state from the previous call (fresh if None)
        direction -- None for first load, or "prev"/"next" to page

        Raises BadSubredditError when reddit signals a missing subreddit
        (HTTP 400/404, a literal error body, or a redirect to search), and
        re-raises any other HTTPError.
        """
        if subreddit is None:
            url = "http://www.reddit.com/.json"
        else:
            url = "http://www.reddit.com/r/" + subreddit + "/.json"
        if nav is None:
            nav = Navigation(None, 0, ["start"])
        if direction is not None:
            if direction == "prev":
                # the end of the stack marks the start of the current page,
                # so we discard it and get a reference to the last page
                if nav.stack[-1] != "start":
                    nav.count -= 25
                    nav.stack.pop()
                prev = nav.stack[-1]
                url += "?count={0}&after={1}".format(nav.count, prev)
            elif direction == "next":
                nav.stack.append(nav.next)
                nav.count += 25
                url += "?count={0}&after={1}".format(nav.count, nav.next)
            else:
                raise Exception("Bad paging direction given")
        stream = None
        json_data = None
        try:
            stream = urllib2.urlopen(url)
            json_data = stream.read()
        except urllib2.HTTPError as err:
            if err.getcode() in (400, 404):
                raise BadSubredditError
            else:
                raise
        # reddit signals a bad subreddit either with a literal error body
        # or by redirecting the request to a search page
        if json_data == "{error: 404}":
            raise BadSubredditError
        elif re.search(r'/search\?q=', stream.url):
            raise BadSubredditError
        stories_raw = json.loads(json_data)
        stories = []
        for child in stories_raw['data']['children']:
            stories.append(Story(child['data']))
        # id of the last story on the page -- reddit's "after" paging cursor
        nav.next = stories_raw['data']['after']
        return (stories, nav)