bracket-pool-simulator/getBracketsFromPool.py at main · thomascomer/bracket-pool-simulator · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
from splinter import browser
import splinter
import os
import re


def getBfromP(groupID: str, path="html_sources/", year="2019", entry_limit=40):
    """Save the HTML of every bracket entry in an ESPN Tournament Challenge group.

    Opens the group's page in a headless splinter browser, collects the links
    whose href contains ``entry?entryID=``, visits each one in turn and writes
    the page's HTML snapshot into ``path``, one file per entry, named after
    the part of the page title before ``" -"`` (with ``/`` replaced by ``_``).
    If an entry link cannot be found again on the group page, a placeholder
    file named after the link text is written instead.

    Args:
        groupID: Group identifier inserted into the group URL.
        path: Output directory. When left at the default ``"html_sources/"``,
            ``<groupID>/`` is appended to it. The directory (including any
            missing parents) is created if needed.
        year: Tournament year used when building the ESPN URLs.
        entry_limit: Maximum number of entry links the function will visit.

    Raises:
        ValueError: If the group page contains no
            ``<header class="group-header">`` element (treated as "pool not
            publicly accessible"), or if more than ``entry_limit`` entry
            links are found.
    """
    print("Connecting...", end=' ')
    with browser.Browser(headless=True) as b:
        if path == "html_sources/":
            path += groupID + '/'
        myurl = "http://fantasy.espn.com/tournament-challenge-bracket/" + str(year) + "/en/group?groupID=" + groupID + "&_301_=" + str(year)
        # Loop-invariant base for direct entry URLs (used for the "game" redirect).
        urlBase = "http://fantasy.espn.com/tournament-challenge-bracket/" + str(year) + "/en/entry?"
        b.visit(myurl)

        # html_snapshot() dumps the current page to a file and returns its name.
        # NOTE(review): splinter writes snapshots as UTF-8 by default, so they
        # are read back as UTF-8 rather than with the platform default.
        with open(b.html_snapshot(), encoding="utf-8") as f:
            group_text = f.read()
        # The absence of the group header is used as the signal that the
        # pool cannot be viewed without logging in.
        if re.search(r"<header class=\"group-header\">.*?</header>", group_text) is None:
            raise ValueError("This pool is not publicly accessible, therefore the program cannot run")

        mylinks = [link.value for link in b.links.find_by_partial_href("entry?entryID=")]
        total = len(mylinks)
        if total > entry_limit:
            # str() is required here: concatenating the bare int raised
            # TypeError instead of the intended ValueError.
            raise ValueError("This program is intended for small groups. It is too expensive to visit " + str(total) + " URLs.")

        # Create the output directory once, up front, at any nesting depth.
        if path:
            os.makedirs(path, exist_ok=True)

        for linkcount, i in enumerate(mylinks, start=1):
            # Start a fresh "Progress:" line for entries 1, 11, 21, ...
            if linkcount % 10 == 1:
                print("\nProgress: " + str(linkcount) + '/' + str(total), end=' ')
            else:
                print(str(linkcount) + '/' + str(total), end=' ')

            try:
                b.links.find_by_text(i).click()
            except splinter.exceptions.ElementDoesNotExist:
                print('\n' + i, "could not be found")
                entryName = i
                contents = i + " could not be found"
            else:
                if "game" in b.url:
                    # The click landed on a "game" URL; go to the entry page
                    # for the same entryID instead.
                    ID = re.search(r"entryID=\d*", b.url)[0]
                    b.visit(urlBase + ID)
                entryName = b.title.split(' -')[0]
                with open(b.html_snapshot(), encoding="utf-8") as f1:
                    contents = f1.read()

            # '/' in an entry name would otherwise be read as a directory separator.
            entryName = entryName.replace('/', '_')
            with open(os.path.join(path, entryName), 'w', encoding="utf-8") as f2:
                f2.write(contents)

            # Return to the group page so the next link text can be located.
            b.visit(myurl)
        print()


if __name__ == "__main__":
    # Run as a script: fetch the hard-coded group with all other defaults.
    getBfromP(groupID="5013")