-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy patheshop_crawler.py
More file actions
127 lines (93 loc) · 3.61 KB
/
eshop_crawler.py
File metadata and controls
127 lines (93 loc) · 3.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, StaleElementReferenceException
def trim_price(string):
    """Convert a won-formatted price string such as "₩64,800" to an int.

    Every "₩" and "," character is removed and whatever remains is handed
    to ``int``; a remainder that ``int`` cannot parse raises ``ValueError``.
    """
    # One translate pass deletes both the currency sign and the separators.
    without_symbols = string.translate(str.maketrans("", "", "₩,"))
    return int(without_symbols)
def eshop_crawl(count=100):
    """Crawl the Nintendo Korea store game listing and collect game details.

    The listing page is loaded in one Firefox session; each listed product's
    detail page is then loaded in a second session via ``eshop_detail_crawl``.

    Args:
        count: Maximum number of listed games to crawl. Defaults to 100;
            zero or a negative value crawls nothing.

    Returns:
        A list holding the value returned by ``eshop_detail_crawl`` for each
        crawled game, in listing order.
    """
    game_data = []
    driver = None
    detail_driver = None
    try:
        # Both sessions are started inside the try block so that a failure
        # while starting the second one still shuts the first one down.
        driver = webdriver.Firefox(executable_path="./geckodriver")
        detail_driver = webdriver.Firefox(executable_path="./geckodriver")

        # load page
        driver.get("https://store.nintendo.co.kr/games")
        try:
            driver.find_element_by_class_name("popup-close").click()
        except NoSuchElementException:
            # No popup was shown, so there is nothing to dismiss; the listing
            # is still usable.
            pass

        # search game list
        game_list = driver.find_elements_by_class_name("category-product-item")
        for game in game_list[:max(count, 0)]:
            title_link = game.find_element_by_class_name(
                "category-product-item-title-link"
            )
            name = title_link.text
            url = title_link.get_attribute("href")

            # detail crawl
            game_data.append(eshop_detail_crawl(name, url, detail_driver))
    finally:
        # Quit each session independently so that an error while closing
        # the first one cannot leave the second one running.
        try:
            if driver is not None:
                driver.quit()
        finally:
            if detail_driver is not None:
                detail_driver.quit()

    return game_data
def eshop_detail_get_attribute(driver, key):
    """Return the text of the attribute value nested under class ``key``.

    Looks up the first element with class name ``key`` on the page currently
    loaded in ``driver``, then the "product-attribute-val" element inside it.
    Returns that element's text, or ``None`` when either lookup raises
    ``NoSuchElementException``.
    """
    try:
        return (
            driver.find_element_by_class_name(key)
            .find_element_by_class_name("product-attribute-val")
            .text
        )
    except NoSuchElementException:
        return None
def _split_attribute_list(text):
    """Split a ", "-separated attribute string into a list; None gives []."""
    return [] if text is None else text.split(", ")


def _format_release_date(text):
    """Join the ". "-separated parts of a date string with "-", zero-padded.

    The parsing implies input shaped like "2021. 3. 5." (TODO confirm against
    the live page), which becomes "2021-03-05". ``None`` is passed through.
    """
    if text is None:
        return None
    # Strip only an actual trailing "." instead of blindly dropping the last
    # character, so a date without the final dot keeps its last digit.
    parts = text.strip().rstrip(".").split(". ")
    return "-".join(part.zfill(2) for part in parts)


def _read_prices(driver):
    """Return ``(price, sale_price)`` parsed from the page's "price" elements.

    With one element it is the price and there is no sale price. With two,
    the first is used as the sale price and the second as the price. Any
    other count, a stale element, or unparsable text gives ``(None, None)``.
    """
    try:
        # find_elements_* returns an empty list when nothing matches, so a
        # missing price has to be handled by count, not by an exception.
        texts = [
            element.text
            for element in driver.find_elements_by_class_name("price")
        ]
        if len(texts) == 1:
            return trim_price(texts[0]), None
        if len(texts) == 2:
            return trim_price(texts[1]), trim_price(texts[0])
    except (NoSuchElementException, StaleElementReferenceException, ValueError):
        # Price extraction is best-effort: one unreadable price must not
        # abort the whole crawl.
        return None, None
    return None, None


def eshop_detail_crawl(name, url, driver):
    """Load a product detail page and build the record for one game.

    Args:
        name: Game title; stored as-is under "title".
        url: Detail page URL; loaded in ``driver`` and stored under
            "nintendoStore" -> "url". With the store prefix removed it is
            also stored under "idx".
        driver: Selenium driver used to load and query the page.

    Returns:
        A dict with the keys "title", "idx", "genres", "languages",
        "playerCount", "releaseDate", "images", "nintendoStore" and
        "coupang". "nintendoStore" is a dict that always has "url",
        "description", "price" and "salePrice"; values that could not be read
        from the page are ``None`` (or ``[]`` for the two list fields).
        "images" and "coupang" are always ``None`` here.
    """
    # load page
    driver.get(url)

    # attributes
    genres = eshop_detail_get_attribute(driver, 'game_category')
    release_date = eshop_detail_get_attribute(driver, 'release_date')
    player_count = eshop_detail_get_attribute(driver, 'no_of_players')
    languages = eshop_detail_get_attribute(driver, 'supported_languages')

    # description
    try:
        description = driver.find_element_by_class_name('description').text
    except NoSuchElementException:
        description = None

    # price
    price, sale_price = _read_prices(driver)

    return {
        'title': name,
        'idx': url.replace("https://store.nintendo.co.kr/", ""),
        'genres': _split_attribute_list(genres),
        "languages": _split_attribute_list(languages),
        # The player count keeps its text form; only the "명" suffix is removed.
        "playerCount": (
            None if player_count is None else player_count.replace("명", "")
        ),
        'releaseDate': _format_release_date(release_date),
        "images": None,
        "nintendoStore": {
            "url": url,
            'description': description,
            'price': price,
            'salePrice': sale_price,
        },
        "coupang": None,
    }