-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcoupang_crawler.py
More file actions
99 lines (77 loc) · 3.03 KB
/
coupang_crawler.py
File metadata and controls
99 lines (77 loc) · 3.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

from convert.key import key_match
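# Result sample: coupang_crawl() returns a dict keyed by the key_match() name.
# ...
# "마리오파티슈퍼스타즈": {
#     "price": 64800,
#     "sale_price": 54000,   # None if not in a sale period
#     "rating": 90,          # None if no rating element is found
#     "rating_count": 356,   # None if no rating-count element is found
#     "url": "<href of the product link>",
# },
# ...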
def trim_price(string):
    """Convert a displayed price such as ``"₩64,800"`` into an int.

    Every won sign and thousands separator is removed before conversion.
    ``int`` raises ``ValueError`` if the remainder is not a valid integer
    literal (for example an empty string).
    """
    # One pass that deletes both unwanted characters.
    digits = string.translate(str.maketrans("", "", "₩,"))
    return int(digits)
def trim_rating(string):
    """Convert an inline style such as ``"width: 90%;"`` into the int 90.

    The ``width:`` label, percent signs, spaces and semicolons are removed
    and the remainder is parsed with ``int``, which raises ``ValueError``
    if it is not a valid integer literal.
    """
    cleaned = string
    # Strip the label first, then the single-character decorations.
    for fragment in ("width:", "%", " ", ";"):
        cleaned = cleaned.replace(fragment, "")
    return int(cleaned)
def trim_rating_count(string):
    """Convert a displayed review count such as ``"(356)"`` into an int.

    Parentheses and spaces are removed before conversion. ``int`` raises
    ``ValueError`` if the remainder is not a valid integer literal.
    """
    # One pass that deletes all three unwanted characters.
    digits = string.translate(str.maketrans("", "", "() "))
    return int(digits)
def coupang_crawl(driver):
    """Crawl Coupang's Nintendo brand-shop listing and collect product data.

    Pages 1 through 4 of the listing are loaded with ``driver``. Every
    element with class ``baby-product-link`` is treated as one product. Its
    displayed name is mapped through ``key_match``; products mapped to
    ``"SKIP"`` or ``"ELSE"`` are ignored.

    Args:
        driver: Selenium WebDriver used to load the pages. Its cookies are
            deleted as a side effect.

    Returns:
        dict mapping each ``key_match`` result to a dict with the keys
        ``"price"``, ``"sale_price"``, ``"rating"``, ``"rating_count"`` and
        ``"url"``. ``"sale_price"`` is ``None`` when no base price is found;
        ``"rating"`` and ``"rating_count"`` are ``None`` when they cannot be
        read. If several products map to the same key, the last one wins.

    Raises:
        NoSuchElementException: if a product link has no ``name`` element.
    """
    # Only the page number varies, so build the fixed URL parts once.
    url_front = 'https://www.coupang.com/np/products/brand-shop?brandName=%EB%8B%8C%ED%85%90%EB%8F%84&listSize=36&filterType=rocket&customPriceRange=false&minPrice=&maxPrice='
    url_end = '&trcid=&traid=&channel=user&sorter=latestAsc&filter=&component=395209&rating=0'

    # Per-field failures that must not abort the crawl: the element is
    # missing, or its text/style is empty or not in the expected numeric
    # format (the trim_* parsers raise ValueError in that case).
    recoverable = (NoSuchElementException, ValueError)

    result = {}

    for page in range(1, 5):
        # load page
        driver.get(url_front + '&page=%d' % page + url_end)

        # load list
        # find_element(s)(By.CLASS_NAME, ...) is used instead of the
        # find_element(s)_by_class_name helpers, which newer Selenium
        # releases no longer provide.
        game_list = driver.find_elements(By.CLASS_NAME, "baby-product-link")

        for game in game_list:
            # name
            name = game.find_element(By.CLASS_NAME, "name").text
            product_name = key_match(name)

            # "SKIP": skipped for a reason; "ELSE": not supported by Switch.
            if product_name in ("SKIP", "ELSE"):
                continue

            game_data = {}

            # price: 0 means "could not be read"
            price = 0
            original_price = 0

            try:
                price = trim_price(game.find_element(By.CLASS_NAME, "price-value").text)
            except recoverable:
                pass

            try:
                original_price = trim_price(game.find_element(By.CLASS_NAME, 'base-price').text)
            except recoverable:
                pass

            if original_price == 0:
                # No base price: the displayed price is the regular price.
                game_data["price"] = price
                game_data["sale_price"] = None
            else:
                # A base price exists: the displayed price is the sale price.
                game_data["price"] = original_price
                game_data["sale_price"] = price

            # rating
            try:
                game_data['rating'] = trim_rating(game.find_element(By.CLASS_NAME, 'rating').get_attribute("style"))
            except recoverable:
                game_data['rating'] = None

            try:
                game_data['rating_count'] = trim_rating_count(game.find_element(By.CLASS_NAME, 'rating-total-count').text)
            except recoverable:
                game_data['rating_count'] = None

            # url
            try:
                game_data['url'] = game.get_attribute("href")
            except NoSuchElementException:
                game_data['url'] = None

            # save result
            result[product_name] = game_data

        # NOTE(review): the nesting level of this call was not recoverable
        # from the reviewed copy of the file; it is placed at page level so
        # cookies are also cleared when the function returns - confirm.
        driver.delete_all_cookies()

    return result