-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgoogleImageScraping.py
More file actions
82 lines (64 loc) · 3.56 KB
/
googleImageScraping.py
File metadata and controls
82 lines (64 loc) · 3.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import os
import time
import requests
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
def fetch_images(query, max_links_to_fetch, webdriver_path, search_url=None,
                 click_delay=2):
    """Collect full-size image URLs from a Google Images results page.

    The page is opened in Chrome, every thumbnail is clicked in turn, and the
    ``src`` of each full-size preview image that shows up is recorded.

    Args:
        query: Search term. Not used to build the request: the results page
            is loaded from ``search_url``. Kept so existing callers that pass
            it positionally keep working.
        max_links_to_fetch: Stop once at least this many distinct URLs have
            been collected.
        webdriver_path: Path assigned to ``Options.binary_location``, i.e.
            the Chrome executable to launch.
        search_url: Google Images results URL to scrape. Defaults to a
            hand-tuned "coriandre culinaire" search; a fixed URL is used
            instead of an automated query because finding a prompt that
            returns accurate images took manual trial and error.
        click_delay: Seconds to wait after clicking a thumbnail (and after
            scrolling) so the page can update.

    Returns:
        A set of image URLs. It may hold fewer than ``max_links_to_fetch``
        entries when the page runs out of thumbnails, or a few more when one
        click reveals several previews.
    """
    if search_url is None:
        search_url = 'https://www.google.com/search?q=coriandre+culinaire&tbm=isch&ved=2ahUKEwj9iJzb8f2CAxWYjycCHanCBQcQ2-cCegQIABAA&oq=coriandre+culinaire&gs_lcp=CgNpbWcQAzoFCAAQgAQ6BAgAEB46BggAEAgQHlDFD1j4H2D4ImgAcAB4AIABbogB3QqSAQQxLjEymAEAoAEBqgELZ3dzLXdpei1pbWfAAQE&sclient=img&ei=9QVyZb26BZifnsEPqYWXOA&bih=703&biw=1536'

    options = Options()
    options.binary_location = webdriver_path
    driver = webdriver.Chrome(options=options)

    image_urls = set()
    results_start = 0
    try:
        driver.get(search_url)
        # Progress is measured by the number of distinct URLs, so the loop
        # condition and the early exit below can never disagree.
        while len(image_urls) < max_links_to_fetch:
            thumbnail_results = driver.find_elements(
                By.CSS_SELECTOR, "img.rg_i.Q4LuWd")
            number_results = len(thumbnail_results)
            if number_results <= results_start:
                # No thumbnails beyond the ones already processed: stop
                # instead of spinning forever on an exhausted page.
                break

            for img in thumbnail_results[results_start:number_results]:
                try:
                    img.click()
                    time.sleep(click_delay)
                    actual_images = driver.find_elements(
                        By.CSS_SELECTOR, 'img.sFlh5c.pT0Scc.iPVvYb')
                    for actual_image in actual_images:
                        src = actual_image.get_attribute('src')
                        if src and 'http' in src:
                            image_urls.add(src)
                except Exception as e:
                    # Best effort: a thumbnail that cannot be clicked or
                    # read is reported and skipped, not fatal to the run.
                    print(e)
                if len(image_urls) >= max_links_to_fetch:
                    break

            if len(image_urls) >= max_links_to_fetch:
                break
            results_start = number_results
            # Scroll to the bottom so the page can load further thumbnails
            # (assumes results are lazy-loaded on scroll -- TODO confirm).
            driver.execute_script(
                "window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(click_delay)
    finally:
        # Always close the browser, even when scraping fails midway.
        driver.quit()
    return image_urls
def download_images(folder_path, file_prefix, urls, timeout=30):
    """Download every URL in ``urls`` into ``folder_path`` as a .jpg file.

    Files are named ``<file_prefix>_<n>.jpg`` where ``n`` starts at 1 and
    follows the iteration order of ``urls``.

    Args:
        folder_path: Destination directory; created if it does not exist.
        file_prefix: Prefix used for every saved file name.
        urls: Iterable of image URLs to fetch.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        None. A failed download is reported on stdout and skipped.
    """
    # Without this, every write fails when the folder is missing.
    os.makedirs(folder_path, exist_ok=True)

    for i, url in enumerate(urls, start=1):
        try:
            response = requests.get(url, timeout=timeout)
            # Do not save an HTTP error page under an image file name.
            response.raise_for_status()
            target = os.path.join(folder_path, f"{file_prefix}_{i}.jpg")
            with open(target, "wb") as file:
                file.write(response.content)
        except Exception as e:
            # Best effort: one bad URL must not abort the whole batch.
            print(f"Error downloading image {i}: {e}")
# Search term. fetch_images loads a fixed, hand-picked results URL rather than
# searching for this value (accurate prompts were hard to find automatically),
# so in practice it only serves as the file-name prefix of the saved images.
query = 'corianderCulinaire'
max_links = 90  # Number of image links to fetch
folder = 'image_folder_coriander'  # Folder to save the images
# Path to the Chrome executable that Selenium launches. Make sure the
# installed driver matches this browser version.
webdriver_path = 'C:\\Users\\hp\\Desktop\\seleniumScrape\\chrome-win64\\chrome.exe'

# Only scrape when run as a script, so importing this module for its
# functions does not open a browser and start downloading.
if __name__ == "__main__":
    # Fetch image URLs
    image_links = fetch_images(query, max_links, webdriver_path)
    # Download images
    download_images(folder, query, image_links)