-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscraper.py
More file actions
35 lines (27 loc) · 976 Bytes
/
scraper.py
File metadata and controls
35 lines (27 loc) · 976 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import requests
from bs4 import BeautifulSoup
# Search-results URL; the 1-based page number is appended to the end.
# The query string has to start with "?" -- without it the filter
# parameters are treated as part of the URL path rather than as a query.
base_url = "https://annuaire-entreprises.data.gouv.fr/rechercher?terme=&tranche_effectif_salarie=03&sap=J&page="
# Number of result pages to scrape
total_pages = 528
# Seconds to wait for each HTTP response before giving up on that page
request_timeout = 30
# Company names collected by main(), in page order
company_names = []


def build_page_url(page):
    """Return the search-results URL for the given 1-based page number."""
    return f"{base_url}{page}"


def extract_company_names(html):
    """Return the company names found in one page of search results.

    Args:
        html: Markup of a results page, as text.

    Returns:
        A list with the stripped text of the first ``<span>`` inside each
        result ``<div>``; result items without a ``<span>`` are skipped.
    """
    soup = BeautifulSoup(html, 'html.parser')
    # NOTE(review): this class name looks build-generated and may change
    # when the site is redeployed -- confirm it still matches the markup.
    results = soup.find_all('div', class_='style_result-item__fKcQt')
    names = []
    for result in results:
        name_tag = result.find('span')
        if name_tag:
            names.append(name_tag.text.strip())
    return names


def main():
    """Scrape every results page and print the collected company names.

    A page that cannot be fetched (network error, timeout, or HTTP error
    status) is reported and skipped so that the names gathered from the
    other pages are not lost.
    """
    # One session for the whole run so the connection can be reused.
    with requests.Session() as session:
        for page in range(1, total_pages + 1):
            try:
                response = session.get(
                    build_page_url(page), timeout=request_timeout
                )
                # Do not parse an error page as if it were search results.
                response.raise_for_status()
            except requests.RequestException as exc:
                print(f"Page {page} failed: {exc}")
                continue
            company_names.extend(extract_company_names(response.text))
            print(f"Page {page} scraped successfully.")

    # Print all company names
    print("Scraped Company Names:")
    for name in company_names:
        print(name)


if __name__ == "__main__":
    main()