|
Don’t overload servers with rapid requests, and don’t scrape sites whose terms of service or robots.txt prohibit it.
Requirements
Python 3.x
Google Chrome
ChromeDriver (its version must match the version of your installed Chrome browser)
Install Python packages:
pip install selenium
Sample Code: Scraping Hotel Info via Google Search
import time
from urllib.parse import quote_plus

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
# Replace with your target hotel
hotel_query = "Taj Mahal Hotel Mumbai"

# Provide path to chromedriver
chromedriver_path = 'path/to/chromedriver'  # Update this

# Output label -> XPath used to locate that detail on the results page.
# NOTE(review): these XPaths depend on Google's current result markup and
# may stop matching when the page layout changes -- verify before relying
# on them.
DETAIL_XPATHS = {
    "Hotel Name": '//div[@data-attrid="title"]/span',
    "Address": '//span[contains(text(),"Address")]/following-sibling::span',
    "Contact Number": '//span[contains(text(),"Phone")]/following-sibling::span',
    "Working Hours": '//span[contains(text(),"Hours")]/following-sibling::span',
}


def build_search_url(query):
    """Return the Google Search URL for *query*.

    The query is URL-encoded so that spaces and reserved characters such
    as ``&`` or ``#`` are sent as part of the search text instead of
    being interpreted as URL syntax.
    """
    return f"https://www.google.com/search?q={quote_plus(query)}"


def create_driver():
    """Start and return a headless Chrome WebDriver.

    Uses the ChromeDriver binary located at ``chromedriver_path``.
    """
    # Setup Chrome options
    chrome_options = Options()
    chrome_options.add_argument("--headless")  # Run in background
    chrome_options.add_argument("--disable-gpu")

    service = Service(executable_path=chromedriver_path)
    return webdriver.Chrome(service=service, options=chrome_options)


def scrape_hotel_details(driver, query):
    """Search for *query* and collect the hotel details from the results page.

    Returns a dict keyed by the labels in ``DETAIL_XPATHS``; the value is
    the matched element's text, or ``None`` when no element matched.
    """
    driver.get(build_search_url(query))
    time.sleep(2)  # Wait for page to load

    details = {}
    for label, xpath in DETAIL_XPATHS.items():
        # find_elements returns an empty list instead of raising when
        # nothing matches, so one missing field does not hide the others.
        matches = driver.find_elements(By.XPATH, xpath)
        details[label] = matches[0].text if matches else None
    return details


def main():
    """Scrape the details for ``hotel_query`` and print them."""
    driver = create_driver()
    try:
        details = scrape_hotel_details(driver, hotel_query)
    finally:
        # Close the browser even if navigation or lookup fails, so no
        # headless Chrome process is left running.
        driver.quit()

    missing = []
    for label, value in details.items():
        if value is None:
            missing.append(label)
        else:
            print(f"{label}:", value)
    if missing:
        print("Some details could not be found:", ", ".join(missing))


if __name__ == "__main__":
    main()
|
|