Web Scraping Online Market-Places with Python and Selenium


Spider

12.8.23

Objectives

Key Concepts

Tools and Techniques

Practical Exercises

Create the .env file and add environment variables:

GRAILED_USERNAME="username_goes_here"
GRAILED_PASSWORD="password_goes_here"

Create “auth.py”, import the required modules, and load the credentials from the .env file into variables the program can use.

import os
from dotenv import load_dotenv

# Read the Grailed credentials from the local .env file.
# Either value is None when its variable is not defined.
load_dotenv('.env')
username = os.environ.get('GRAILED_USERNAME')
password = os.environ.get('GRAILED_PASSWORD')

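The functions are defined below. For brevity, their imports are not shown: they need time from the standard library and, from Selenium, webdriver, By, Keys, ActionChains, WebDriverWait, expected_conditions (imported as EC) and TimeoutException. Most IDEs will offer to auto-import these.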

def init_driver(headless=False):
    """Create and return a Chrome WebDriver.

    Args:
        headless: When true, the ``--headless`` argument is added to the
            Chrome options before the driver is created. Defaults to False.

    Returns:
        The ``webdriver.Chrome`` instance built from those options.
    """
    options = webdriver.ChromeOptions()
    if headless:
        options.add_argument('--headless')
    driver = webdriver.Chrome(options=options)
    return driver

The driver, the interface for browser interactions, can be any browser driver. Chrome is used here for its popularity.

In the login_grailed() function, we target elements for actions. Initially, I relied on time.sleep() to wait for elements, but found Selenium's built-in explicit waits (WebDriverWait combined with an expected condition) more universally applicable.

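EC.element_to_be_clickable(): An expected condition that is passed to WebDriverWait(...).until(). The wait polls the page until the element is visible and enabled (or the timeout expires, raising TimeoutException), so we only act on elements that are ready to be clicked.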

Using try/except blocks allows for easier debugging through exception messages in the console.

def _wait_clickable(driver, locator, timeout=10):
    """Wait up to *timeout* seconds for *locator* to be clickable and return the element."""
    return WebDriverWait(driver, timeout).until(EC.element_to_be_clickable(locator))


def _type_slowly(actions, element, text, delay):
    """Click *element*, then send *text* one key at a time, sleeping *delay* seconds after each key."""
    actions.move_to_element(element).click().perform()
    for character in text:
        actions.send_keys(character).perform()
        time.sleep(delay)


def _abort(driver, message):
    """Print *message* and quit the driver; used for every failed login step."""
    print(message)
    driver.quit()


def login_grailed(url, driver):
    """Log in through the site's email login form using the module-level credentials.

    Opens ``<url>/users/sign_up/``, clicks the "Login" link, clicks
    "Log in with Email", types ``username`` and ``password`` key by key and
    submits the form with RETURN.

    Args:
        url: Base URL of the site; a trailing slash is tolerated.
        driver: Selenium WebDriver used for all browser interaction.

    Returns:
        None. If any step fails, a message is printed, ``driver.quit()`` is
        called and the function returns early, so the driver must not be
        reused afterwards.
    """
    # Fail before opening the page: iterating over a missing (None) credential
    # would otherwise raise TypeError mid-login and leave the browser running.
    if username is None or password is None:
        _abort(driver, "GRAILED_USERNAME and GRAILED_PASSWORD must be set")
        return

    sign_in_url = url.rstrip('/') + "/users/sign_up/"
    driver.get(sign_in_url)

    try:
        # Click on the login link. Wait for it to be clickable (not merely
        # present in the DOM) because it is clicked immediately afterwards.
        login_link = _wait_clickable(
            driver, (By.XPATH, "//a[@href='/users/sign_up' and text()='Login']")
        )
        login_link.click()
    except TimeoutException:
        _abort(driver, "Login link not available")
        return

    try:
        # Click on the login button
        login_button = _wait_clickable(
            driver, (By.XPATH, "//button[text()='Log in with Email']")
        )
        login_button.click()
    except TimeoutException:
        _abort(driver, "Login button not available")
        return

    actions = ActionChains(driver)

    try:
        # Input username, one character at a time
        email_input_field = _wait_clickable(driver, (By.ID, 'email'))
        _type_slowly(actions, email_input_field, username, 0.15)
    except TimeoutException:
        _abort(driver, "Email input field not available")
        return

    try:
        # Input password, one character at a time
        password_input_field = _wait_clickable(driver, (By.ID, 'password'))
        _type_slowly(actions, password_input_field, password, 0.012)
    except TimeoutException:
        _abort(driver, "Password input field not available")
        return

    try:
        # Submit login; the fixed sleeps pause before and after the RETURN key.
        time.sleep(3)
        actions.send_keys(Keys.RETURN).perform()
        time.sleep(2)
        # TODO: add a check for successful login here
    except Exception as e:
        print("Error submitting login: ", e)
        driver.quit()
        return

Key Takeaways

Questions and Curiosities

Additional Resources

Personal Reflection


farm-gallery arena

Tags