Data Science Portfolio

Scrape Crypto Prices with Selenium

Inspect crypto.com source code to obtain relevant XPaths

scrape-crypto-prices-1.png

Examine table element

scrape-crypto-prices-2.png

Extract XPath for <tr> element

scrape-crypto-prices-3.png

Extract XPath for <td> element

scrape-crypto-prices-4.png

Import libraries

import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException

Instantiate the WebDriver

# Page that hosts the price table to scrape.
url = 'https://crypto.com/price'

# Chrome is started without a visible window and with an explicit window-size flag.
chrome_options = Options()
for flag in ('--headless', '--window-size=1920x1080'):
    chrome_options.add_argument(flag)

# The chromedriver binary is expected next to this notebook/script.
driver = webdriver.Chrome(options=chrome_options, executable_path='./chromedriver')
driver.get(url)

# Fixed three-second pause before reading from the page
# (presumably to let the table finish rendering -- TODO confirm it is long enough).
time.sleep(3)

print(driver.title)
Top 50 Cryptocurrency Prices, Coin Market Cap, Price Charts And Historical Data | Crypto.com

Calculate the number of rows and columns in the table (each count plus one, for use as exclusive `range` bounds)

# Imported here so this cell runs on its own; `By` names the locator strategy.
from selenium.webdriver.common.by import By

# Absolute XPaths for every body row of the price table and for every cell of
# its first row.
tr_xpath = '/html/body/div[1]/div[3]/div[2]/div/div[4]/div[1]/table/tbody/tr'
td_xpath = '/html/body/div[1]/div[3]/div[2]/div/div[4]/div[1]/table/tbody/tr[1]/td'

# find_elements(By.XPATH, ...) is used instead of find_elements_by_xpath():
# the *_by_* helpers were removed in Selenium 4.3, while this form is
# available in both Selenium 3 and 4.
# One is added to each count so the values can serve directly as exclusive
# upper bounds for the 1-based XPath indices used in the scraping loop.
rows = 1 + len(driver.find_elements(By.XPATH, tr_xpath))
cols = 1 + len(driver.find_elements(By.XPATH, td_xpath))

print(rows)
print(cols)
51
10

Initialize empty lists for the desired data

# One accumulator list per output column; each is a separate list object.
name, symbol, price = [], [], []
one_day_chg, one_day_vol, market_cap = [], [], []

Loop through the table and collect the data

# Imported here so this cell runs on its own; `By` names the locator strategy.
from selenium.webdriver.common.by import By

xpath_prefix = '/html/body/div[1]/div[3]/div[2]/div/div[4]/div[1]/table/tbody/tr['

# 1-based <td> positions of the fields that are collected:
# 3 = name + symbol, 4 = price, 5 = 24h change, 6 = 24h volume, 7 = market cap.
wanted_columns = (3, 4, 5, 6, 7)

for i in range(1, rows):
    row_prefix = xpath_prefix + str(i) + ']/td['

    # Read every wanted cell of this row first. A missing cell is recorded as
    # None instead of being skipped, so all six lists always grow together and
    # stay aligned row-for-row when they are turned into a DataFrame.
    cell_text = {}
    for j in wanted_columns:
        try:
            # find_element(By.XPATH, ...) works on Selenium 3 and 4; the older
            # find_element_by_xpath() helper was removed in Selenium 4.3.
            cell_text[j] = driver.find_element(By.XPATH, row_prefix + str(j) + ']').text
        except NoSuchElementException:
            cell_text[j] = None

    # A row with none of the wanted cells contributes nothing at all.
    if all(text is None for text in cell_text.values()):
        continue

    # The third cell holds "<name>\n<symbol>"; tolerate a missing symbol line
    # rather than raising IndexError.
    if cell_text[3] is None:
        name.append(None)
        symbol.append(None)
    else:
        parts = cell_text[3].split('\n')
        name.append(parts[0])
        symbol.append(parts[1] if len(parts) > 1 else None)

    price.append(cell_text[4])
    one_day_chg.append(cell_text[5])
    one_day_vol.append(cell_text[6])
    market_cap.append(cell_text[7])

Store data in pandas DataFrame

# Column label -> collected values, listed in the order the columns should appear.
crypto_data = {
    'Name': name,
    'Symbol': symbol,
    'Price': price,
    '24 Hour Change': one_day_chg,
    '24 Hour Volume': one_day_vol,
    'Market Cap': market_cap,
}

df = pd.DataFrame(crypto_data)

# Preview the first rows of the table.
df.head()
Name Symbol Price 24 Hour Change 24 Hour Volume Market Cap
0 Bitcoin BTC $37,059.32 -5.09% $19.06 B $700.84 B
1 Ethereum ETH $2,706.02 -3.98% $13.84 B $321.49 B
2 Tether USDT $1.01 +0.00% $44.06 B $77.98 B
3 BNB BNB $367.60 -4.82% $1.28 B $60.81 B
4 USD Coin USDC $1.00 -0.01% $2.72 B $50.42 B

Quit the driver

# The scraped values are already held in `df`, so the WebDriver is no longer needed.
driver.quit()

Complete scraper

import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException

# `By` names the locator strategy passed to find_element / find_elements.
from selenium.webdriver.common.by import By

# Page that hosts the price table to scrape.
url = 'https://crypto.com/price'

chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--window-size=1920x1080')

driver = webdriver.Chrome(executable_path='./chromedriver', options=chrome_options)

# One accumulator list per output column.
name = []
symbol = []
price = []
one_day_chg = []
one_day_vol = []
market_cap = []

# Everything that talks to the browser runs inside try/finally so the driver
# is always quit, even if the page layout changed and a step raises.
try:
    driver.get(url)

    # Fixed pause before reading from the page
    # (presumably to let the table render -- TODO confirm it is long enough).
    time.sleep(3)

    print(driver.title)

    tr_xpath = '/html/body/div[1]/div[3]/div[2]/div/div[4]/div[1]/table/tbody/tr'
    td_xpath = '/html/body/div[1]/div[3]/div[2]/div/div[4]/div[1]/table/tbody/tr[1]/td'

    # find_elements(By.XPATH, ...) is used instead of find_elements_by_xpath():
    # the *_by_* helpers were removed in Selenium 4.3, while this form is
    # available in both Selenium 3 and 4.
    # One is added so the values act as exclusive bounds for 1-based XPath indices.
    rows = 1 + len(driver.find_elements(By.XPATH, tr_xpath))
    cols = 1 + len(driver.find_elements(By.XPATH, td_xpath))

    print(rows)
    print(cols)

    xpath_prefix = '/html/body/div[1]/div[3]/div[2]/div/div[4]/div[1]/table/tbody/tr['

    # 1-based <td> positions of the fields that are collected:
    # 3 = name + symbol, 4 = price, 5 = 24h change, 6 = 24h volume, 7 = market cap.
    wanted_columns = (3, 4, 5, 6, 7)

    for i in range(1, rows):
        row_prefix = xpath_prefix + str(i) + ']/td['

        # A missing cell is recorded as None instead of being skipped, so all
        # six lists grow together and stay aligned row-for-row.
        cell_text = {}
        for j in wanted_columns:
            try:
                cell_text[j] = driver.find_element(By.XPATH, row_prefix + str(j) + ']').text
            except NoSuchElementException:
                cell_text[j] = None

        # A row with none of the wanted cells contributes nothing at all.
        if all(text is None for text in cell_text.values()):
            continue

        # The third cell holds "<name>\n<symbol>"; tolerate a missing symbol
        # line rather than raising IndexError.
        if cell_text[3] is None:
            name.append(None)
            symbol.append(None)
        else:
            parts = cell_text[3].split('\n')
            name.append(parts[0])
            symbol.append(parts[1] if len(parts) > 1 else None)

        price.append(cell_text[4])
        one_day_chg.append(cell_text[5])
        one_day_vol.append(cell_text[6])
        market_cap.append(cell_text[7])
finally:
    driver.quit()

crypto_data = {'Name': name, 'Symbol': symbol, 'Price': price, '24 Hour Change': one_day_chg,
               '24 Hour Volume': one_day_vol, 'Market Cap': market_cap}

df = pd.DataFrame(data=crypto_data)

# A bare `df.head()` displays nothing when it is not the last expression of a
# cell or when run as a script, so the preview is printed explicitly.
print(df.head())