Scrape your name from the first page of Google
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##############################################
# This script helps in scraping data from Google
##############################################
from splinter import Browser
import pandas as pd

# Open a Firefox session driven by splinter.
browser = Browser('firefox')

try:
    # Width, Height
    browser.driver.set_window_size(640, 480)
    browser.visit('https://www.google.com')

    # NOTE(review): the XPaths below are tied to Google's page markup at the
    # time this script was written -- confirm they still match before relying
    # on the output. If one matches nothing, the [0] lookup below will fail.
    # Single quotes outside so the XPath can use double quotes inside.
    search_bar_xpath = '//*[@id="lst-ib"]'

    # find_by_xpath returns a list of matches; index 0 selects the first one.
    search_bar = browser.find_by_xpath(search_bar_xpath)[0]

    # Change the string below to the text you want to search for.
    search_bar.fill("python")

    # Locate and click the search button.
    search_button_xpath = '//*[@id="tsf"]/div[2]/div[3]/center/input[1]'
    search_button = browser.find_by_xpath(search_button_xpath)[0]
    search_button.click()

    # Each result title is an <a> inside an <h3 class="r">.
    search_results_xpath = '//h3[@class="r"]/a'
    search_results = browser.find_by_xpath(search_results_xpath)

    # Collect (title, link) tuples. The title is kept as text (str): encoding
    # it to bytes here would write b'...' reprs into the CSV on Python 3.
    scraped_data = [
        (search_result.text, search_result["href"])
        for search_result in search_results
    ]
finally:
    # Always close the browser, even if a lookup above fails, so no Firefox
    # process is left running.
    browser.quit()

# Encode once at the I/O boundary when writing the results out.
df = pd.DataFrame(data=scraped_data, columns=["Title", "Link"])
df.to_csv("python.csv", encoding="utf-8")
print("Job done")
Comments
Post a Comment