I'm trying to put together a web-scraping script, but it's a lot of data and Selenium has not been a good solution: downloading everything would take at least 12 days. How can I optimize this script? I suspect that submitting the form through the browser is what slows things down; is there another way (see the sketch after the script for what I have in mind)?
from selenium import webdriver
from bs4 import BeautifulSoup
import pandas as pd
import os
# set the working directory (raw string so the backslashes are not treated as escapes)
os.chdir(r'C:\Users\vivian.ribeiro\Desktop\python')
url = "http://www.siapec.adepara.pa.gov.br/siapecest/controletransito/guiatransito/consultapublicagta.wsp"
# create the Chrome session
driver = webdriver.Chrome()
driver.implicitly_wait(30)  # give Selenium up to 30 seconds to find an element before raising an exception; AJAX load times vary
driver.get(url)  # solve the CAPTCHA manually to open the page
gtas = range(1500000, 3500000)
datalist = []
for i in gtas:
    elem = driver.find_element_by_xpath('//*[@id="div_id"]/table/tbody/tr/td[2]/input')
    elem.click()  # focus the GTA number field
    elem.send_keys(i)
    elem2 = driver.find_element_by_xpath('//*[@id="btnPesquisar"]')
    elem2.click()  # click the search button
    elem = driver.find_element_by_xpath('//*[@id="div_id"]/table/tbody/tr/td[2]/input')
    elem.clear()
    soup = BeautifulSoup(driver.page_source, 'lxml')
    table = soup.find_all('table')[0]
    df = pd.read_html(str(table), header=0)
    datalist.append(df[0])
# end of loop
# close the browser
driver.quit()
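One idea I've been considering is to solve the CAPTCHA once in Selenium, then replay the form submission directly with requests, reusing the browser's cookies, so each GTA number costs a single HTTP round trip instead of a full browser interaction. Below is a rough sketch of what I mean. To be clear about the assumptions: the form field name ('nrGta') is a placeholder I made up, and I don't know whether the server accepts a plain POST outside the browser session; the real field names and endpoint would have to be copied from the browser's network tab.

import requests
import pandas as pd
from selenium import webdriver

url = "http://www.siapec.adepara.pa.gov.br/siapecest/controletransito/guiatransito/consultapublicagta.wsp"

# solve the CAPTCHA once in a real browser, then reuse its cookies
driver = webdriver.Chrome()
driver.get(url)
input("Solve the CAPTCHA in the browser, then press Enter...")

session = requests.Session()
for cookie in driver.get_cookies():
    session.cookies.set(cookie['name'], cookie['value'])
driver.quit()

datalist = []
for i in range(1500000, 3500000):
    # 'nrGta' is a PLACEHOLDER field name: inspect the real POST in the
    # browser's network tab and copy the actual form fields here
    resp = session.post(url, data={'nrGta': i})
    try:
        tables = pd.read_html(resp.text, header=0)
    except ValueError:  # no table in the response for this number
        continue
    datalist.append(tables[0])

result = pd.concat(datalist, ignore_index=True)
result.to_csv('gtas.csv', index=False)

If something like this works, the loop could presumably also be parallelized (for example with concurrent.futures.ThreadPoolExecutor), provided the server tolerates concurrent requests. Would this be a reasonable direction?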