Transparent data download with application form

0

I'm trying to put together a web-scraping script, but there is a lot of data and Selenium has not been a good solution: downloading everything would take at least 12 days. How can I optimize this script? I suspect that driving the form through the browser is what slows it down — is there another way?

from selenium import webdriver
import click
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
import re
import pandas as pd
from tabulate import tabulate
import os
import time

# Working directory for any output files.
# BUG FIX: the original 'c:\Users\...' literal is a SyntaxError in Python 3 —
# '\U' starts a \UXXXXXXXX unicode escape. A raw string keeps the backslashes.
os.chdir(r'c:\Users\vivian.ribeiro\desktop\python')

# Public GTA (animal transit guide) lookup page.
url = "http://www.siapec.adepara.pa.gov.br/siapecest/controletransito/guiatransito/consultapublicagta.wsp"

# Start the Chrome session. The implicit wait lets AJAX-driven content take up
# to 30 s to appear before find_element raises an exception.
driver = webdriver.Chrome()
driver.implicitly_wait(30)

driver.get(url)  # solve the captcha manually once the page opens

# Candidate GTA numbers to query through the search form.
gtas = range(1500000, 3500000)
datalist = []  # one DataFrame per successful query

# XPaths for the search input and the "Pesquisar" (search) button, hoisted
# out of the loop so they are stated once.
INPUT_XPATH = '//*[@id="div_id"]/table/tbody/tr/td[2]/input'
BUTTON_XPATH = '//*[@id="btnPesquisar"]'

for gta_number in gtas:
    # The page is re-rendered after every search, so the element has to be
    # located again on each iteration.
    search_box = driver.find_element_by_xpath(INPUT_XPATH)
    search_box.clear()  # clear BEFORE typing so stale digits never accumulate
    search_box.send_keys(gta_number)
    driver.find_element_by_xpath(BUTTON_XPATH).click()

    # pandas.read_html parses every <table> in the HTML directly; the
    # original BeautifulSoup pass plus str(table) round-trip was redundant
    # work on every one of the ~2M iterations.
    try:
        tables = pd.read_html(driver.page_source, header=0)
    except ValueError:
        # read_html raises ValueError when the page contains no <table>
        # (e.g. a GTA number with no results) — skip it instead of crashing
        # days into the run.
        continue
    datalist.append(tables[0])  # first table holds the query result

# end of loop

# Close the browser and release the driver process.
driver.quit()
    
asked by anonymous 18.11.2018 / 19:47

0 answers