I have a problem: by default, Python separates the columns with a comma when it generates the CSV file. However, I need each scraped item to be written into its own respective column, and I have not been able to achieve that. Could anyone help me? Here is the code, followed by a sample of the CSV output.
'# -*-
coding: utf-8 -*-
from scrapy import *
from projeto_iruan.items import *
import csv
class imprensaNacional(scrapy.Spider):
    """Spider that scrapes "AVISO DE LICITA..." notices into a CSV file.

    ``parse`` follows every matching link found on the start page and
    ``parseAviso`` extracts the notice fields from each linked page,
    appending one row per notice to ``output`` and yielding the same data
    as a dict.
    """

    name = 'imprensaNacional'
    start_urls = ['http://www.imprensanacional.gov.br/leiturajornal?data=11-09-2018&secao=dou3']
    # Path of the CSV file written by parseAviso; emptied when the spider starts.
    output = "output.csv"
    # Column separator used for `output`. Override it (e.g. ";") when the
    # program that opens the file expects a separator other than a comma.
    csv_delimiter = ","
    custom_settings = {
        # The feed format is a format *name*; the csv module object is not valid here.
        'FEED_FORMAT': 'csv',
    }

    def __init__(self, *args, **kwargs):
        """Initialise the spider and truncate the output file.

        Positional and keyword arguments are forwarded to the base spider
        so that spider arguments keep working.
        """
        super(imprensaNacional, self).__init__(*args, **kwargs)
        # Start every run with an empty output file.
        open(self.output, "w").close()

    @staticmethod
    def _clean(value):
        """Return ``value`` in a form csv.writer accepts.

        ``None`` (node not found) becomes an empty string. On Python 2 the
        unicode text is encoded to UTF-8 bytes, as the csv module there
        cannot write non-ASCII unicode; on Python 3 it is returned as-is.
        """
        if value is None:
            return ''
        if str is bytes and not isinstance(value, str):
            return value.encode('utf-8')
        return value

    @staticmethod
    def _token(text, index):
        """Return the whitespace-separated token at ``index`` or ``None``."""
        parts = text.split() if text else []
        return parts[index] if index < len(parts) else None

    def _field(self, selector, query):
        """Return the cleaned first match of ``query`` under ``selector``."""
        return self._clean(selector.xpath(query).extract_first())

    def _append_row(self, row):
        """Append one row to the output CSV and close the file again."""
        if str is bytes:
            # Python 2: the csv module requires a binary-mode file.
            handle = open(self.output, "ab")
        else:
            # Python 3: newline='' lets the csv module control line endings.
            handle = open(self.output, "a", newline="", encoding="utf-8")
        with handle:
            csv.writer(handle, delimiter=self.csv_delimiter).writerow(row)

    def parse(self, response):
        """Yield a request for every element whose text contains "AVISO DE LICITA"."""
        url_base = 'http://www.imprensanacional.gov.br/'
        avisos = response.xpath('//*[@class="span8 hierarchy-wrapper"]//*[contains(text(),"AVISO DE LICITA")]')
        for aviso in avisos:
            href = aviso.xpath('.//@href').extract_first()
            if not href:
                # Matching element carries no link; nothing to follow.
                continue
            yield Request(url=url_base + href, callback=self.parseAviso)

    def parseAviso(self, response):
        """Extract the notice fields, append them to the CSV and yield them.

        Fields whose node is missing are written as empty strings instead
        of aborting the whole page.
        """
        titulo_query = u'.//*[@class="identifica"]/text()[contains(.,"N\xba")]'
        for info in response.xpath('//*[@class="journal-content-article"]'):
            # Evaluate the title once; "pregao" and "uasg" are its 4th and
            # 7th whitespace-separated tokens.
            titulo_bruto = info.xpath(titulo_query).extract_first()
            titulo = self._clean(titulo_bruto)
            pregao = self._clean(self._token(titulo_bruto, 3))
            uasg = self._clean(self._token(titulo_bruto, 6))

            tipo = self._field(info, './/*[@class="identifica"]/text()[contains(.,"AVISO")]')
            pregoeiro = self._field(info, './/*[@class="assina"]/text()')
            descricao = self._field(info, './/*[@class="dou-paragraph"]/text()')
            dou = self._field(info, './/*[@class="dou-paragraph"]/text()[contains(.,"(")]')
            orgao = self._field(info, './/*[@class="orgao-dou-data"]/text()')
            data_publicacao = self._field(info, './/*[@class="publicado-dou-data"]/text()')
            edicao_dou = self._field(info, './/*[@class="edicao-dou-data"]/text()')
            secao = self._field(info, './/*[@class="secao-dou"]/text()')
            pagina = self._field(info, './/*[@class="secao-dou-data"]/text()')

            self._append_row([
                titulo, tipo, pregao, uasg, dou, data_publicacao, edicao_dou,
                secao, pagina, orgao, pregoeiro, response.url, descricao,
            ])
            yield {
                'Titulo': titulo,
                'Tipo': tipo,
                'Pregao': pregao,
                'UASG': uasg,
                'DOU': dou,
                'DataPublicacao': data_publicacao,
                'Edicao': edicao_dou,
                'Secao': secao,
                'Pagina': pagina,
                'Orgao': orgao,
                'Pregoeiro': pregoeiro,
                'Url': response.url,
                'Descricao': descricao,
            }
'