I created a process that fetches the content of a certain web page, filters out what is needed, and generates a *.csv file
from that content. It works correctly; however, words that contain accented characters come out wrong when the file is generated. Is it possible to set UTF-8 as the encoding of the created file?
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.logging.Logger;
// Scrapes company entries from a web page and writes them to a
// semicolon-separated CSV file encoded as UTF-8.
//
// Each matching HTML line contributes one field; the "Site" line ends the
// record with a newline.
Logger logger = Logger.getLogger("org.bonitasoft")

// Backslashes must be doubled inside a double-quoted string: "\t" is a tab
// and "\T" is not a valid escape.
def outputPath = "C:\\TESTE\\teste.csv"

// Writing with an explicit charset is what keeps accented characters intact;
// FileWriter would silently use the platform default encoding instead.
def outputCharset = "UTF-8"

// NOTE(review): assumes the page is served as UTF-8 -- confirm against the
// page's Content-Type header / <meta charset>; use "ISO-8859-1" if needed.
def pageCharset = "UTF-8"

// Compiled once, outside the loops, instead of once per input line.
Pattern nomePattern = Pattern.compile("consulta-associados-item-nome-fantasia")
Pattern fonePattern = Pattern.compile("<strong>Fone: <.strong>")
Pattern emailPattern = Pattern.compile("<strong>Email: <.strong> <a href=\"mailto:.*\" class=\"link\">")
Pattern ramoPattern = Pattern.compile("Ramo:\\s<.strong>\\s.*")
Pattern produtoPattern = Pattern.compile("<span class=\"float-left\">")
Pattern sitePattern = Pattern.compile("<strong>Site: <.strong>\\s<a href=.* target=\"_blank\">")

def lista = []
lista.add("EMPRESA;TELEFONE;EMAIL;RAMO;PRODUTO;SITE\n")

def y = 1
// NOTE(review): both iterations request the same URL; presumably a page
// number derived from y was meant to be part of it -- confirm.
while (y <= 2) {
    URL url = new URL("http://site")
    // withReader decodes with the given charset and closes the stream even
    // when an exception is thrown while reading.
    url.withReader(pageCharset) { inFile ->
        String inLine
        while ((inLine = inFile.readLine()) != null) {
            if (nomePattern.matcher(inLine).find()) {
                lista.add(inLine.replace("<h3 class=\"consulta-associados-item-nome-fantasia\">", "").replace("</h3>", "").trim() + ";")
            }
            if (fonePattern.matcher(inLine).find()) {
                lista.add(inLine.replace("<strong>Fone: </strong>", "").trim() + ";")
            }
            if (emailPattern.matcher(inLine).find()) {
                lista.add(emailPattern.matcher(inLine).replaceFirst("").replace("</a>", "").trim() + ";")
            }
            if (ramoPattern.matcher(inLine).find()) {
                lista.add(inLine.replace("Ramo: </strong> ", "").replace("<strong>", "").trim() + ";")
            }
            if (produtoPattern.matcher(inLine).find()) {
                lista.add(inLine.replace("<span class=\"float-left\">", "").replace("</span>", "").replace("<br>", " | ").trim() + ";")
            }
            if (sitePattern.matcher(inLine).find()) {
                // Last column: terminate the CSV record.
                lista.add(sitePattern.matcher(inLine).replaceFirst("").replace("</a>", "").trim() + "\n")
            }
        }
    }
    y++
}

// join("") concatenates the fields as they are. Going through the list's
// toString() and deleting "[", "]" and "," also removed those characters
// from the scraped data and left a stray space before every field.
// The file is opened only after scraping succeeded and is always closed.
new File(outputPath).withWriter(outputCharset) { strW ->
    strW.write(lista.join(""))
}