Code in infinite loop

2

I have developed the following code to get the content of a certain webpage:

import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
// Downloads the page at the configured URL, logs every field of interest found
// on each line, and writes a verbatim copy of the page to a local file.
Logger logger = Logger.getLogger("org.bonitasoft");

// Backslashes must be doubled inside a double-quoted string literal; a single
// backslash followed by a letter such as 'B' is not a valid escape sequence.
File file = new File("C:\\Backup\\page.html");
URL url = new URL("http://site");

// The expressions never change, so compile them once instead of once per line.
// "\\s" is the regex whitespace class; "<.strong>" matches "</strong>".
Pattern patternNome = Pattern.compile("consulta-associados-item-nome-fantasia");
Pattern patternFone = Pattern.compile("<strong>Fone: <.strong>");
Pattern patternEmail = Pattern.compile("<strong>Email: <.strong> <a href=\"mailto:.*\" class=\"link\">");
Pattern patternRamo = Pattern.compile("Ramo:\\s<.strong>\\s.*");
Pattern patternProduto = Pattern.compile("<span class=\"float-left\">");
Pattern patternSite = Pattern.compile("<strong>Site: <.strong>\\s<a href=.* target=\"_blank\">");

// The page is read exactly once, so no outer loop is needed. Nested
// try/finally guarantees both streams are closed even if a read or write fails.
BufferedReader inFile = new BufferedReader(new InputStreamReader(url.openStream()));
try {
    BufferedWriter outFile = new BufferedWriter(new FileWriter(file));
    try {
        String inputLine;
        while ((inputLine = inFile.readLine()) != null) {
            if (patternNome.matcher(inputLine).find()) {
                logger.info("NOME: " + inputLine.replace("<h3 class=\"consulta-associados-item-nome-fantasia\">", "").replace("</h3>", "").trim());
            }
            if (patternFone.matcher(inputLine).find()) {
                logger.info("TELEFONE: " + inputLine.replace("<strong>Fone: </strong>", "").trim());
            }
            Matcher matcherEmail = patternEmail.matcher(inputLine);
            if (matcherEmail.find()) {
                // Strip the opening markup using the already-compiled pattern.
                logger.info("EMAIL: " + matcherEmail.replaceFirst("").replace("</a>", "").trim());
            }
            if (patternRamo.matcher(inputLine).find()) {
                logger.info("RAMO: " + inputLine.replace("Ramo: </strong> ", "").replace("<strong>", "").trim());
            }
            if (patternProduto.matcher(inputLine).find()) {
                logger.info("PRODUTO: " + inputLine.replace("<span class=\"float-left\">", "").replace("</span>", "").replace("<br>", " | ").trim());
            }
            Matcher matcherSite = patternSite.matcher(inputLine);
            if (matcherSite.find()) {
                logger.info("SITE: " + matcherSite.replaceFirst("").replace("</a>", "").trim());
            }

            // Keep an unmodified copy of every line of the page on disk.
            outFile.write(inputLine);
            outFile.newLine();
        }
    } finally {
        // close() flushes any buffered output before releasing the file.
        outFile.close();
    }
} finally {
    inFile.close();
}

It correctly picks up the information I need, but when the process finishes (after reading the entire page and returning the filtered content), it starts doing the same thing again, indefinitely. If you can help me, thank you.

    
asked by anonymous 07.11.2017 / 14:14

1 answer

0

I solved my problem as follows:

import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.logging.Logger;
// Reads the page at the configured URL line by line, extracts the fields of
// interest, and exports them to a semicolon-separated CSV file.
Logger logger = Logger.getLogger("org.bonitasoft");

// The expressions never change, so compile them once instead of once per line.
// "\\s" is the regex whitespace class; "<.strong>" matches "</strong>".
// A matcher for "<strong>Atividade: <.strong>" was left disabled in the
// original script; that field is not exported.
Pattern patternNome = Pattern.compile("consulta-associados-item-nome-fantasia");
Pattern patternFone = Pattern.compile("<strong>Fone: <.strong>");
Pattern patternEmail = Pattern.compile("<strong>Email: <.strong> <a href=\"mailto:.*\" class=\"link\">");
Pattern patternRamo = Pattern.compile("Ramo:\\s<.strong>\\s.*");
Pattern patternProduto = Pattern.compile("<span class=\"float-left\">");
Pattern patternSite = Pattern.compile("<strong>Site: <.strong>\\s<a href=.* target=\"_blank\">");
// This HTML comment is treated as the end of one record.
Pattern patternNewLine = Pattern.compile("<!-- .consulta-associados-item-infos-right -->");

// Accumulates CSV fragments in output order, starting with the header row.
def lista = []
lista.add("EMPRESA;TELEFONE;EMAIL;RAMO;PRODUTO;SITE\n")

URL url = new URL("http://site");
BufferedReader inFile = new BufferedReader(new InputStreamReader(url.openStream()));
try {
    String inLine;
    while ((inLine = inFile.readLine()) != null) {
        if (patternNome.matcher(inLine).find()) {
            lista.add(inLine.replace("<h3 class=\"consulta-associados-item-nome-fantasia\">", "").replace("</h3>", "").trim() + ";")
        }
        if (patternFone.matcher(inLine).find()) {
            lista.add(inLine.replace("<strong>Fone: </strong>", "").trim() + ";")
        }
        Matcher matcherEmail = patternEmail.matcher(inLine)
        if (matcherEmail.find()) {
            // Strip the opening markup using the already-compiled pattern.
            lista.add(matcherEmail.replaceFirst("").replace("</a>", "").trim() + ";")
        }
        if (patternRamo.matcher(inLine).find()) {
            lista.add(inLine.replace("Ramo: </strong> ", "").replace("<strong>", "").trim() + ";")
        }
        if (patternProduto.matcher(inLine).find()) {
            lista.add(inLine.replace("<span class=\"float-left\">", "").replace("</span>", "").replace("<br>", " | ").trim() + ";")
        }
        Matcher matcherSite = patternSite.matcher(inLine)
        if (matcherSite.find()) {
            lista.add(matcherSite.replaceFirst("").replace("</a>", "").trim() + ";")
        }
        if (patternNewLine.matcher(inLine).find()) {
            lista.add("\n")
        }
    }
} finally {
    inFile.close();
}

// The output file is opened only after the page was read successfully, so a
// failed download does not truncate an existing export. Backslashes are doubled
// because "\t" inside a double-quoted string would otherwise be a tab character.
BufferedWriter strW = new BufferedWriter(new OutputStreamWriter(new FileOutputStream("C:\\TESTE\\teste.csv"), StandardCharsets.ISO_8859_1));
try {
    // join("") concatenates the fragments exactly as collected. Going through
    // the array's toString() and stripping "[", "]" and "," would also delete
    // those characters from the scraped data and leave a stray space before
    // every field.
    strW.write(lista.join(""))
} finally {
    strW.close();
}
    
08.11.2017 / 13:52