Crawler to make pagination

1

I need a crawler that does pagination on a website.

I'm currently reading the page's source code and writing it to a .txt file like this:

/**
 * Downloads the source of a web page and saves it to a text file inside the
 * "Desktop" folder of the current user's home directory.
 */
public class CodFonte {

    /** Read timeout applied to the HTTP connection, in milliseconds. */
    private static final int READ_TIMEOUT_MS = 15 * 1000;

    /** Name of the output file. */
    private static final String FILE_NAME = "Fonte Code.txt";

    /**
     * Fetches the page at the given address and writes it, line by line, to
     * {@code <user.home>/Desktop/Fonte Code.txt}. The first line of the file
     * is a header containing the URL; an existing file is overwritten.
     *
     * @param str the address of the page to download; must be an HTTP(S) URL,
     *            because the connection is cast to {@link HttpURLConnection}
     * @throws IOException if the URL is malformed, the page cannot be read,
     *                     or the output file cannot be created or written
     */
    public static void crawler(String str) throws IOException {
        URL url = new URL(str);
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        connection.setReadTimeout(READ_TIMEOUT_MS);
        connection.connect();

        // Build the path with the platform separator instead of hard-coded
        // backslashes (the unescaped "\Desktop\" literal did not compile).
        String separator = System.getProperty("file.separator");
        String path = System.getProperty("user.home") + separator + "Desktop" + separator;

        // try-with-resources closes both streams even when reading or
        // writing fails half-way through.
        try (BufferedReader reader = new BufferedReader(
                     new InputStreamReader(connection.getInputStream()));
             PrintWriter gravarArq = new PrintWriter(new FileWriter(path + FILE_NAME))) {

            gravarArq.println("SITE -------- " + url);

            String linha;
            while ((linha = reader.readLine()) != null) {
                gravarArq.println(linha);
            }

            // PrintWriter never throws on write errors; surface them explicitly.
            if (gravarArq.checkError()) {
                throw new IOException("Failed to write page source to " + path + FILE_NAME);
            }
        } finally {
            connection.disconnect();
        }
    }

}

But I need to go to the next page. The site uses friendly URLs, and the URL does not change from page to page, because the pagination is done through a form request sent via POST.

    
asked by anonymous 30.06.2015 / 21:31

1 answer

2

Would sending a POST request and reading the response help?

HttpURLConnectionExample.java

package com.meupacote.app;

import java.io.BufferedReader;
import java.io.DataOutputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;

import javax.net.ssl.HttpsURLConnection;

/**
 * Minimal example of sending an HTTP POST request with
 * {@link HttpURLConnection} and printing the response body.
 */
public class HttpURLConnectionExample {

    /** Value sent in the {@code User-Agent} request header. */
    private static final String USER_AGENT = "Mozilla/5.0";

    /**
     * Runs the example: sends one POST request and prints the result.
     *
     * @param args ignored
     * @throws Exception if the request fails
     */
    public static void main(String[] args) throws Exception {

        HttpURLConnectionExample http = new HttpURLConnectionExample();

        System.out.println("\nTesting 1 - Enviar request via POST");
        http.sendPost();

    }

    /**
     * Sends a form-style POST request to a fixed URL, then prints the
     * response code and the response body to standard output.
     *
     * @throws Exception if the connection cannot be opened, written to, or
     *                   read from
     */
    private void sendPost() throws Exception {

        String url = "http://www.url.com/";
        URL obj = new URL(url);
        // Cast to HttpURLConnection, not HttpsURLConnection: the URL above is
        // plain http, so the HTTPS-specific cast fails with ClassCastException.
        // HttpURLConnection is the common supertype and works for both schemes.
        HttpURLConnection con = (HttpURLConnection) obj.openConnection();

        try {
            // add request headers
            con.setRequestMethod("POST");
            con.setRequestProperty("User-Agent", USER_AGENT);
            con.setRequestProperty("Accept-Language", "en-US,en;q=0.5");

            String urlParameters = "param1=valor1&param2=valor2";

            // Send the POST body; the stream is closed even if writing fails.
            con.setDoOutput(true);
            try (DataOutputStream wr = new DataOutputStream(con.getOutputStream())) {
                wr.writeBytes(urlParameters);
                wr.flush();
            }

            int responseCode = con.getResponseCode();
            System.out.println("\nEnviando 'POST' request para a URL : " + url);
            System.out.println("Parâmetros parameters : " + urlParameters);
            System.out.println("Response Code: " + responseCode);

            // Lines are concatenated without separators, as in the original example.
            StringBuilder response = new StringBuilder();
            try (BufferedReader in = new BufferedReader(
                    new InputStreamReader(con.getInputStream()))) {
                String inputLine;
                while ((inputLine = in.readLine()) != null) {
                    response.append(inputLine);
                }
            }

            // print result
            System.out.println(response.toString());
        } finally {
            con.disconnect();
        }

    }

}
    
30.06.2015 / 23:03