Crawler to log in to the São Paulo tax note site

1

What I have so far is this:

package br.com.crawler;

import java.io.BufferedReader;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLEncoder;

import javax.net.ssl.HttpsURLConnection;

/**
 * Submits the login form at {@code https://www.nfp.fazenda.sp.gov.br/login.aspx}
 * with a single HTTPS POST and prints the response to standard output.
 *
 * <p>The user name and password are taken from the command line.
 */
public class Crawler {

    private static final String USER_AGENT = "Mozilla/5.0";

    private static final String LOGIN_URL = "https://www.nfp.fazenda.sp.gov.br/login.aspx";

    /**
     * Value of the {@code __VIEWSTATE} hidden field, copied verbatim from the login page.
     * NOTE(review): the server may issue a different value on each page load; if the login
     * is rejected, read this field from a prior GET of the page instead of hard-coding it.
     */
    private static final String VIEW_STATE =
            "/wEPDwUKMTMwMTM2MTg2MA9kFgJmD2QWAgIBD2QWCgIDDxYCHgVjbGFzcwUYYmFycmFBY2Vzc2liaWxpZGFkZUxvZ2luFgQCAQ8WAh4HVmlzaWJsZWhkAgMPFgIfAWdkAgQPFgIfAWhkAgYPDxYCHgRUZXh0BRROb3RhIEZpc2NhbCBQYXVsaXN0YWRkAggPFgIfAWhkAgoPZBYCZg9kFgJmD2QWBAIJDw8WAh8BZ2RkAg8PZBYCAgUPZBYCAgEPZBYCAgEPDxYEHghUYWJJbmRleAENAB4JTWF4TGVuZ3RoAgRkZBgBBR5fX0NvbnRyb2xzUmVxdWlyZVBvc3RCYWNrS2V5X18WCAUtY3RsMDAkQ29udGV1ZG9QYWdpbmEkTG9naW4xJHJkQnRuQ29udHJpYnVpbnRlBTBjdGwwMCRDb250ZXVkb1BhZ2luYSRMb2dpbjEkcmRCdG5OYW9Db250cmlidWludGUFLWN0bDAwJENvbnRldWRvUGFnaW5hJExvZ2luMSRyZEJ0bkNvbnRhYmlsaXN0YQUrY3RsMDAkQ29udGV1ZG9QYWdpbmEkTG9naW4xJHJkQnRuRmF6ZW5kYXJpbwUnY3RsMDAkQ29udGV1ZG9QYWdpbmEkTG9naW4xJHJkQnRuUHJvY29uBTZjdGwwMCRDb250ZXVkb1BhZ2luYSRMb2dpbjEkcmRCdG5BZHZvZ2Fkb1JlcHJlc2VudGFudGUFL2N0bDAwJENvbnRldWRvUGFnaW5hJExvZ2luMSRpbWdCdG5BY2Vzc29DZXJ0Q1BGBTBjdGwwMCRDb250ZXVkb1BhZ2luYSRMb2dpbjEkaW1nQnRuQWNlc3NvQ2VydENOUEo=";

    private final String user;
    private final String password;

    /**
     * @param user     value posted in the {@code UserName} form field
     * @param password value posted in the {@code Password} form field
     */
    public Crawler(String user, String password) {
        this.user = user;
        this.password = password;
    }

    /**
     * Entry point.
     *
     * @param args {@code args[0]} is the user name, {@code args[1]} the password
     * @throws IOException if the HTTPS request fails
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 2) {
            System.err.println("Usage: Crawler <user> <password>");
            return;
        }

        Crawler http = new Crawler(args[0], args[1]);

        System.out.println("\nTesting 1 - Enviar request via POST");
        http.sendPost();
    }

    /**
     * Sends the login form as an HTTP POST and prints the response code and body.
     *
     * @throws IOException if the connection cannot be opened, written to or read from
     */
    private void sendPost() throws IOException {
        URL obj = new URL(LOGIN_URL);
        HttpsURLConnection con = (HttpsURLConnection) obj.openConnection();

        // add request headers
        con.setRequestMethod("POST");
        con.setRequestProperty("User-Agent", USER_AGENT);
        con.setRequestProperty("Accept-Language", "en-US,en;q=0.5");

        String urlParameters = buildParameters(user, password);

        // Send post request
        con.setDoOutput(true);
        try (DataOutputStream wr = new DataOutputStream(con.getOutputStream())) {
            // writeBytes keeps only the low byte of each char; the field names are ASCII
            // and every value is percent-encoded, so nothing is lost.
            wr.writeBytes(urlParameters);
            wr.flush();
        }

        int responseCode = con.getResponseCode();
        System.out.println("Enviando 'POST' request para a URL : " + LOGIN_URL);
        System.out.println("Parâmetros parameters : " + urlParameters);
        System.out.println("Response Code: " + responseCode);

        StringBuilder response = new StringBuilder();
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(con.getInputStream()))) {
            String inputLine;
            while ((inputLine = in.readLine()) != null) {
                response.append(inputLine);
            }
        }

        // print result
        System.out.println(response.toString());
    }

    /**
     * Builds the {@code application/x-www-form-urlencoded} request body for the login form.
     *
     * @param user     user name, percent-encoded before being appended
     * @param password password, percent-encoded before being appended
     * @return the form body, with fields separated by {@code &}
     */
    static String buildParameters(String user, String password) {
        return "__EVENTVALIDATION=&"
                + "__EVENTARGUMENT=&"
                + "__VIEWSTATE=" + encode(VIEW_STATE) + "&"
                + "ctl00$ConteudoPagina$Login1$rblTipo=rdBtnNaoContribuinte&"
                + "ctl00$ConteudoPagina$Login1$UserName=" + encode(user) + "&"
                + "ctl00$ConteudoPagina$Login1$Password=" + encode(password);
    }

    /** Percent-encodes a form value as UTF-8 so that {@code &}, {@code =}, {@code +} and spaces survive. */
    private static String encode(String value) {
        try {
            return URLEncoder.encode(value, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            // UTF-8 support is mandatory on every Java platform.
            throw new AssertionError(e);
        }
    }

}

My problem is that I do not know which parameters to pass.

    
asked by anonymous 02.07.2015 / 17:04

1 answer

1

Try this:

import java.io.BufferedReader;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.ProtocolException;
import java.net.URL;
import java.util.stream.IntStream;

import javax.net.ssl.HttpsURLConnection;

/**
 * Logs in to {@code https://www.nfp.fazenda.sp.gov.br/login.aspx}.
 *
 * <p>The page is first fetched with a GET so the hidden form fields
 * ({@code __EVENTTARGET}, {@code __EVENTARGUMENT}, {@code __VIEWSTATE},
 * {@code __EVENTVALIDATION}) can be read from the HTML. Those values are then
 * posted back together with the login type, user name and password.
 */
public class Crawler {

    private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36";

    private final String user;
    private final String password;
    private final TipoLogin tipo;

    /** Login profiles; each constant carries the value posted in the {@code rblTipo} radio field. */
    public enum TipoLogin {
        CONTRIBUINTE_ICMS("rdBtnContribuinte"),
        CONSUMIDOR("rdBtnNaoContribuinte"),
        CONTABILISTA("rdBtnContabilista"),
        FAZENDARIO("rdBtnFazendario"),
        PROCON("rdBtnProcon"),
        REPRESENTANTE_CONTRIBUINTE("rdBtnAdvogadoRepresentante");

        private final String radio;

        private TipoLogin(String radio) {
            this.radio = radio;
        }

        /** @return the radio-button value sent for this login type */
        public String getRadio() {
            return radio;
        }
    }

    /**
     * Runs a login attempt with sample credentials.
     *
     * @throws IOException if either HTTPS request fails
     */
    public static void main(String[] args) throws IOException {
        Crawler http = new Crawler("12345678901", "$enh4", TipoLogin.CONTRIBUINTE_ICMS);
        http.sendPost();
    }

    /**
     * @param user     value posted in the {@code UserName} field
     * @param password value posted in the {@code Password} field
     * @param tipo     login type selecting the radio-button value
     */
    public Crawler(String user, String password, TipoLogin tipo) {
        this.user = user;
        this.password = password;
        this.tipo = tipo;
    }

    /**
     * Fetches the login page, then posts the login form and prints the response.
     *
     * @throws IOException if a connection cannot be opened, written to or read from
     */
    private void sendPost() throws IOException {
        URL url;
        try {
            url = new URL("https://www.nfp.fazenda.sp.gov.br/login.aspx");
        } catch (MalformedURLException e) {
            // The URL is a compile-time constant, so this cannot happen.
            throw new AssertionError(e);
        }

        // First request: GET the page to obtain the current hidden-field values.
        HttpsURLConnection get = (HttpsURLConnection) url.openConnection();
        get.setRequestProperty("User-Agent", USER_AGENT);
        get.setRequestProperty("Accept-Language", "en-US,en;q=0.5");
        get.getResponseCode();
        String page = download(get.getInputStream());

        // Second request: POST the form.
        HttpsURLConnection con = (HttpsURLConnection) url.openConnection();

        try {
            con.setRequestMethod("POST");
        } catch (ProtocolException e) {
            // "POST" is a valid method name, so this cannot happen.
            throw new AssertionError(e);
        }
        con.setRequestProperty("User-Agent", USER_AGENT);
        con.setRequestProperty("Accept-Language", "en-US,en;q=0.5");

        // The scraped values must be escaped too: a raw '+', '/' or '=' in them would
        // otherwise be misread by the server ('+' decodes to a space).
        String urlParameters = "__EVENTTARGET=" + escapeURI(buscarCampo(page, "__EVENTTARGET"))
                + "&__EVENTARGUMENT=" + escapeURI(buscarCampo(page, "__EVENTARGUMENT"))
                + "&__VIEWSTATE=" + escapeURI(buscarCampo(page, "__VIEWSTATE"))
                + "&__EVENTVALIDATION=" + escapeURI(buscarCampo(page, "__EVENTVALIDATION"))
                + "&ctl00$ddlTipoUsuario=#rdBtnNaoContribuinte"
                + "&ctl00$UserNameAcessivel="
                + "&ctl00$PasswordAcessivel="
                + "&ctl00$ConteudoPagina$Login1$rblTipo=" + tipo.getRadio()
                + "&ctl00$ConteudoPagina$Login1$UserName=" + escapeURI(user)
                + "&ctl00$ConteudoPagina$Login1$Password=" + escapeURI(password);

        System.out.println("Parâmetros parameters : " + urlParameters);

        // Send post request
        con.setDoOutput(true);
        try (DataOutputStream wr = new DataOutputStream(con.getOutputStream())) {
            wr.writeBytes(urlParameters);
            wr.flush();
        }

        int responseCode = con.getResponseCode();
        System.out.println("Enviando 'POST' request para a URL : " + url);
        System.out.println("Response Code: " + responseCode);

        String response = download(con.getInputStream());

        //print result
        System.out.println(response);
    }

    /**
     * Reads the whole stream as text and closes it. Line terminators are dropped.
     *
     * @param is stream to read; closed before returning
     * @return the concatenated lines
     * @throws IOException if reading fails
     */
    private static String download(InputStream is) throws IOException {
        StringBuilder response = new StringBuilder(1024);
        try (BufferedReader in = new BufferedReader(new InputStreamReader(is))) {
            String inputLine;

            while ((inputLine = in.readLine()) != null) {
                response.append(inputLine);
            }
        }
        return response.toString();
    }

    /**
     * Extracts the {@code value} attribute of a hidden input whose {@code name} and
     * {@code id} both equal {@code campo}.
     *
     * @param html  page source to search
     * @param campo field name
     * @return the attribute value, or an empty string if the field is absent or its
     *         value has no closing quote
     */
    private static String buscarCampo(String html, String campo) {
        String input = "<input type=\"hidden\" name=\"" + campo + "\" id=\"" + campo + "\" value=\"";
        int a = html.indexOf(input);
        if (a == -1) return "";
        int start = a + input.length();
        int b = html.indexOf('\"', start);
        // Truncated or malformed markup: treat as "field not found" instead of throwing.
        if (b == -1) return "";
        return html.substring(start, b);
    }

    private static final String[] HEX = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F"};

    /**
     * Escapes one byte: letters, digits and {@code - . _ ~ $ #} pass through unchanged,
     * everything else becomes {@code %XX}.
     */
    private static String escapeURI(byte c) {
        boolean ok = (c >= 'A' && c <= 'Z')
                || (c >= 'a' && c <= 'z')
                || (c >= '0' && c <= '9')
                || c == '-' || c == '.' || c == '_' || c == '~'
                || c == '$' || c == '#';
        if (ok) {
            return String.valueOf((char) c);
        }
        // Bytes are signed: mask to 0..255 first, otherwise the bytes of any non-ASCII
        // character are negative and index far outside HEX.
        int unsigned = c & 0xFF;
        return "%" + HEX[unsigned >>> 4] + HEX[unsigned & 0xF];
    }

    /**
     * Percent-encodes a string byte by byte over its UTF-8 representation.
     *
     * @param in text to encode
     * @return the encoded text; empty if {@code in} is empty
     */
    public static String escapeURI(String in) {
        byte[] bytes;
        try {
            bytes = in.getBytes("UTF-8");
        } catch (UnsupportedEncodingException e) {
            // UTF-8 support is mandatory on every Java platform.
            throw new AssertionError(e);
        }
        StringBuilder sb = new StringBuilder(bytes.length * 3);
        for (byte b : bytes) {
            sb.append(escapeURI(b));
        }
        return sb.toString();
    }
}

The __EVENTTARGET, __EVENTARGUMENT, __VIEWSTATE, and __EVENTVALIDATION fields are problematic: they may hold values generated by the server, which it expects to receive back unchanged. Because of this, I first make a GET request to the page to read the values of these fields, and then I send the POST with the values of all the fields.

Note the ctl00$ddlTipoUsuario , ctl00$UserNameAcessivel , and ctl00$PasswordAcessivel fields. These fields are at the top of the form and are sent along with the request, even if they are not needed.

At the end, the form fields that interest you (whose values are passed in the constructor call within main(String[]) ) are these:

  • The ctl00$ConteudoPagina$Login1$rblTipo field, which corresponds to the radio buttons and can be rdBtnContribuinte, rdBtnNaoContribuinte, rdBtnContabilista, rdBtnFazendario, rdBtnProcon or rdBtnAdvogadoRepresentante.

  • The ctl00$ConteudoPagina$Login1$UserName field, which is the user name.

  • The ctl00$ConteudoPagina$Login1$Password field, which is the password.

Note that I percent-encode the user name and password in order to "escape" special characters.

There are probably more things that I have missed. Let me know whether it works or not.

    
03.07.2015 / 19:08