Copy the HTML of a page and display it in a TextView

2

I need a direction.

What method can I use to "download" the HTML of a page (text only), for example: link, and display it in a TextView?

    
asked by anonymous 14.03.2018 / 04:46

1 answer

1

Considering the example url, we can do as follows:

After analyzing the page, I found that the content is inside the DIV with id mw-content-text.

So we download the page, parse it with the Jsoup library, look up this DIV, and get its text.

Here's an example:

Add the library (build.gradle (app)):

implementation 'org.jsoup:jsoup:1.11.2'

Add permission (AndroidManifest.xml):

 <uses-permission android:name="android.permission.INTERNET"/>

MainActivity.java

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;

/**
 * Activity that downloads a web page in the background and shows the plain
 * text of its {@code div#mw-content-text} element in a {@link TextView}.
 */
public class MainActivity extends AppCompatActivity {

    /** Connect/read timeout, so a stalled server cannot block the task forever. */
    private static final int TIMEOUT_MS = 15000;

    private TextView txtView;

    /**
     * Inflates the layout, looks up the target text view and starts the
     * background download of the example page.
     */
    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);
        txtView = (TextView) findViewById(R.id.txtView);
        new Dowload().execute("https://pt.wikipedia.org/wiki/Tancredo_Neves");
    }

    /**
     * Background task that fetches a URL and extracts the text of the page's
     * {@code div#mw-content-text} element.
     *
     * <p>NOTE(review): as a non-static inner class this task holds a reference
     * to the enclosing activity for as long as it is running.
     */
    class Dowload extends AsyncTask<String, Void, String> {

        /**
         * Downloads the page at {@code strings[0]} and returns the text of its
         * content DIV.
         *
         * @param strings the URL to download as the first element
         * @return the extracted text, or {@code null} when no URL was given,
         *         the response code is not 200, the DIV is missing, or any
         *         error occurs
         */
        @Override
        protected String doInBackground(String... strings) {
            // Guard against execute() being called without a URL.
            if (strings == null || strings.length == 0) {
                return null;
            }

            HttpURLConnection con = null;
            try {
                con = (HttpURLConnection) new URL(strings[0]).openConnection();
                con.setRequestMethod("GET");
                con.setConnectTimeout(TIMEOUT_MS);
                con.setReadTimeout(TIMEOUT_MS);

                // Anything other than 200 (OK) is treated as a failure.
                if (con.getResponseCode() != HttpURLConnection.HTTP_OK) {
                    return null;
                }

                // Copy the response body. The charset is named explicitly instead of
                // relying on the platform default (assumes the page is UTF-8 encoded),
                // and the reader is closed even when reading fails.
                final StringBuilder html = new StringBuilder();
                try (BufferedReader reader = new BufferedReader(
                        new InputStreamReader(con.getInputStream(), "UTF-8"))) {
                    String line;
                    while ((line = reader.readLine()) != null) {
                        // readLine() strips the line terminator; put one back so that
                        // text on adjacent lines is not glued together.
                        html.append(line).append('\n');
                    }
                }

                // Parse the HTML and look for the DIV whose id is mw-content-text.
                final Document document = Jsoup.parse(html.toString());
                final Elements divs = document.select("div#mw-content-text");
                if (divs.isEmpty()) {
                    return null;
                }

                // Use the text of the first match.
                final Element div = divs.first();
                return div.text();
            } catch (final Exception e) {
                // Best effort: any failure is reported to the UI as "no content".
                e.printStackTrace();
                return null;
            } finally {
                if (con != null) {
                    con.disconnect();
                }
            }
        }

        /**
         * Shows the downloaded text, or a fixed error message when the
         * download produced no result.
         *
         * @param s the text returned by {@link #doInBackground}, possibly {@code null}
         */
        @Override
        protected void onPostExecute(String s) {
            if (s != null) {
                txtView.setText(s);
            } else {
                txtView.setText("Não foi possível carregar o conteúdo!");
            }
        }
    }
}
    
16.03.2018 / 19:33