I'm using an example I found called AndroidJsoup to get the HTML source
of a certain page, but I'm not getting just the code snippet I want, which is inside a certain <script> element.
In short, AndroidJsoup should run, taking the HTML, applying a regex,
and returning String resultado1.
Below is my source, along with a reference example of the HTML
page to be fetched, and the regex
taken from my PHP script.
Android MainActivity.java
package com.survivingwithandroid.jsoup;
import android.os.AsyncTask;
import android.os.Bundle;
import android.support.v7.app.ActionBarActivity;
import android.util.Log;
import android.view.Menu;
import android.view.MenuItem;
import android.view.View;
import android.widget.Button;
import android.widget.EditText;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.DataNode;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class MainActivity extends ActionBarActivity {
private EditText respText;
@Override
protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
setContentView(R.layout.activity_main);
final EditText edtUrl = (EditText) findViewById(R.id.edtURL);
Button btnGo = (Button) findViewById(R.id.btnGo);
respText = (EditText) findViewById(R.id.edtResp);
btnGo.setOnClickListener(new View.OnClickListener() {
@Override
public void onClick(View view) {
String siteUrl = edtUrl.getText().toString();
( new ParseURL() ).execute(new String[]{siteUrl});
}
});
}
@Override
public boolean onCreateOptionsMenu(Menu menu) {
// Inflate the menu; this adds items to the action bar if it is present.
getMenuInflater().inflate(R.menu.main, menu);
return true;
}
@Override
public boolean onOptionsItemSelected(MenuItem item) {
// Handle action bar item clicks here. The action bar will
// automatically handle clicks on the Home/Up button, so long
// as you specify a parent activity in AndroidManifest.xml.
int id = item.getItemId();
if (id == R.id.action_settings) {
return true;
}
return super.onOptionsItemSelected(item);
}
private class ParseURL extends AsyncTask<String, Void, String> {
@Override
protected String doInBackground(String... strings) {
StringBuffer buffer = new StringBuffer();
try {
Log.d("JSwa", "Connecting to ["+strings[0]+"]");
Document doc = Jsoup.connect(strings[0]).get();
Log.d("JSwa", "Connected to ["+strings[0]+"]");
// Get document (HTML page) title
String title = doc.title();
Log.d("JSwA", "Title ["+title+"]");
buffer.append("Title: " + title + "\r\n");
// Get meta info
Elements metaElems = doc.select("meta");
buffer.append("META DATA\r\n");
for (Element metaElem : metaElems) {
String name = metaElem.attr("name");
String content = metaElem.attr("content");
buffer.append("name ["+name+"] - content ["+content+"] \r\n");
}
Elements topicList = doc.select("h2.topic");
buffer.append("Topic list\r\n");
for (Element topic : topicList) {
String data = topic.text();
buffer.append("Data [" + data + "] \r\n");
}
//==========
Elements scriptElements = doc.getElementsByTag("script");
buffer.append("Variavel resultado1\r\n");
for (Element element :scriptElements ){
for (DataNode node : element.dataNodes()) {
System.out.println(node.getWholeData());
String scriptdata = node.getWholeData();
buffer.append("StriptData [" + scriptdata + "] \r\n");
//String resultado1
}
System.out.println("-------------------");
}
//==========
}
catch(Throwable t) {
t.printStackTrace();
}
return buffer.toString();
}
@Override
protected void onPreExecute() {
super.onPreExecute();
}
@Override
protected void onPostExecute(String s) {
super.onPostExecute(s);
respText.setText(s);
}
}
}
Home HTML example page
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<script type="text/javascript">
function var1() {
...etc...
}
</script>
<title>Link das Pessoas</title>
</head>
<body>
<div>Conteudo</div>
<script>
function(...)
etc valorM = (valores de xyz);
etc valorE = (valores de xy);
pegavalor(function() {
...funcoes_diversars(Conteudo dinamico e estatico...http://arquivosdofulano.com/pessoas
...Conteudo dinamico e estatico)
})
</script>
<div>Conteudo #2</div>
<script type="text/javascript">
var google...
</script>
</html>
Regex (taken from my PHP script) to get the value of resultado1
:
/(([http]+[https]:\/\/)(.*?).(com\/pessoas))/
Note: I took this regex from PHP; I don't know whether I need to change something in it for Java.
If possible, I'd like code that would allow me to add other regexes
to capture other values into strings, e.g. further result strings
...
Source AndroidJsoup
Source Source Code