I'm developing an HTML5 application that uses the Wikipedia API to give the definition of whatever the user asks about. I adapted code that was shared on a Stack site in another language, and in it I noticed the use of regular expressions to eliminate certain parts of the extracted text (links, references, tags, etc.).
However, I would also like to remove any text in parentheses, as in the example below:
The text currently looks like this:
Application software (application or application) is a computer program that aims to help your user to perform ...
I would like it to look like this:
Application software is a computer program that aims to help your user to perform ...
Well, I have the code below and I would like some help with the regular expressions (which I have not yet learned, but I'm trying) to solve my problem:
// When the 'q' input is not empty, copy its current value into `algo`.
// NOTE(review): neither `d` nor `algo` is declared in this excerpt — presumably `d`
// aliases `document` and `algo` is declared by surrounding code; confirm.
if(d.getElementById('q').value !== "") {
algo = d.getElementById('q').value
}
/**
 * Starts the lookup for `algo` and puts the page into a "please wait" state.
 *
 * In order: starts the fetch, writes the term to the 'feedback' and 'q' fields,
 * calls `search()`, writes the waiting message to 'resposta', and calls `voz()`.
 *
 * @param {string} algo - Word or sentence to be defined.
 */
var definir = function (algo) {
    // Minimum of 1 character, maximum of 1000 characters for the extract.
    startFetch(algo, 1, 1000);

    var campoFeedback = d.getElementById("feedback");
    campoFeedback.value = "Definir > " + algo; // Word or sentence to be defined

    var campoPergunta = d.getElementById('q');
    campoPergunta.value = algo;
    search();

    var campoResposta = d.getElementById("resposta");
    campoResposta.value = "Só um momento...";
    voz();
};
// Element with id "resposta".
var textbox = d.getElementById("resposta");

// State shared between startFetch and onFetchComplete.
var tempscript = null; // <script> element of the request in flight, or null when idle
var minchars; // an extract must be longer than this to be shown
var maxchars; // sent to the API as the `exchars` limit
var attempts; // reset by startFetch (unless retrying), incremented by onFetchComplete
/**
 * Starts a JSONP request to the Portuguese Wikipedia API for the intro extract of `algo`.
 *
 * The request is made by appending a <script> element to the page; the URL names
 * `onFetchComplete` as the callback. Only one request runs at a time: while
 * `tempscript` is set, further calls return without doing anything.
 *
 * @param {string} algo - Word or sentence to be defined (sent as the `titles` parameter).
 * @param {number} minimumCharacters - Stored in `minchars` unless `isRetry` is truthy.
 * @param {number} maximumCharacters - Stored in `maxchars` unless `isRetry` is truthy;
 *     `maxchars` is sent as the `exchars` parameter.
 * @param {boolean} [isRetry] - When truthy, `attempts`, `minchars` and `maxchars`
 *     keep their current values.
 */
function startFetch(algo, minimumCharacters, maximumCharacters, isRetry) {
    if (tempscript) {
        return;
    }
    if (!isRetry) {
        attempts = 0;
        minchars = minimumCharacters;
        maxchars = maximumCharacters;
    }

    var script = d.createElement("script");
    script.type = "text/javascript";
    script.id = "tempscript";
    script.src = "https://pt.wikipedia.org/w/api.php?action=query&titles="
        // Encode the term so characters such as "&", "#", "+" or "?" stay part of
        // the title instead of being parsed as URL syntax.
        + encodeURIComponent(algo)
        + "&redirects="
        + "&prop=extracts"
        + "&exchars="
        + maxchars // Maximum number of characters to fetch
        + "&exintro"
        + "&format=json"
        + "&callback=onFetchComplete"
        + "&requestid="
        + Math.floor(Math.random() * 999999).toString();

    // If the script fails to load, the callback never runs; release the
    // "request in flight" guard here so later lookups are not ignored forever.
    script.onerror = function () {
        if (tempscript !== script) {
            return;
        }
        d.body.removeChild(script);
        tempscript = null;
    };

    tempscript = script;
    d.body.appendChild(script);
}
/**
 * JSONP callback for the request started by `startFetch`.
 *
 * Removes the temporary <script> element, extracts the text of the first page in
 * the response, and either shows it (and appends it to the 'log' field) or shows
 * the "definition not found" message.
 *
 * @param {Object} data - API response; the extract is read from the first entry
 *     of `data.query.pages`.
 * @param {string} [algo] - Term being defined, used only for the log entry. The
 *     JSONP response invokes this callback with `data` alone, so when this is
 *     omitted the current value of the 'q' field is used instead.
 */
function onFetchComplete(data, algo) {
    d.body.removeChild(tempscript);
    tempscript = null;

    var termo = algo !== undefined ? algo : d.getElementById('q').value;

    // A missing page carries no `extract`, and an error response carries no
    // `query`; treat both as an empty extract so the "not found" branch runs
    // instead of a TypeError.
    var pagina = getFirstProp(data && data.query ? data.query.pages : null);
    var extrato = pagina && typeof pagina.extract === "string" ? pagina.extract : "";

    var s = htmlDecode(stripTags(extrato));
    if (s == null) {
        // Guard against a null result from htmlDecode.
        s = "";
    }

    // `attempts` is only incremented when the text is not longer than `minchars`.
    if (s.length > minchars || attempts++ > 5) {
        d.getElementById("resposta").value = s;
        d.getElementById("feedback").value = "Definindo...";
        voz(); // Reads the definition of the word or sentence aloud
        espera(); // Delay to roughly match the loading of the audio
        d.getElementById("log").value += "Definir > "
            + termo // Word or sentence to be defined
            + "\n"
            + s // Loaded definition
            + "\n\n";
        saveHist(); // Saves the definition to the conversation log
    } else {
        d.getElementById('resposta').value = "Não encontrei a definição, " + nomeDoUsuario + ".";
        voz();
    }
}
/**
 * Returns the value of the first enumerable property of `obj`, in `for...in` order.
 *
 * @param {Object} obj - Object to read from.
 * @returns {*} The first property's value, or `undefined` when nothing is enumerated.
 */
function getFirstProp(obj) {
    var primeiroValor;
    for (var chave in obj) {
        primeiroValor = obj[chave];
        break;
    }
    return primeiroValor;
}
/**
 * Removes HTML tags from a string, keeping the text between them.
 *
 * Matches opening, closing and self-closing tags (`<p>`, `</p>`, `<br/>`,
 * `<a href="...">`). Quoted attribute values may contain `>`. Quotes are only
 * consumed as balanced pairs, and the three alternatives inside the tag never
 * match the same character, so an unterminated tag cannot cause exponential
 * backtracking.
 *
 * @param {string} s - Text that may contain HTML tags.
 * @returns {string} The text without tags; "" for `null` or `undefined`.
 */
function stripTags(s) {
    if (s == null) {
        return "";
    }
    // (Question author's note: this regular expression is the part the question is about.)
    return String(s).replace(/<\/?\w+(?:"[^"]*"|'[^']*'|[^'">])*>/g, "");
}
/**
 * Decodes HTML entities in `input` and returns the resulting plain text.
 *
 * The markup is parsed inside a <template> element, whose content is inert, so
 * any markup that reaches this function is not rendered or executed. The text
 * of the whole parsed fragment is returned, so text that follows a leftover tag
 * is kept rather than cut off at the first node.
 *
 * @param {string} input - Text that may contain HTML entities.
 * @returns {string} The decoded text; "" when `input` is empty.
 */
function htmlDecode(input) {
    var holder = document.createElement("template");
    holder.innerHTML = input;
    return holder.content.textContent;
}