How can I use a regular expression to capture only certain attributes of links?

5

I need a basic example (or examples) of how to write a small script that retrieves only the contents of the href attribute.

I will simplify the explanation in two groups, they are: A and B

In Group A, the links start with a slash / followed by the word manga.

So it looks like this:

Group A

<a href='/manga?v=1234567890'>A</a>
<a href='/manga?v=1234567890'>A</a>
<a href='/manga?v=1234567890'>A</a>

etc ...

Group B

<a href='/caqui?v=1234567890'>B</a>
<a href='/caqui?v=1234567890'>B</a>
<a href='/caqui?v=1234567890'>B</a>

etc ...

  

How can I handle just one of these groups, showing only the manga links on the page? The rest should not appear.

Warning! None of the links has a class or an id, but links that belong to the same group share the same pattern in their href attribute. I just want the manga ones.

So let's go!

I've already written a loop that goes through the a elements; it returns every link on the page. See:

// Live collection of every <a> element in the document.
var link = document.getElementsByTagName('a');

// Append each anchor's raw href attribute to #resultado, wrapped in <br> tags.
var i = 0;
while (i < link.length) {
    var alvo = document.getElementById("resultado");
    alvo.innerHTML += "<br>" + link[i].getAttribute('href') + "<br>";
    i++;
}

What I'm asking for is how to combine link[i].getAttribute('href') with a regular expression:

/**
 * Tries to pull the ID that follows "manga?" out of a URL.
 *
 * @param {string} url - URL text to inspect.
 * @returns {string|undefined} match[7] when it is exactly 10 characters long;
 *     otherwise an alert is shown and nothing is returned.
 */
function ExtrairID(url){
    // Groups 1 and 2 both wrap the literal "manga?"; group 3 takes the run of
    // characters (none of #, &, ?) after the optional "?", "v" and "=".
    var regExp = /^.*((manga\?))\??v?=?([^#\&\?]*).*/;
    var match = url.match(regExp);
    // NOTE(review): the pattern defines only three capture groups, so match[7]
    // is undefined whenever the pattern matches and reading .length throws a
    // TypeError; the captured ID is in match[3].
    if ( match && match[7].length == 10 ){
        return match[7];
    }else{
        // Message shown to the user (Portuguese: "Could not extract the ID.").
        alert("Não foi possível extrair a ID.");
    }
}

I've already made some attempts on my own but ended up getting lost, so I decided to come here and ask. Without further ado, I await a reply or a brief explanation.

    
asked by anonymous 14.12.2017 / 06:44

4 answers

3

An option without regex: just check whether the href contains the word manga:

// Collect the href of every link whose href contains the word "manga".
var els = document.querySelectorAll("a");
var resultado = '';
for(var x=0; x<els.length; x++){
   var href = els[x].getAttribute("href");
   // getAttribute returns null for an <a> without an href attribute; skip it
   // instead of letting the indexOf call throw.
   if(href !== null && href.indexOf("manga") !== -1){
      // One href per line so consecutive matches do not run together.
      resultado += href + "\n";
   }
}
console.log(resultado);
<a href='/manga?v=1234567890'>A</a>
<a href='/manga?v=1234567890'>A</a>
<a href='/manga?v=1234567890'>A</a>
<a href='/caqui?v=1234567891'>B</a>
<a href='/caqui?v=1234567891'>B</a>
<a href='/caqui?v=1234567891'>B</a>

Or you can use the specific selector, without using indexOf :

// The attribute selector keeps only anchors whose href contains "manga",
// so every selected element has an href attribute.
var els = document.querySelectorAll("a[href*='manga']");
var resultado = '';
for(var x=0; x<els.length; x++){
   // One href per line so consecutive matches do not run together.
   resultado += els[x].getAttribute("href") + "\n";
}
console.log(resultado);
<a href='/manga?v=1234567890'>A</a>
<a href='/manga?v=1234567890'>A</a>
<a href='/manga?v=1234567890'>A</a>
<a href='/caqui?v=1234567891'>B</a>
<a href='/caqui?v=1234567891'>B</a>
<a href='/caqui?v=1234567891'>B</a>
    
15.12.2017 / 03:05
2

You can easily get all links with a href that starts with /manga with the CSS selector:

a[href^='/manga']

Using document.querySelectorAll () :

// Declared explicitly: assigning to an undeclared name creates an implicit
// global and throws a ReferenceError in strict mode.
var links = document.querySelectorAll("a[href^='/manga']");


In addition, every HTMLAnchorElement (such as <a>) has a .search property that returns the query string (from the ? onward).

Then use this regex to get the value of the v parameter:

[?&]v=(\d+)


Code:

// Anchors whose href attribute starts with "/manga".
var links = document.querySelectorAll("a[href^='/manga']");
// Captures the digits of the "v" parameter in a query string.
var re = /[?&]v=(\d+)/;

for (var idx = 0; idx < links.length; idx += 1) {
    // .search holds the anchor's query string, starting at "?".
    var achado = re.exec(links[idx].search);
    if (achado !== null) {
        console.log(achado[1]);
    }
}
Grupo A e B:
<a href='/manga?v=1234567890'>A</a>
<a href='/manga?v=1234567891'>A</a>
<a href='/caqui?v=1234567892'>B</a>
<a href='/caqui?v=1234567893'>B</a>
<a href='/manga?v=1234567894'>A</a>
<a href='/caqui?v=1234567895'>B</a>
    
14.12.2017 / 22:37
2

First, use test() to check whether the URL contains /manga

  • \d — digits only
  • {10} — exactly 10 of them

var LINKS = document.querySelectorAll('a');
var RESULTADO = document.querySelector('#resultado');

/**
 * Appends the first run of 10 digits found in a "/manga" URL to #resultado.
 * URLs that do not contain "/manga", or that have no 10 consecutive digits,
 * are ignored.
 *
 * @param {string|null} url - href attribute value of a link.
 */
function extrair(url) {
  // Checks whether it contains '/manga'
  if (!/\/manga/.test(url)) {
    return;
  }
  // exec returns null when nothing matches; without this guard the text
  // "null" would be written to the page.
  var id = /\d{10}/.exec(url);
  if (id !== null) {
    RESULTADO.innerHTML += id[0] + "<br>";
  }
}

for (var i = 0; i < LINKS.length; i++) {
  extrair(LINKS[i].getAttribute('href'));
}
<h3>Grupo A e B</h3>
<a href='/manga?v=4533567894'>A</a>
<a href='/caqui?v=1234567490'>B</a>
<a href='/manga?v=7634567890'>A</a>
<a href='/caqui?v=1234567888'>B</a>
<a href='/manga?v=2345567899'>A</a>
<a href='/caqui?v=1234567234'>B</a>

<h3>Resultado da Expressão Regular - Grupo A</h3>
<div id="resultado"></div>

ECMAScript 6 (ES2015)

const LINKS = document.querySelectorAll('a');
const RESULTADO = document.querySelector('#resultado');

/**
 * Appends the first run of 10 digits found in a "/manga" URL to #resultado.
 * URLs that do not contain "/manga", or that have no 10 consecutive digits,
 * are ignored.
 *
 * @param {string|null} url - href attribute value of a link.
 */
const extrair = (url) => {
  // Checks whether it contains '/manga'
  if (!/\/manga/.test(url)) {
    return;
  }
  // exec returns null when nothing matches; without this guard the text
  // "null" would be written to the page.
  const id = /\d{10}/.exec(url);
  if (id !== null) {
    RESULTADO.innerHTML += id[0] + "<br>";
  }
};

LINKS.forEach((link) => {
  extrair(link.getAttribute('href'));
});
<h3>Grupo A e B</h3>
<a href='/manga?v=4533567894'>A</a>
<a href='/caqui?v=1234567490'>B</a>
<a href='/manga?v=7634567890'>A</a>
<a href='/caqui?v=1234567888'>B</a>
<a href='/manga?v=2345567899'>A</a>
<a href='/caqui?v=1234567234'>B</a>

<h3>Resultado da Expressão Regular - Grupo A</h3>
<div id="resultado"></div>
  

Note:

    
14.12.2017 / 07:09
1

Instead of match[7] use match[3]

entry: ExtrairID("aaaa/manga?v=1234567890")

/**
 * Extracts the 10-character ID that follows "manga?" in a URL.
 *
 * The match array is logged to the console for inspection. When the pattern
 * does not match, or the captured text is not exactly 10 characters long, an
 * alert is shown and nothing is returned.
 *
 * @param {string} url - URL text to inspect.
 * @returns {string|undefined} The captured ID, or undefined on failure.
 */
function ExtrairID(url){
    var partes = url.match(/^.*((manga\?))\??v?=?([^#\&\?]*).*/);
    console.log(partes);

    // Group 3 holds the text after the optional "?", "v" and "=".
    var id = partes ? partes[3] : null;
    if (id === null || id.length !== 10) {
        alert("Não foi possível extrair a ID.");
        return;
    }
    return id;
}

match result:

["aaaa/manga?v=1234567890", "manga?", "manga?", "1234567890", index: 0, input: "aaaa/manga?v=1234567890"]
0 : "aaaa/manga?v=1234567890"
1 : "manga?"
2 : "manga?"
3 : "1234567890"
index : 0
input : "aaaa/manga?v=1234567890"

return 1234567890

Edit:

// Run ExtrairID on the href of every link in the document.
var link = document.getElementsByTagName('a');
for (var i = 0; i < link.length; i++) {
  var href = link[i].getAttribute('href');
  // getAttribute returns null for an <a> without an href attribute; passing
  // null on would make ExtrairID throw on url.match.
  if (href !== null) {
    ExtrairID(href);
  }
}
    
14.12.2017 / 09:18