JavaScript check broken links

1

Is it possible to check for broken links in an HTML file using JavaScript and flag the broken ones with a different color, or flag the links that are not broken with a specific color, the way visited links are shown?

I have this script that lists the links on a page, maybe the verification can be implemented on it.

javascript: (function () {
  /*
   * Bookmarklet: opens a small popup window listing every link (<a>) of the
   * current page. Only block comments are used so the code still works when
   * it is collapsed onto a single line for use as a bookmark URL.
   */

  /* Escapes text so it can be placed safely inside HTML markup and quoted attributes. */
  function escapar(texto) {
    return String(texto)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');
  }

  var w = window.open('', '', 'height=300, width=300');
  if (!w) {
    /* window.open returns null when the popup is blocked; nothing to write to. */
    return;
  }

  var a = document.getElementsByTagName('a');
  var b = a.length;

  if (b !== 0) {
    w.document.write('<h1> Lista de Links </h1> ');

    /* "i" is declared locally so it does not leak onto the page as a global. */
    for (var i = 0; i < b; i++) {
      /* An anchor converts to its href string; escape it before writing it as HTML. */
      var href = escapar(a[i].href);
      w.document.write('<pre><a href=\'' + href + '\'>' + href + '</a>' + '</pre> ');
    }
  } else {
    w.document.write('Nenhum link encontrado');
  }

  /* Finish the document stream opened implicitly by document.write. */
  w.document.close();
})();
    
asked by anonymous 26.06.2018 / 04:35

3 answers

2
  

As reported by the OP, the links are from the same domain, so there are no problems with CORS.

Use XMLHttpRequest(), traversing each <a> tag. Since Ajax makes one request per link, in the code below I put a delay of 1 second between queries, but you can increase or decrease it (or even remove the setTimeout). The explanations are all in the code comments:

// Once the DOM is ready, request every link on the page one at a time and
// paint red those whose response status is not 200.
document.addEventListener("DOMContentLoaded", function(){
   var links = document.body.querySelectorAll("a"); // every <a> element in the body
   var nums_links = links.length; // how many links there are
   var conta = 0; // index of the link currently being checked

   // Checks links[conta] and, when its request completes, schedules the next link.
   function checaLinks(){
      if(conta >= nums_links){ // nothing (left) to check; also covers a page with no links
         return;
      }

      var link = links[conta]; // keep the element itself so the callback does not depend on the counter
      var http = new XMLHttpRequest();
      http.open("GET", link.href, true); // asynchronous request to the link's URL
      http.onreadystatechange = function(){
         if(http.readyState !== 4){ // wait until the request has completed
            return;
         }

         if(http.status !== 200){ // any status other than 200 is treated as a broken link (e.g. 404)
            link.style.color = "red";
         }

         conta++; // move on to the next link
         if(conta < nums_links){
            setTimeout(checaLinks, 1000); // wait 1 second between requests
         }
      };
      http.send(null);
   }

   checaLinks(); // start with the first link
});
    
27.06.2018 / 21:05
3

Good evening!

You can make a request to the link in question and check its response.

Example ..

// Probe the site with a HEAD request: only the response headers are fetched,
// which is enough to tell whether the request succeeded or failed.
const opcoes = {
    url: "https://www.seusite.com",
    type: "HEAD"
};

$.ajax(opcoes)
    .done(() => {
        alert('Site existe!');
    })
    .fail(() => {
        alert('Site não existe!');
    });

Based on the response to the request, you can tell whether the site is up or down.

I hope I have helped:)

    
27.06.2018 / 03:31
1

If the links are not on the same domain, you will have problems with @dvd's solution above, due to CORS. In that case, you will not be able to do it with JavaScript alone; you will need a proxy in between. I found one on GitHub, called php-cross-domain-proxy, that does practically everything needed; it just required a few modifications, which I include here.

Copy the two files to the server; here I used XAMPP, in the <XAMPP>\htdocs\testes folder. The page adds one class to the broken links and another to the working links, changing their background after you click the "Verificar Links" button.

proxy.php

<?php

/**
 * AJAX Cross Domain (PHP) Proxy 0.8
 * Copyright (C) 2016 Iacovos Constantinou (https://github.com/softius)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

/**
 * Enables or disables filtering for cross domain requests.
 * Recommended value: true
 * NOTE(review): it is set to false here, so the allow-list check guarded by
 * this constant is skipped and any URL handed to the proxy is fetched.
 */
define('CSAJAX_FILTERS', false);

/**
 * If set to true, $valid_requests should hold only domains i.e. a.example.com, b.example.com, usethisdomain.com
 * If set to false, $valid_requests should hold the whole URL ( without the parameters ) i.e. http://example.com/this/is/long/url/
 * Recommended value: false (for security reasons - do not forget that anyone can access your proxy)
 */
define('CSAJAX_FILTER_DOMAIN', false);

/**
 * Set debugging to true to receive additional messages - really helpful on development
 * The messages are printed into the response body by csajax_debug_message().
 */
define('CSAJAX_DEBUG', true);

/**
 * A set of valid cross domain requests
 * Only consulted when CSAJAX_FILTERS is true.
 */
$valid_requests = array(
    // 'example.com'
);

/**
 * Set extra multiple options for cURL
 * Could be used to define CURLOPT_SSL_VERIFYPEER & CURLOPT_SSL_VERIFYHOST for HTTPS
 * Also to overwrite any other options without changing the code
 * See http://php.net/manual/en/function.curl-setopt-array.php
 */
$curl_options = array(
    // CURLOPT_SSL_VERIFYPEER => false,
    // CURLOPT_SSL_VERIFYHOST => 2,
    //// this does not look secure, but it is more practical ;)
    // NOTE(review): both options below turn off TLS certificate verification;
    // acceptable for a local test page, should be reconsidered anywhere else.
    CURLOPT_SSL_VERIFYHOST => 0,
    CURLOPT_SSL_VERIFYPEER => 0,
);

/* * * STOP EDITING HERE UNLESS YOU KNOW WHAT YOU ARE DOING * * */

// Rebuild the incoming request headers from $_SERVER so they can be re-sent upstream.
// Only HTTP_* and CONTENT_* entries are considered; Host and X-Proxy-Url are never forwarded.
$request_headers = array();
$skipped_headers = array('Host', 'X-Proxy-Url');
foreach ($_SERVER as $server_key => $server_value) {
    $is_header = strpos($server_key, 'HTTP_') === 0 || strpos($server_key, 'CONTENT_') === 0;
    if (!$is_header) {
        continue;
    }

    // e.g. HTTP_ACCEPT_LANGUAGE -> "accept language" -> "Accept-Language"
    $words = strtolower(str_replace('_', ' ', str_replace('HTTP_', '', $server_key)));
    $name = str_replace(' ', '-', ucwords($words));
    if (in_array($name, $skipped_headers)) {
        continue;
    }

    $request_headers[] = "$name: $server_value";
}

// Work out the request method and the parameters/body that go with it:
//  - GET:        the query-string array
//  - POST:       the form array, or the raw body when no form fields were parsed
//  - PUT/DELETE: the raw body
//  - otherwise:  null
$request_method = $_SERVER['REQUEST_METHOD'];
switch ($request_method) {
    case 'GET':
        $request_params = $_GET;
        break;

    case 'POST':
        $request_params = $_POST;
        if (empty($request_params)) {
            $raw_body = file_get_contents('php://input');
            if (!empty($raw_body)) {
                $request_params = $raw_body;
            }
        }
        break;

    case 'PUT':
    case 'DELETE':
        $request_params = file_get_contents('php://input');
        break;

    default:
        $request_params = null;
}

// Get URL from 'csurl' in GET or POST data, before falling back to X-Proxy-URL header.
// NOTE(review): PHP has already URL-decoded $_REQUEST values, so the urldecode() below
// decodes a second time; kept as-is so existing callers see no change - confirm intent.
if (isset($_REQUEST['csurl'])) {
    $request_url = urldecode($_REQUEST['csurl']);
} elseif (isset($_SERVER['HTTP_X_PROXY_URL'])) {
    $request_url = urldecode($_SERVER['HTTP_X_PROXY_URL']);
} else {
    // No target URL was supplied at all: answer 404 and stop.
    header($_SERVER['SERVER_PROTOCOL'] . ' 404 Not Found');
    header('Status: 404 Not Found');
    $_SERVER['REDIRECT_STATUS'] = 404;
    exit;
}

// parse_url() returns false for seriously malformed URLs, otherwise an array of components.
$p_request_url = parse_url($request_url);

// csurl may exist in GET request methods; it addresses the proxy, not the target.
if (is_array($request_params) && array_key_exists('csurl', $request_params)) {
    unset($request_params['csurl']);
}

// ignore requests for proxy :)
// The script name is quoted so that characters such as "." or "!" in the path are
// matched literally and cannot break the pattern delimiter.
$self_pattern = '!' . preg_quote($_SERVER['SCRIPT_NAME'], '!') . '!';
if (empty($request_url)
    || !is_array($p_request_url)
    || count($p_request_url) == 1
    || preg_match($self_pattern, $request_url)
) {
    csajax_debug_message('Invalid request - make sure that csurl variable is not empty');
    exit;
}

// check against valid requests
// Only runs when CSAJAX_FILTERS is enabled. Depending on CSAJAX_FILTER_DOMAIN the
// allow-list is matched against the host alone or against the URL without its query.
if (CSAJAX_FILTERS) {
    $parsed = $p_request_url;
    if (CSAJAX_FILTER_DOMAIN) {
        // A URL without a host component (e.g. "path?x=1") must not raise an
        // undefined-index notice; treat it as an empty, non-matching host.
        $host = isset($parsed['host']) ? $parsed['host'] : '';
        if (!in_array($host, $valid_requests)) {
            csajax_debug_message('Invalid domain - ' . $host . ' does not included in valid requests');
            exit;
        }
    } else {
        // Rebuild scheme://user:pass@host:port/path from the parsed components.
        $check_url = isset($parsed['scheme']) ? $parsed['scheme'] . '://' : '';
        if (isset($parsed['user'])) {
            // "pass" is optional even when "user" is present.
            $check_url .= $parsed['user'] . (!empty($parsed['pass']) ? ':' . $parsed['pass'] : '') . '@';
        }
        $check_url .= isset($parsed['host']) ? $parsed['host'] : '';
        $check_url .= isset($parsed['port']) ? ':' . $parsed['port'] : '';
        $check_url .= isset($parsed['path']) ? $parsed['path'] : '';
        if (!in_array($check_url, $valid_requests)) {
            csajax_debug_message('Invalid domain - ' . $request_url . ' does not included in valid requests');
            exit;
        }
    }
}

// append query string for GET requests
// (only when the target URL does not already carry a query string of its own)
if ($request_method == 'GET' && count($request_params) > 0 && (!array_key_exists('query', $p_request_url) || empty($p_request_url['query']))) {
    $request_url .= '?' . http_build_query($request_params);
}

// let the request begin
$ch = curl_init($request_url);
curl_setopt($ch, CURLOPT_HTTPHEADER, $request_headers);   // (re-)send headers
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);     // return response
curl_setopt($ch, CURLOPT_HEADER, true);       // enabled response headers
// add data for POST, PUT or DELETE requests
if ('POST' == $request_method) {
    $post_data = is_array($request_params) ? http_build_query($request_params) : $request_params;
    curl_setopt($ch, CURLOPT_POST, true);
    curl_setopt($ch, CURLOPT_POSTFIELDS,  $post_data);
} elseif ('PUT' == $request_method || 'DELETE' == $request_method) {
    curl_setopt($ch, CURLOPT_CUSTOMREQUEST, $request_method);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $request_params);
}

// Set multiple options for curl according to configuration
// (skipped when no extra options are configured)
if (is_array($curl_options) && count($curl_options) > 0) {
    curl_setopt_array($ch, $curl_options);
}

// retrieve response (headers and content)
$response = curl_exec($ch);
// curl_exec() returns false when the transfer itself fails (for instance an
// unresolvable host); without this branch the script errored out. Answer 404 so
// the caller sees the link as broken.
if ($response === false) {
    header($_SERVER['SERVER_PROTOCOL'] . ' 404 Not Found');
    header('Status: 404 Not Found');
    $_SERVER['REDIRECT_STATUS'] = 404;
    csajax_debug_message('error: ' . curl_error($ch));
    curl_close($ch);
    exit;
}
curl_close($ch);

// split response to header and content
// The body part may be missing, so do not assume that two pieces come back.
$response_parts = preg_split('/(\r\n){2}/', $response, 2);
$response_headers = $response_parts[0];
$response_content = isset($response_parts[1]) ? $response_parts[1] : '';

// Path of this proxy without its own query string. REQUEST_URI already contains
// "?csurl=..." for GET requests, so appending another "?csurl=" to it would
// produce a malformed redirect target.
$proxy_path = strtok($_SERVER['REQUEST_URI'], '?');

// (re-)send the headers
$response_headers = preg_split('/(\r\n){1}/', $response_headers);
foreach ($response_headers as $response_header) {
    // Rewrite the 'Location' header, so clients will also use the proxy for redirects.
    // Header names are case-insensitive, and the target is URL-encoded so that its
    // own "?", "&" and "#" characters survive as part of the csurl value.
    if (preg_match('/^Location:\s*(.*)$/i', $response_header, $location)) {
        $response_header = 'Location: ' . $proxy_path . '?csurl=' . urlencode(trim($location[1]));
    }
    // Transfer-Encoding is not forwarded; every other header is passed on,
    // including the status line.
    if (!preg_match('/^(Transfer-Encoding):/i', $response_header)) {
        header($response_header, false);
    }
}

// finally, output the content
print($response_content);

/**
 * Prints a diagnostic message followed by a newline, but only while
 * CSAJAX_DEBUG is enabled; otherwise does nothing.
 *
 * @param string $message Text to print.
 */
function csajax_debug_message($message)
{
    if (CSAJAX_DEBUG != true) {
        return;
    }

    echo $message . PHP_EOL;
}

checar-links.html

<!DOCTYPE html>
<html>

<head>
  <meta charset="UTF-8">
  <title>Checagem de links</title>
  <meta name="viewport" content="width=device-width, initial-scale=1">

  <link rel='stylesheet prefetch' href='https://maxcdn.bootstrapcdn.com/bootstrap/3.3.5/css/bootstrap.min.css'>
  <link rel='stylesheet prefetch' href='https://cdnjs.cloudflare.com/ajax/libs/animate.css/3.2.3/animate.min.css'>
  <link rel='stylesheet prefetch' href='https://maxcdn.bootstrapcdn.com/font-awesome/4.5.0/css/font-awesome.min.css'>

  <style>
  .link-ok {
    background: #afa;
  }
  .link-quebrado {
    background: #faa;
  }
  </style>
</head>

<body>
  <div class='container'>
    <div class='jumbotron text-center'>
      <h1>Checagem de links <i class="fa fa-link"></i></h1>
      <p>Verificando quais links da página estão quebrados ou não.</p>
    </div>
    <!-- jumbotron -->
    <p>Aqui no codepen não consigo fazer nenhum GET, então está ficando tudo vermelho :P.</p>
    <p><button id="verificar" class="btn btn-default">Verificar Links</button></p>
    <p>
      <ul>
        <li><a target="_blank" href="https://codepen.io/dudaskank">https://codepen.io/dudaskank</a></li>
        <li><a target="_blank" href="https://codepen.io/werueowruoiuweiuwoieuroiweuoirweuoiruweoiuroiweuroiewu">https://codepen.io/werueowruoiuweiuwoieuroiweuoirweuoiruweoiuroiweuroiewu</a></li>
        <li><a target="_blank" href="https://www.google.com">https://www.google.com</a></li>
        <li><a target="_blank" href="https://www.googlex.com">https://www.googlex.com (não existe, da erro no curl e o proxy manda um 404)</a></li>
        <li><a target="_blank" href="https://www.globo.com">https://www.globo.com</a></li>
        <li><a target="_blank" href="http://dudaskank.com">http://dudaskank.com</a></li>
        <li><a target="_blank" href="http://localhost/testes/nao-existe.html">http://localhost/testes/nao-existe.html</a></li>
        <li><a target="_blank" href="http://localhost/testes/checar-links.html">http://localhost/testes/checar-links.html</a>(sou eu mesmo)</li>
        <li><a target="_blank" href="checar-links.html">checar-links.html</a>(sou eu mesmo de novo, só pra mostrar que funciona com caminhos relativos)</li>
        <li><a target="_blank" href="/testes/checar-links.html">/testes/checar-links.html</a>(outra vez)</li>
      </ul>
    </p>

  </div>
  <!-- content -->
  <!-- Libraries are loaded over https so the page also works when it is served over https.
       The Font Awesome stylesheet is already linked in <head>; it is not a script. -->
  <script src='https://cdnjs.cloudflare.com/ajax/libs/jquery/2.1.3/jquery.min.js'></script>
  <script src='https://maxcdn.bootstrapcdn.com/bootstrap/3.3.5/js/bootstrap.min.js'></script>

  <script>
  // Run the check over every link each time the button with id "verificar" is clicked.
  var botao = document.getElementById("verificar");
  botao.addEventListener("click", verificarTodosLinks, false);

  /**
   * Runs verificarLink on every <a> element of the document, in document
   * order, then logs the collection of anchors to the console.
   */
  function verificarTodosLinks() {
    var ancoras = document.getElementsByTagName("a");
    var total = ancoras.length;
    for (var i = 0; i < total; i++) {
      verificarLink(ancoras[i]);
    }
    console.log(ancoras);
  }

  /**
   * Checks a single link through the same-origin proxy and marks the element:
   * class "link-ok" when the proxy answers 200, "link-quebrado" otherwise.
   *
   * @param {HTMLAnchorElement} el - the anchor whose href is checked.
   */
  function verificarLink(el) {
    var url = el.href;
    // https://gist.github.com/rafaelstz/5a4aa3584061131d714b709ba773c5f8
    var ajax = new XMLHttpRequest();
    // Handler that receives the result of the request.
    ajax.onreadystatechange = function() {
      // Only the final state (4 = done) carries a meaningful status.
      if (ajax.readyState !== 4) {
        return;
      }
      console.log("link, status: ", url, ajax.status);
      // Clear any previous result first, so checking again does not pile up
      // classes or leave both classes on the same link.
      el.classList.remove("link-ok");
      el.classList.remove("link-quebrado");
      el.classList.add(ajax.status === 200 ? "link-ok" : "link-quebrado");
    };
    // The target URL is encoded so that its own "?", "&" and "#" characters
    // stay inside the csurl parameter instead of being read as part of the
    // proxy's query string (or dropped as a fragment).
    ajax.open("GET", "proxy.php?csurl=" + encodeURIComponent(url), true);
    // Send the request.
    ajax.send();
  }
  </script>




</body>

</html>
    
28.06.2018 / 01:53