Book query by ISBN and save result in txt via console.log

2

Context:
I have a list of ISBN (International Standard Book Number) with about 100 records, all Brazilian books, and I wanted to get the information on the book in a faster way, but does not have to be in real time.

To look up a book by ISBN, I open the search page of the ISBN Brazil site, enter the ISBN, and it returns the book data, which I then copy into a spreadsheet. It is a very basic process.

To make this search a little faster, I only need to solve the captcha once; after that I can query the URL below directly, changing only the ISBN:

http://www.isbn.bn.br/website/consulta/cadastro/isbn/9788566250299

Need:
Based on this, my idea is to solve the captcha once and then look up any book by changing the ISBN in the URL. However, I would like to do this automatically and save the field values, separated by spaces, in a single TXT file.

Via console.log I get the information I need, very simply, but it works:

    // Polyfill only: define trim() when the engine does not already provide it,
    // so the native implementation is never overwritten.
    if (typeof String.prototype.trim !== 'function') {
        String.prototype.trim = function() {
            // Strip leading and trailing whitespace.
            return this.replace(/^\s+|\s+$/g, '');
        };
    }


// Build one line of quoted, space-separated fields from the result page that
// is currently open in the browser, then print it to the console.
var livro = (function () {
    var divs = document.getElementsByClassName("conteudo")[0].getElementsByTagName('div');

    // Trimmed text of child node `nodeIndex` inside the `divIndex`-th <div>.
    function fieldText(divIndex, nodeIndex) {
        return divs[divIndex].childNodes[nodeIndex].nodeValue.trim();
    }

    var fields = [
        fieldText(5, 3),
        fieldText(6, 3),
        fieldText(7, 3),
        fieldText(8, 3),
        fieldText(10, 3),
        // The last field joins the two values read from div 12 with ';'.
        fieldText(12, 3) + ';' + fieldText(12, 5)
    ];

    // Wrap every field in double quotes and separate the fields with a space.
    return '"' + fields.join('" "') + '"';
}());

console.log(livro);

Return:

"978-85-66250-29-9" "Começando com o linux: comando, serviços e administração" "1" "2013" "135" "Adriano Henrique de Almeida (Organizador);Paulo Eduardo Azevedo Silveira (Organizador);Daniel Romero ( Autor);"

Problem:
I have to do this book by book, and I am really tired of it :/ Is there a very simple way to automate this, perhaps by looping over an array like the example below, and returning the information even if it is just a simple TXT with space-separated values as shown above?

Thank you

// ISBNs of the books to look up.
var isbn = [
    "9788566250299",
    "9788555191459",
    "9788555191039"
];

Note:
With the Google Books API, some books return no results, as shown in the question "Search details of a book with google-books-api-in-php", which is why I would like to use the approach above instead. With Google Books, the result would be obtained from a URL like this:

https://www.googleapis.com/books/v1/volumes?q=isbn:9788566250299

But that URL returns nothing for this book, whereas the ISBN Brazil site does return it.

    
asked by anonymous 13.05.2016 / 20:11

1 answer

1

Here's a suggestion:)

Do a first search manually, to pass the captcha so that your IP stays logged in.

Then put this script in the console:

// ISBNs to look up; this list is handed to iterator() at the end of the script.
var isbns = [
    "9788566250299",
    "9788555191459",
    "9788555191039"
];

/**
 * Removes leading and trailing whitespace from a string.
 *
 * @param {string} str - Text to clean up.
 * @returns {string} `str` without surrounding whitespace.
 */
function trim(str) {
    var withoutLeading = str.replace(/^\s+/, '');
    return withoutLeading.replace(/\s+$/, '');
}

/**
 * Starts a lookup for every ISBN in `numbers` (without waiting for the
 * previous one to finish) and collects the results.
 *
 * @param {string[]} numbers - ISBNs to look up.
 * @param {function(Array): void} done - Called once, after every lookup has
 *     reported back, with the results stored at the same positions as the
 *     corresponding entries of `numbers`.
 */
function iterator(numbers, done) {
    var ISBNdata = [];
    var calls = numbers.length;

    // With nothing to request no callback would ever fire, so report the
    // empty result immediately instead of leaving the caller waiting.
    if (calls === 0) {
        done(ISBNdata);
        return;
    }

    // Iterate over the argument rather than a global list, so the function
    // works for whatever list the caller passes in.
    numbers.forEach(function(isbn, i) {
        fetch(isbn, i, function(data, index) {
            calls--;
            // Callbacks may arrive in any order; the index keeps every result
            // in the slot that matches its ISBN.
            ISBNdata[index] = data;
            if (calls === 0) done(ISBNdata);
        });
    });
}

/**
 * Requests the lookup page for one ISBN and reports the extracted data.
 *
 * @param {string} nr - ISBN appended to the lookup URL.
 * @param {number} index - Position of this ISBN in the caller's list; it is
 *     handed back unchanged so the caller can store the result in order.
 * @param {function(*, number): void} cb - Called exactly once with
 *     `(data, index)`. `data` is whatever `process` returns for the response,
 *     or `undefined` when the request fails.
 */
function fetch(nr, index, cb) {
    $.ajax('/website/consulta/cadastro/isbn/' + nr).done(function(raw) {
        var data = process(raw);
        console.log('Recebido index', index);
        cb(data, index);
    }).fail(function(jqXHR, textStatus) {
        // A failed request must still report back; otherwise the caller keeps
        // waiting for this index and the whole batch never completes.
        console.log('Falha index', index, textStatus);
        cb(undefined, index);
    });
}

/**
 * Extracts the book fields from the raw HTML of a lookup response.
 *
 * The values are read from fixed positions inside the `.conteudo` element:
 * child node 3 of divs 5, 6, 7, 8, 10 and 12, followed by child node 5 of
 * div 12.
 *
 * @param {string} raw - HTML text of the response.
 * @returns {string[]|undefined} The seven trimmed values in the order listed
 *     above. A value whose text node is absent becomes an empty string.
 *     Returns `undefined` when `raw` is not a string, has no `<body>`, has no
 *     `.conteudo` element, or lacks one of the expected divs.
 */
function process(raw) {
    if (typeof raw !== 'string') return;

    // Accept a <body> tag written with attributes and/or in upper case.
    var body = raw.match(/<body(?:\s[^>]*)?>([\s\S]+)<\/body>/i);
    if (!body) return;

    // Parse the markup inside a detached element so the page itself is untouched.
    var proxy = document.createElement('div');
    proxy.innerHTML = body[1];

    var content = proxy.querySelector('.conteudo');
    if (!content) return;
    var elements = content.getElementsByTagName('div');

    // [div index, child node index] of every value, in output order.
    var slots = [[5, 3], [6, 3], [7, 3], [8, 3], [10, 3], [12, 3], [12, 5]];
    var data = [];
    for (var i = 0; i < slots.length; i++) {
        var element = elements[slots[i][0]];
        // An unexpected layout yields "no data" instead of an exception that
        // would abort the caller's response handler.
        if (!element) return;
        var node = element.childNodes[slots[i][1]];
        var text = node ? node.nodeValue : null;
        // Keep the slot even when the node is missing, so positions stay aligned.
        data.push(text == null ? '' : trim(text));
    }
    return data;
}

// Run the lookup for every ISBN in the list and, once all of them have
// reported back, print a separator line followed by the results as JSON.
iterator(isbns, function printResults(results) {
    var separator = '---------------';
    var json = JSON.stringify(results);
    console.log(separator);
    console.log(json);
});

and the result is this:

[
    ["978-85-66250-29-9", "Começando com o linux: comando, serviços e administração", "1", "2013", "135", "Adriano Henrique de Almeida (Organizador)", "Paulo Eduardo Azevedo Silveira (Organizador)"],
    ["978-85-5519-145-9", "Componentes reutilizáveis em Java com reflexão e anotações", "1", "2014", "378", "Eduardo Guerra ( Autor)", "Vivian Matsui (Editor)"],
    ["978-85-5519-103-9", "Containers com Docker: do desenvolvimento à produção", "1", "2015", "127", "Daniel Romero ( Autor)", "Vivian Matsui (Editor)"]
]

About the script:

The iterator function hands each ISBN to its own function call; from there on everything happens asynchronously. Each number is passed to fetch, which makes an ajax request and, when it receives the response, asks process to filter the results. fetch then invokes its callback, and once all the callbacks have been called, the "mother callback" (the one we passed to iterator at the start) is called with all the data :)

Note: Use this technique only if that site's terms of use allow you to use its content in the way you intend.

18.05.2016 / 23:58