Error removing accents

2

I have some JavaScript code to remove accents. It works with every accent except the grave accent ("crase", as in "à"), and judging by the code it should handle the grave accent as well. I cannot find the bug.

// Sample input: a file name containing several accented characters.
var teste = "Çaptúra de Tela 2016-04-27 às 18.21.24.png à à";


/**
 * Replaces accented letters in a string with their unaccented counterparts
 * using a fixed lookup table, logs the result, and returns it.
 *
 * Only characters that appear as keys in the table are replaced; any other
 * character matched by the regex is left as it is.
 *
 * @param {string} s - Text to process.
 * @returns {string} The text with every mapped character replaced.
 */
function removerAcentos( s ) {
    // Lookup table: accented character -> plain ASCII letter.
    const map = {
        "â":"a","Â":"A","à":"a","À":"A","á":"a","Á":"A","ã":"a","Ã":"A",
        "ê":"e","Ê":"E","è":"e","È":"E","é":"e","É":"E",
        "î":"i","Î":"I","ì":"i","Ì":"I","í":"i","Í":"I",
        "õ":"o","Õ":"O","ô":"o","Ô":"O","ò":"o","Ò":"O","ó":"o","Ó":"O",
        "ü":"u","Ü":"U","û":"u","Û":"U","ú":"u","Ú":"U","ù":"u","Ù":"U",
        "ç":"c","Ç":"C"
    };

    // Called once per matched character: use the table entry when there is
    // one, otherwise keep the character unchanged.
    function substituir(caractere) {
        const semAcento = map[caractere];
        return semAcento ? semAcento : caractere;
    }

    // The regex matches single non-word characters (accented letters are
    // non-word for \W), plus brackets and spaces.
    const resultado = s.replace(/[\W\[\] ]/g, substituir);

    console.log('remove acentos', resultado);

    return resultado;
}

  // Run the function on the sample string and print the value it returns.
  console.log(removerAcentos(teste));
    
asked by anonymous 20.08.2016 / 21:33

1 answer

4

Although they look alike, these "à" are not the same character; take a look at this comparison: link. If you use the same character throughout (rather than ones that merely look the same), the code already works: link

What happens is that Unicode has two ways to represent an accented letter: either a single precomposed character that includes the accent, or two code points, the base letter followed by the accent as a combining mark.

In ES6 it is already possible to normalize this with .normalize, and the code is very simple. The regex matches the range of code points used for combining accents:

// Sample input: a file name containing several accented characters.
var teste = "Çaptúra de Tela 2016-04-27 às 18.21.24.png à à";

/**
 * Removes accents from a string by decomposing it and dropping the
 * combining marks.
 *
 * @param {string} s - Text to process.
 * @returns {string} The text without combining diacritical marks.
 */
function removerAcentos(s) {
  // NFD splits each accented letter into its base letter followed by
  // separate combining marks, so both Unicode representations end up alike.
  const decomposto = s.normalize('NFD');

  // U+0300–U+036F is the range of combining diacritical marks to strip.
  const semMarcas = decomposto.replace(/[\u0300-\u036f]/g, '');

  return semMarcas;
}

// Run the function on the sample string and print the value it returns.
console.log(removerAcentos(teste));

To support older browsers, you can also use this library, which provides the same functionality as the ES6 method. Here's how: link

<script src="https://rawgit.com/walling/unorm/master/lib/unorm.js"></script>
    
21.08.2016 / 08:04