How to do a search by ignoring accent in JavaScript?

43

Suppose I have a list of words in JavaScript (if necessary, already sorted by collation rules ):

// Sample word list; the cedilla (ç) and tilde (ã) are used inconsistently across entries.
var palavras = [
    "acentuacao",
    "divagacão",
    "programaçao",
    "taxação"
];

Notice that I have not used the cedilla (ç) or the tilde (ã) consistently. How can I search this list for "programação" while ignoring accents, so that both forms of the query return results? E.g.:

// Both the unaccented and the accented query should return results.
buscar(palavras, "programacao");
buscar(palavras, "programação");

I found a similar question for another language (Python) and tried to apply the solution proposed there (normalize and remove the combining characters) in JavaScript, but I ran into the lack of support for Unicode normalization. And even if I can normalize the strings (using a polyfill, for example), I still have to identify and remove the combining characters.

Is there anything already ready for this, so I do not have to "reinvent the wheel"? Otherwise, how?

Update: Many good answers, each with its pros and cons, difficult to choose one. I decided to propose a test taking into account 3 factors:

  • Robustness (i.e. ability to search for words whose accent is represented in more than one different way)

    'arvore',       // Latin Small Letter A,                         r,v,o,r,e
    '\xe1rvore',    // Latin Small Letter A with acute,              r,v,o,r,e
    'a\u0301rvore', // Latin Small Letter A, Combining Acute Accent, r,v,o,r,e
    
  • Flexibility (can you search ignoring capitalization? can an accented letter in the query be made to match only an accented letter?)

                     arvore    árvore
    'arvore'          sim       não
    '\xe1rvore'       sim       sim
    'a\u0301rvore'    sim       sim
    
  • Performance (requires tinkering with the original list? How long does an operation take?)

  • Example in jsFiddle. I will adapt each answer to this example as best I can and evaluate it on that basis, so that we can have a canonical answer.

        
    asked by anonymous 04.02.2014 / 01:48

    6 answers

    21

    These characters have no mathematical relationship to each other in the Unicode table, so the way I see it is to do the substitution "manually", at least with the help of regular expressions. It takes some work, but it's functional.

    Basically:

    /**
     * Replaces accented letters with their unaccented ASCII base letter and
     * then strips every character that is not an ASCII letter or digit.
     *
     * Only the A/a, uppercase E and cedilla families are listed here; the
     * remaining letters follow the same pattern.
     *
     * @param {string} str - Text to normalise.
     * @returns {string} The text reduced to the characters [A-Za-z0-9].
     */
    function replaceSpecialChars(str)
    {
        // The "g" flag is required: without it only the first occurrence of
        // each class is replaced and later ones are stripped by the final step.
        str = str.replace(/[ÀÁÂÃÄÅ]/g,"A");
        str = str.replace(/[àáâãäå]/g,"a");
        str = str.replace(/[ÈÉÊË]/g,"E");
        str = str.replace(/[Ç]/g,"C");
        str = str.replace(/[ç]/g,"c");

        // the rest (remaining accented letters go here)

        // Drop anything that is still not an ASCII letter or digit.
        return str.replace(/[^a-z0-9]/gi,'');
    }
    

    In the example above, the case-insensitive flag does not help with the replacement itself, so you have to write one rule for each version, lowercase and uppercase.

    If you want to see an example working, visit this fiddle: link

        
    04.02.2014 / 02:06
    12

    Something involving dynamic regular expressions can solve the problem in some ways. I already had to do something of the type in my jQuery plugin but it did not involve accentuation.

    I think something like this might help:

    /**
     * Builds a RegExp from this string in which every lowercase vowel and "c"
     * also matches its accented variants (and vice versa). All other
     * characters are matched literally.
     *
     * @returns {RegExp} Non-global pattern; uppercase letters are not expanded.
     */
    String.prototype.toSearch = function () {
        var chars = ['aáàãäâ', 'eéèëê', 'iíìïî', 'oóòõöô', 'uúùüû', 'cç'],
            // Escape regex metacharacters first so the text is matched literally.
            value = String(this).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
        // Indexed loop: for...in would also visit enumerable properties that
        // other code may have added to Array.prototype.
        for (var i = 0; i < chars.length; i++) {
            value = value.replace(new RegExp('[' + chars[i] + ']', 'g'), '[' + chars[i] + ']');
        }
        return new RegExp(value);
    };
    

    In your case, the search would look like this:

    // Keep only the words that match the accent-tolerant pattern built from the query.
    palavras.filter(function(value) {
        return value.search('programação'.toSearch()) > -1;
    });
    
        
    04.02.2014 / 02:18
    10

    An interesting solution is to create a regular expression with the pattern you want to fetch, to include all possible accents, thus giving match to the desired value.

    Code to create your pattern:

    /**
     * Builds a RegExp that matches any of the words in `str`, ignoring
     * accents and letter case.
     *
     * Each character that has an entry in the `accented` table is replaced by
     * that entry's character class; every other character is matched literally.
     *
     * @param {string} str - Search text; whitespace-separated words become alternatives.
     * @returns {RegExp} Global pattern with one alternative per word.
     */
    function criarPadrao(str) {
        // Escape regex metacharacters so they are matched literally.
        str = str.replace(/([|()[\]{}.+*?^$\\])/g, "\\$1");

        // Split into words; drop the empty entries produced by leading or
        // trailing whitespace, which would otherwise become an empty
        // alternative that matches everything.
        var words = str.split(/\s+/).filter(function (word) {
            return word.length > 0;
        });

        // Sort by length, longest first, so longer alternatives are tried first.
        words.sort(function (a, b) {
            return b.length - a.length;
        });

        // Swap each character for its character class of accented variants.
        var accentReplacer = function(chr) {
            return accented[chr.toUpperCase()] || chr;
        };
        for (var i = 0; i < words.length; i++) {
            words[i] = words[i].replace(/\S/g, accentReplacer);
        }

        // Join the alternatives.
        var regexp = words.join("|");
        return new RegExp(regexp, 'g');
    }
    
    // List of possible accentuations: maps each uppercase ASCII letter to the
    // source text of a regex character class listing the characters accepted
    // for that letter (both cases plus accented and other Unicode variants).
    var accented = {
        'A': '[Aa\xaa\xc0-\xc5\xe0-\xe5\u0100-\u0105\u01cd\u01ce\u0200-\u0203\u0226\u0227\u1d2c\u1d43\u1e00\u1e01\u1e9a\u1ea0-\u1ea3\u2090\u2100\u2101\u213b\u249c\u24b6\u24d0\u3371-\u3374\u3380-\u3384\u3388\u3389\u33a9-\u33af\u33c2\u33ca\u33df\u33ff\uff21\uff41]',
        'B': '[Bb\u1d2e\u1d47\u1e02-\u1e07\u212c\u249d\u24b7\u24d1\u3374\u3385-\u3387\u33c3\u33c8\u33d4\u33dd\uff22\uff42]',
        'C': '[Cc\xc7\xe7\u0106-\u010d\u1d9c\u2100\u2102\u2103\u2105\u2106\u212d\u216d\u217d\u249e\u24b8\u24d2\u3376\u3388\u3389\u339d\u33a0\u33a4\u33c4-\u33c7\uff23\uff43]',
        'D': '[Dd\u010e\u010f\u01c4-\u01c6\u01f1-\u01f3\u1d30\u1d48\u1e0a-\u1e13\u2145\u2146\u216e\u217e\u249f\u24b9\u24d3\u32cf\u3372\u3377-\u3379\u3397\u33ad-\u33af\u33c5\u33c8\uff24\uff44]',
        'E': '[Ee\xc8-\xcb\xe8-\xeb\u0112-\u011b\u0204-\u0207\u0228\u0229\u1d31\u1d49\u1e18-\u1e1b\u1eb8-\u1ebd\u2091\u2121\u212f\u2130\u2147\u24a0\u24ba\u24d4\u3250\u32cd\u32ce\uff25\uff45]',
        'F': '[Ff\u1da0\u1e1e\u1e1f\u2109\u2131\u213b\u24a1\u24bb\u24d5\u338a-\u338c\u3399\ufb00-\ufb04\uff26\uff46]',
        'G': '[Gg\u011c-\u0123\u01e6\u01e7\u01f4\u01f5\u1d33\u1d4d\u1e20\u1e21\u210a\u24a2\u24bc\u24d6\u32cc\u32cd\u3387\u338d-\u338f\u3393\u33ac\u33c6\u33c9\u33d2\u33ff\uff27\uff47]',
        'H': '[Hh\u0124\u0125\u021e\u021f\u02b0\u1d34\u1e22-\u1e2b\u1e96\u210b-\u210e\u24a3\u24bd\u24d7\u32cc\u3371\u3390-\u3394\u33ca\u33cb\u33d7\uff28\uff48]',
        'I': '[Ii\xcc-\xcf\xec-\xef\u0128-\u0130\u0132\u0133\u01cf\u01d0\u0208-\u020b\u1d35\u1d62\u1e2c\u1e2d\u1ec8-\u1ecb\u2071\u2110\u2111\u2139\u2148\u2160-\u2163\u2165-\u2168\u216a\u216b\u2170-\u2173\u2175-\u2178\u217a\u217b\u24a4\u24be\u24d8\u337a\u33cc\u33d5\ufb01\ufb03\uff29\uff49]',
        'J': '[Jj\u0132-\u0135\u01c7-\u01cc\u01f0\u02b2\u1d36\u2149\u24a5\u24bf\u24d9\u2c7c\uff2a\uff4a]',
        'K': '[Kk\u0136\u0137\u01e8\u01e9\u1d37\u1d4f\u1e30-\u1e35\u212a\u24a6\u24c0\u24da\u3384\u3385\u3389\u338f\u3391\u3398\u339e\u33a2\u33a6\u33aa\u33b8\u33be\u33c0\u33c6\u33cd-\u33cf\uff2b\uff4b]',
        'L': '[Ll\u0139-\u0140\u01c7-\u01c9\u02e1\u1d38\u1e36\u1e37\u1e3a-\u1e3d\u2112\u2113\u2121\u216c\u217c\u24a7\u24c1\u24db\u32cf\u3388\u3389\u33d0-\u33d3\u33d5\u33d6\u33ff\ufb02\ufb04\uff2c\uff4c]',
        'M': '[Mm\u1d39\u1d50\u1e3e-\u1e43\u2120\u2122\u2133\u216f\u217f\u24a8\u24c2\u24dc\u3377-\u3379\u3383\u3386\u338e\u3392\u3396\u3399-\u33a8\u33ab\u33b3\u33b7\u33b9\u33bd\u33bf\u33c1\u33c2\u33ce\u33d0\u33d4-\u33d6\u33d8\u33d9\u33de\u33df\uff2d\uff4d]',
        'N': '[Nn\xd1\xf1\u0143-\u0149\u01ca-\u01cc\u01f8\u01f9\u1d3a\u1e44-\u1e4b\u207f\u2115\u2116\u24a9\u24c3\u24dd\u3381\u338b\u339a\u33b1\u33b5\u33bb\u33cc\u33d1\uff2e\uff4e]',
        'O': '[Oo\xba\xd2-\xd6\xf2-\xf6\u014c-\u0151\u01a0\u01a1\u01d1\u01d2\u01ea\u01eb\u020c-\u020f\u022e\u022f\u1d3c\u1d52\u1ecc-\u1ecf\u2092\u2105\u2116\u2134\u24aa\u24c4\u24de\u3375\u33c7\u33d2\u33d6\uff2f\uff4f]',
        'P': '[Pp\u1d3e\u1d56\u1e54-\u1e57\u2119\u24ab\u24c5\u24df\u3250\u3371\u3376\u3380\u338a\u33a9-\u33ac\u33b0\u33b4\u33ba\u33cb\u33d7-\u33da\uff30\uff50]',
        'Q': '[Qq\u211a\u24ac\u24c6\u24e0\u33c3\uff31\uff51]',
        'R': '[Rr\u0154-\u0159\u0210-\u0213\u02b3\u1d3f\u1d63\u1e58-\u1e5b\u1e5e\u1e5f\u20a8\u211b-\u211d\u24ad\u24c7\u24e1\u32cd\u3374\u33ad-\u33af\u33da\u33db\uff32\uff52]',
        'S': '[Ss\u015a-\u0161\u017f\u0218\u0219\u02e2\u1e60-\u1e63\u20a8\u2101\u2120\u24ae\u24c8\u24e2\u33a7\u33a8\u33ae-\u33b3\u33db\u33dc\ufb06\uff33\uff53]',
        'T': '[Tt\u0162-\u0165\u021a\u021b\u1d40\u1d57\u1e6a-\u1e71\u1e97\u2121\u2122\u24af\u24c9\u24e3\u3250\u32cf\u3394\u33cf\ufb05\ufb06\uff34\uff54]',
        'U': '[Uu\xd9-\xdc\xf9-\xfc\u0168-\u0173\u01af\u01b0\u01d3\u01d4\u0214-\u0217\u1d41\u1d58\u1d64\u1e72-\u1e77\u1ee4-\u1ee7\u2106\u24b0\u24ca\u24e4\u3373\u337a\uff35\uff55]',
        'V': '[Vv\u1d5b\u1d65\u1e7c-\u1e7f\u2163-\u2167\u2173-\u2177\u24b1\u24cb\u24e5\u2c7d\u32ce\u3375\u33b4-\u33b9\u33dc\u33de\uff36\uff56]',
        'W': '[Ww\u0174\u0175\u02b7\u1d42\u1e80-\u1e89\u1e98\u24b2\u24cc\u24e6\u33ba-\u33bf\u33dd\uff37\uff57]',
        'X': '[Xx\u02e3\u1e8a-\u1e8d\u2093\u213b\u2168-\u216b\u2178-\u217b\u24b3\u24cd\u24e7\u33d3\uff38\uff58]',
        'Y': '[Yy\xdd\xfd\xff\u0176-\u0178\u0232\u0233\u02b8\u1e8e\u1e8f\u1e99\u1ef2-\u1ef9\u24b4\u24ce\u24e8\u33c9\uff39\uff59]',
        'Z': '[Zz\u0179-\u017e\u01f1-\u01f3\u1dbb\u1e90-\u1e95\u2124\u2128\u24b5\u24cf\u24e9\u3390-\u3394\uff3a\uff5a]'
    };
    

    To use it's very simple, here's an example:

    'coração'.match(criarPadrao('coracao')); // returns: coração
    'coraçao'.match(criarPadrao('coracao')); // returns: coraçao
    'coracao'.match(criarPadrao('coracao')); // returns: coracao
    

    Or in your case:

    // buscar builds the pattern itself via criarPadrao, so pass the plain keyword.
    buscar(palavras, 'programacao');
    

    Where buscar is something like:

    /**
     * Returns the first word in `palavras` that matches `palavraChave`
     * according to the pattern produced by criarPadrao.
     *
     * @param {string[]} palavras - Words to search.
     * @param {string} palavraChave - Keyword handed to criarPadrao.
     * @returns {string|undefined} The first matching word, or undefined when none matches.
     */
    function buscar(palavras, palavraChave) {
        // Build the pattern once instead of once per word.
        var padrao = criarPadrao(palavraChave);
        return palavras.find(function (palavra) {
            // search() ignores the pattern's lastIndex, so sharing one global
            // regex across iterations is safe.
            return palavra.search(padrao) !== -1;
        });
    }
    

    Do not forget that "palavraChave" should not contain accents; that way the pattern searches for all possible accented forms of each letter.

        
    04.02.2014 / 03:44
    9

    Removing accents is part of a larger problem (or should be).

    In case of pure search, I strongly suggest using some mechanism for "Full Text Search". These mechanisms have sophisticated systems for analyzing the text and indexing them. A popular open source search system is Apache Solr.

    A very simple example of setting up a Solr fieldType.

    <fieldType name="text_keyword" class="solr.TextField" positionIncrementGap="100">
     <analyzer>
       <!-- creates tokens by splitting on whitespace -->
       <tokenizer class="solr.WhitespaceTokenizerFactory"/>

        <!-- converts words to lower case -->
        <filter class="solr.LowerCaseFilterFactory"/>

        <!-- removes words with no semantic value, e.g. prepositions -->
        <filter class="solr.StopFilterFactory"/>

        <!-- extracts the stem of each word -->
        <filter class="solr.PorterStemFilterFactory"/>

        <!-- this filter removes the accents -->
        <filter class="solr.ASCIIFoldingFilterFactory"/>

     </analyzer>
    </fieldType>
    

    (You can still try to use Lucene which is the indexing engine under Solr directly in a database, but then the solution is even more complicated)

    The list of features is quite extensive. For example, Solr can use ternary search trees to search for spell checking, sort items by relevance, adjust relevancy function based on the proximity of terms in text or arbitrary fields, and so on. Anyway, it's a more appropriate, complete, and efficient search engine.

    If you want to implement a suggestion system on the client side you can and should adapt the text transformation system that @dcastro presented. You can even use the chain of responsibility to accomplish the transformations you want. Ex:

    // ... <= diacritic map initialisation goes here
    /**
     * Runs every character of `str` through each function in `chain`, in
     * order, and concatenates the results.
     *
     * @param {string} str - Text to transform.
     * @param {Array<function(string): string>} chain - Filters applied to each character.
     * @returns {string} The transformed text.
     */
    function letterFilter (str, chain) {
        var letras = str.split("");
        var newStr = "";
        for(var i=0; i< letras.length; i++) {
            var x = letras[i];
            // Separate index: reusing `i` here clobbered the outer loop
            // counter, truncating the output or looping forever.
            for(var j=0; j < chain.length; j++){
                   x = chain[j](x);
            }
            newStr += x;
        }
        return newStr;
    }
    
    /**
     * Maps a single character to its entry in mapaDiacriticos, or returns
     * the character unchanged when it has no entry.
     *
     * @param {string} letter - One character.
     * @returns {string} The mapped replacement or the original character.
     */
    function removeDiacritic(letter){
        // The original expression statement discarded its value, so the
        // function always returned undefined.
        return letter in mapaDiacriticos ? mapaDiacriticos[letter] : letter;
    }
    
    /**
     * Lower-cases a single character.
     *
     * @param {string} letter - One character.
     * @returns {string} The lower-case form.
     */
    function lowerCase(letter){
        return letter.toLowerCase();
    }
    
    // Mimics TextMate's word search by following each letter with a "*".
    function starCase(letter){
        var starred = letter + "*";
        return starred;
    }
    
    // Filter functions, in listed order; presumably meant to be passed as the
    // `chain` argument of letterFilter — confirm against the caller.
    var fs = [removeDiacritic, lowerCase,  starCase];
    
        
    05.02.2014 / 01:11
    6

    The accented letters are called diacritics . Here is a complete algorithm that searches and removes all the diacritics (of several languages) of a string, maintaining capital letters:

    // Each entry pairs a `base` replacement string with `letras`, the
    // characters that are mapped to that base.
    var mapaDefault = [
        {'base':'A', 'letras':'\u0041\u24B6\uFF21\u00C0\u00C1\u00C2\u1EA6\u1EA4\u1EAA\u1EA8\u00C3\u0100\u0102\u1EB0\u1EAE\u1EB4\u1EB2\u0226\u01E0\u00C4\u01DE\u1EA2\u00C5\u01FA\u01CD\u0200\u0202\u1EA0\u1EAC\u1EB6\u1E00\u0104\u023A\u2C6F'},
        {'base':'AA','letras':'\uA732'},
        {'base':'AE','letras':'\u00C6\u01FC\u01E2'},
        {'base':'AO','letras':'\uA734'},
        {'base':'AU','letras':'\uA736'},
        {'base':'AV','letras':'\uA738\uA73A'},
        {'base':'AY','letras':'\uA73C'},
        {'base':'B', 'letras':'\u0042\u24B7\uFF22\u1E02\u1E04\u1E06\u0243\u0182\u0181'},
        {'base':'C', 'letras':'\u0043\u24B8\uFF23\u0106\u0108\u010A\u010C\u00C7\u1E08\u0187\u023B\uA73E'},
        {'base':'D', 'letras':'\u0044\u24B9\uFF24\u1E0A\u010E\u1E0C\u1E10\u1E12\u1E0E\u0110\u018B\u018A\u0189\uA779'},
        {'base':'DZ','letras':'\u01F1\u01C4'},
        {'base':'Dz','letras':'\u01F2\u01C5'},
        {'base':'E', 'letras':'\u0045\u24BA\uFF25\u00C8\u00C9\u00CA\u1EC0\u1EBE\u1EC4\u1EC2\u1EBC\u0112\u1E14\u1E16\u0114\u0116\u00CB\u1EBA\u011A\u0204\u0206\u1EB8\u1EC6\u0228\u1E1C\u0118\u1E18\u1E1A\u0190\u018E'},
        {'base':'F', 'letras':'\u0046\u24BB\uFF26\u1E1E\u0191\uA77B'},
        {'base':'G', 'letras':'\u0047\u24BC\uFF27\u01F4\u011C\u1E20\u011E\u0120\u01E6\u0122\u01E4\u0193\uA7A0\uA77D\uA77E'},
        {'base':'H', 'letras':'\u0048\u24BD\uFF28\u0124\u1E22\u1E26\u021E\u1E24\u1E28\u1E2A\u0126\u2C67\u2C75\uA78D'},
        {'base':'I', 'letras':'\u0049\u24BE\uFF29\u00CC\u00CD\u00CE\u0128\u012A\u012C\u0130\u00CF\u1E2E\u1EC8\u01CF\u0208\u020A\u1ECA\u012E\u1E2C\u0197'},
        {'base':'J', 'letras':'\u004A\u24BF\uFF2A\u0134\u0248'},
        {'base':'K', 'letras':'\u004B\u24C0\uFF2B\u1E30\u01E8\u1E32\u0136\u1E34\u0198\u2C69\uA740\uA742\uA744\uA7A2'},
        {'base':'L', 'letras':'\u004C\u24C1\uFF2C\u013F\u0139\u013D\u1E36\u1E38\u013B\u1E3C\u1E3A\u0141\u023D\u2C62\u2C60\uA748\uA746\uA780'},
        {'base':'LJ','letras':'\u01C7'},
        {'base':'Lj','letras':'\u01C8'},
        {'base':'M', 'letras':'\u004D\u24C2\uFF2D\u1E3E\u1E40\u1E42\u2C6E\u019C'},
        {'base':'N', 'letras':'\u004E\u24C3\uFF2E\u01F8\u0143\u00D1\u1E44\u0147\u1E46\u0145\u1E4A\u1E48\u0220\u019D\uA790\uA7A4'},
        {'base':'NJ','letras':'\u01CA'},
        {'base':'Nj','letras':'\u01CB'},
        {'base':'O', 'letras':'\u004F\u24C4\uFF2F\u00D2\u00D3\u00D4\u1ED2\u1ED0\u1ED6\u1ED4\u00D5\u1E4C\u022C\u1E4E\u014C\u1E50\u1E52\u014E\u022E\u0230\u00D6\u022A\u1ECE\u0150\u01D1\u020C\u020E\u01A0\u1EDC\u1EDA\u1EE0\u1EDE\u1EE2\u1ECC\u1ED8\u01EA\u01EC\u00D8\u01FE\u0186\u019F\uA74A\uA74C'},
        {'base':'OI','letras':'\u01A2'},
        {'base':'OO','letras':'\uA74E'},
        {'base':'OU','letras':'\u0222'},
        {'base':'P', 'letras':'\u0050\u24C5\uFF30\u1E54\u1E56\u01A4\u2C63\uA750\uA752\uA754'},
        {'base':'Q', 'letras':'\u0051\u24C6\uFF31\uA756\uA758\u024A'},
        {'base':'R', 'letras':'\u0052\u24C7\uFF32\u0154\u1E58\u0158\u0210\u0212\u1E5A\u1E5C\u0156\u1E5E\u024C\u2C64\uA75A\uA7A6\uA782'},
        {'base':'S', 'letras':'\u0053\u24C8\uFF33\u1E9E\u015A\u1E64\u015C\u1E60\u0160\u1E66\u1E62\u1E68\u0218\u015E\u2C7E\uA7A8\uA784'},
        {'base':'T', 'letras':'\u0054\u24C9\uFF34\u1E6A\u0164\u1E6C\u021A\u0162\u1E70\u1E6E\u0166\u01AC\u01AE\u023E\uA786'},
        {'base':'TZ','letras':'\uA728'},
        {'base':'U', 'letras':'\u0055\u24CA\uFF35\u00D9\u00DA\u00DB\u0168\u1E78\u016A\u1E7A\u016C\u00DC\u01DB\u01D7\u01D5\u01D9\u1EE6\u016E\u0170\u01D3\u0214\u0216\u01AF\u1EEA\u1EE8\u1EEE\u1EEC\u1EF0\u1EE4\u1E72\u0172\u1E76\u1E74\u0244'},
        {'base':'V', 'letras':'\u0056\u24CB\uFF36\u1E7C\u1E7E\u01B2\uA75E\u0245'},
        {'base':'VY','letras':'\uA760'},
        {'base':'W', 'letras':'\u0057\u24CC\uFF37\u1E80\u1E82\u0174\u1E86\u1E84\u1E88\u2C72'},
        {'base':'X', 'letras':'\u0058\u24CD\uFF38\u1E8A\u1E8C'},
        {'base':'Y', 'letras':'\u0059\u24CE\uFF39\u1EF2\u00DD\u0176\u1EF8\u0232\u1E8E\u0178\u1EF6\u1EF4\u01B3\u024E\u1EFE'},
        {'base':'Z', 'letras':'\u005A\u24CF\uFF3A\u0179\u1E90\u017B\u017D\u1E92\u1E94\u01B5\u0224\u2C7F\u2C6B\uA762'},
        {'base':'a', 'letras':'\u0061\u24D0\uFF41\u1E9A\u00E0\u00E1\u00E2\u1EA7\u1EA5\u1EAB\u1EA9\u00E3\u0101\u0103\u1EB1\u1EAF\u1EB5\u1EB3\u0227\u01E1\u00E4\u01DF\u1EA3\u00E5\u01FB\u01CE\u0201\u0203\u1EA1\u1EAD\u1EB7\u1E01\u0105\u2C65\u0250'},
        {'base':'aa','letras':'\uA733'},
        {'base':'ae','letras':'\u00E6\u01FD\u01E3'},
        {'base':'ao','letras':'\uA735'},
        {'base':'au','letras':'\uA737'},
        {'base':'av','letras':'\uA739\uA73B'},
        {'base':'ay','letras':'\uA73D'},
        {'base':'b', 'letras':'\u0062\u24D1\uFF42\u1E03\u1E05\u1E07\u0180\u0183\u0253'},
        {'base':'c', 'letras':'\u0063\u24D2\uFF43\u0107\u0109\u010B\u010D\u00E7\u1E09\u0188\u023C\uA73F\u2184'},
        {'base':'d', 'letras':'\u0064\u24D3\uFF44\u1E0B\u010F\u1E0D\u1E11\u1E13\u1E0F\u0111\u018C\u0256\u0257\uA77A'},
        {'base':'dz','letras':'\u01F3\u01C6'},
        {'base':'e', 'letras':'\u0065\u24D4\uFF45\u00E8\u00E9\u00EA\u1EC1\u1EBF\u1EC5\u1EC3\u1EBD\u0113\u1E15\u1E17\u0115\u0117\u00EB\u1EBB\u011B\u0205\u0207\u1EB9\u1EC7\u0229\u1E1D\u0119\u1E19\u1E1B\u0247\u025B\u01DD'},
        {'base':'f', 'letras':'\u0066\u24D5\uFF46\u1E1F\u0192\uA77C'},
        {'base':'g', 'letras':'\u0067\u24D6\uFF47\u01F5\u011D\u1E21\u011F\u0121\u01E7\u0123\u01E5\u0260\uA7A1\u1D79\uA77F'},
        {'base':'h', 'letras':'\u0068\u24D7\uFF48\u0125\u1E23\u1E27\u021F\u1E25\u1E29\u1E2B\u1E96\u0127\u2C68\u2C76\u0265'},
        {'base':'hv','letras':'\u0195'},
        {'base':'i', 'letras':'\u0069\u24D8\uFF49\u00EC\u00ED\u00EE\u0129\u012B\u012D\u00EF\u1E2F\u1EC9\u01D0\u0209\u020B\u1ECB\u012F\u1E2D\u0268\u0131'},
        {'base':'j', 'letras':'\u006A\u24D9\uFF4A\u0135\u01F0\u0249'},
        {'base':'k', 'letras':'\u006B\u24DA\uFF4B\u1E31\u01E9\u1E33\u0137\u1E35\u0199\u2C6A\uA741\uA743\uA745\uA7A3'},
        {'base':'l', 'letras':'\u006C\u24DB\uFF4C\u0140\u013A\u013E\u1E37\u1E39\u013C\u1E3D\u1E3B\u017F\u0142\u019A\u026B\u2C61\uA749\uA781\uA747'},
        {'base':'lj','letras':'\u01C9'},
        {'base':'m', 'letras':'\u006D\u24DC\uFF4D\u1E3F\u1E41\u1E43\u0271\u026F'},
        {'base':'n', 'letras':'\u006E\u24DD\uFF4E\u01F9\u0144\u00F1\u1E45\u0148\u1E47\u0146\u1E4B\u1E49\u019E\u0272\u0149\uA791\uA7A5'},
        {'base':'nj','letras':'\u01CC'},
        {'base':'o', 'letras':'\u006F\u24DE\uFF4F\u00F2\u00F3\u00F4\u1ED3\u1ED1\u1ED7\u1ED5\u00F5\u1E4D\u022D\u1E4F\u014D\u1E51\u1E53\u014F\u022F\u0231\u00F6\u022B\u1ECF\u0151\u01D2\u020D\u020F\u01A1\u1EDD\u1EDB\u1EE1\u1EDF\u1EE3\u1ECD\u1ED9\u01EB\u01ED\u00F8\u01FF\u0254\uA74B\uA74D\u0275'},
        {'base':'oi','letras':'\u01A3'},
        {'base':'ou','letras':'\u0223'},
        {'base':'oo','letras':'\uA74F'},
        {'base':'p','letras':'\u0070\u24DF\uFF50\u1E55\u1E57\u01A5\u1D7D\uA751\uA753\uA755'},
        {'base':'q','letras':'\u0071\u24E0\uFF51\u024B\uA757\uA759'},
        {'base':'r','letras':'\u0072\u24E1\uFF52\u0155\u1E59\u0159\u0211\u0213\u1E5B\u1E5D\u0157\u1E5F\u024D\u027D\uA75B\uA7A7\uA783'},
        {'base':'s','letras':'\u0073\u24E2\uFF53\u00DF\u015B\u1E65\u015D\u1E61\u0161\u1E67\u1E63\u1E69\u0219\u015F\u023F\uA7A9\uA785\u1E9B'},
        {'base':'t','letras':'\u0074\u24E3\uFF54\u1E6B\u1E97\u0165\u1E6D\u021B\u0163\u1E71\u1E6F\u0167\u01AD\u0288\u2C66\uA787'},
        {'base':'tz','letras':'\uA729'},
        {'base':'u','letras': '\u0075\u24E4\uFF55\u00F9\u00FA\u00FB\u0169\u1E79\u016B\u1E7B\u016D\u00FC\u01DC\u01D8\u01D6\u01DA\u1EE7\u016F\u0171\u01D4\u0215\u0217\u01B0\u1EEB\u1EE9\u1EEF\u1EED\u1EF1\u1EE5\u1E73\u0173\u1E77\u1E75\u0289'},
        {'base':'v','letras':'\u0076\u24E5\uFF56\u1E7D\u1E7F\u028B\uA75F\u028C'},
        {'base':'vy','letras':'\uA761'},
        {'base':'w','letras':'\u0077\u24E6\uFF57\u1E81\u1E83\u0175\u1E87\u1E85\u1E98\u1E89\u2C73'},
        {'base':'x','letras':'\u0078\u24E7\uFF58\u1E8B\u1E8D'},
        {'base':'y','letras':'\u0079\u24E8\uFF59\u1EF3\u00FD\u0177\u1EF9\u0233\u1E8F\u00FF\u1EF7\u1E99\u1EF5\u01B4\u024F\u1EFF'},
        {'base':'z','letras':'\u007A\u24E9\uFF5A\u017A\u1E91\u017C\u017E\u1E93\u1E95\u01B6\u0225\u0240\u2C6C\uA763'}
    ];
    
    // Flatten mapaDefault into a lookup table: character -> base replacement.
    var mapaDiacriticos = {};
    var i = 0;
    while (i < mapaDefault.length) {
        var letras = mapaDefault[i].letras.split("");
        var j = 0;
        while (j < letras.length) {
            mapaDiacriticos[letras[j]] = mapaDefault[i].base;
            j++;
        }
        i++;
    }
    
    /**
     * Returns `str` with every character that has an entry in
     * mapaDiacriticos replaced by that entry; other characters are kept.
     *
     * @param {string} str - Text to process.
     * @returns {string} The text with mapped characters replaced.
     */
    function removeDiacriticos (str) {
        return str.split("").reduce(function (resultado, letra) {
            var substituta = letra in mapaDiacriticos ? mapaDiacriticos[letra] : letra;
            return resultado + substituta;
        }, "");
    }
    

    Source: Javascript: remove accents in strings

        
    04.02.2014 / 09:48
    4

    You can create a string "Slug" and then do a search, like the code below

    /**
     * Converts this string to a slug: trimmed, lower-cased, with the listed
     * accented letters replaced by their base letter, the listed punctuation
     * and whitespace turned into "-", and everything else outside
     * [a-z0-9 -] removed.
     *
     * @returns {string} The slug.
     */
    String.prototype.toSlug = function(){
        // Declared locally: the original assigned to an undeclared `str`,
        // creating an implicit global (a ReferenceError in strict mode).
        var str = this.replace(/^\s+|\s+$/g, ''); // trim
        str = str.toLowerCase();

        // remove accents, swap ñ for n, etc
        // `from` and `to` are parallel: from[i] is replaced by to[i].
        var from = "ãàáäâèéëêìíïîõòóöôùúüûñç·/_,:;";
        var to   = "aaaaaeeeeiiiiooooouuuunc------";
        for (var i=0, l=from.length ; i<l ; i++) {
            str = str.replace(new RegExp(from.charAt(i), 'g'), to.charAt(i));
        }

        // Drop remaining invalid characters, turn whitespace into dashes and
        // collapse runs of dashes.
        str = str.replace(/[^a-z0-9 -]/g, '').replace(/\s+/g, '-').replace(/-+/g, '-');

        return str;
    };
    
    
    /**
     * Searches the slug of this string for `query`, ignoring accents and case.
     *
     * @param {string} query - Text to look for.
     * @returns {number} Index of the match within the slug (not within the
     *     original string), or -1 when there is no match.
     */
    String.prototype.toSearch = function(query) {
        var str = this.toSlug();

        // Slug the query too: an accented query could never match the
        // accent-free slug, and this keeps regex metacharacters out of the search.
        return str.indexOf(String(query).toSlug());
    };
    
    
    // Demo: clicking the #search element alerts the index returned by toSearch('Paulo').
    var str = 'São Paulo';
    $('#search').click(function() {
        alert(str.toSearch('Paulo'));
    });
    

    Following is an example link

        
    04.02.2014 / 02:38