How to identify a number (in words) in a phrase

4

I need a script to identify a number in a sentence. Ex:

" Two weeks ago" - > number = 2

It could even be done by replacement. For example:

If str contains "two", replace("two", 2)

Is there a Node module that does this? I have already searched, but only found some that do the opposite (number -> words).

asked by anonymous 25.07.2016 / 01:07

2 answers

5

Thiago, what I found was a Python function ported to JavaScript, as you can see in this question. Even so, it is quite functional:

// Lookup table: English words for 0 to 19 and for the multiples of ten up to 90.
var Small = {
  // units
  zero: 0, one: 1, two: 2, three: 3, four: 4,
  five: 5, six: 6, seven: 7, eight: 8, nine: 9,
  // ten to nineteen
  ten: 10, eleven: 11, twelve: 12, thirteen: 13, fourteen: 14,
  fifteen: 15, sixteen: 16, seventeen: 17, eighteen: 18, nineteen: 19,
  // tens
  twenty: 20, thirty: 30, forty: 40, fifty: 50,
  sixty: 60, seventy: 70, eighty: 80, ninety: 90
};

// Lookup table: English scale words mapped to the power of one thousand they stand for.
var Magnitude = {
  thousand: 1e3,
  million: 1e6,
  billion: 1e9,
  trillion: 1e12,
  quadrillion: 1e15,
  quintillion: 1e18,
  sextillion: 1e21,
  septillion: 1e24,
  octillion: 1e27,
  nonillion: 1e30,
  decillion: 1e33
};

// Parser state shared between text2num and feach:
//   a - the words of the phrase currently being parsed
//   n - sum of the magnitude groups completed so far
//   g - value of the group that is still being built
var a, n, g;

/**
 * Converts a number written in English words into its numeric value.
 *
 * The phrase is split on whitespace and hyphens, and each word is handed to
 * feach, which accumulates the result in the shared variables n and g.
 *
 * @param {*} s - Phrase to convert; it is coerced to a string first.
 * @returns {number} The value of the phrase (0 when it contains no number word).
 */
function text2num(s) {
  // Lower-case first: the lookup tables only hold lower-case keys, so "Two"
  // or "TWO" would otherwise be ignored silently. String() is used instead of
  // s.toString() so that null/undefined give 0 rather than a TypeError.
  a = String(s).toLowerCase().split(/[\s-]+/);
  n = 0;
  g = 0;
  a.forEach(feach);
  return g + n;
}
/**
 * Folds one word of a spelled-out number into the shared parser state.
 *
 * - a word from Small is added to the current group g;
 * - "hundred" multiplies the current group by 100;
 * - a word from Magnitude closes the group: g times the magnitude is added
 *   to n and g starts again at 0;
 * - every other word (including "and" and empty tokens) is ignored.
 *
 * @param {string} w - A single word; the lookup tables hold lower-case keys.
 */
function feach(w) {
  // Own-property checks keep words such as "constructor" or "__proto__" from
  // resolving to members inherited from Object.prototype, which are not
  // numbers and would turn the accumulator into a string.
  if (Object.prototype.hasOwnProperty.call(Small, w)) {
    g = g + Small[w];
  } else if (w === "hundred") {
    g = g * 100;
  } else if (Object.prototype.hasOwnProperty.call(Magnitude, w)) {
    n = n + g * Magnitude[w];
    g = 0;
  }
}

// Demo: append the value of a long spelled-out number (5002114069) to the page.
document.body.innerHTML = document.body.innerHTML + text2num('five billion two million one hundred and fourteen thousand and sixty-nine');

The more "complicated" part in this case is the replacement, since capturing the sequence of words that may or may not form a number is a bit more laborious. But you can also use a marker. In this case I used $().

In the following example I use the marker on "one" and "two", which clearly would not be necessary since they are small numbers, but in the case of "five billion two million one hundred and fourteen thousand and sixty-nine" it would be hard to tell the number apart from the other words.

// Lookup table for the words "zero" to "nineteen" and the tens "twenty" to "ninety".
var Small = {};
[
  'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine',
  'ten', 'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen', 'sixteen', 'seventeen', 'eighteen', 'nineteen'
].forEach(function (word, value) {
  Small[word] = value; // a word's position in the list is its value
});
['twenty', 'thirty', 'forty', 'fifty', 'sixty', 'seventy', 'eighty', 'ninety'].forEach(function (word, position) {
  Small[word] = (position + 2) * 10; // "twenty" is 2 * 10, "thirty" is 3 * 10, ...
});

// Lookup table for scale words: the k-th word in the list stands for 10^(3k).
var Magnitude = {};
[
  'thousand', 'million', 'billion', 'trillion', 'quadrillion', 'quintillion',
  'sextillion', 'septillion', 'octillion', 'nonillion', 'decillion'
].forEach(function (word, position) {
  // Parsing "1e3", "1e6", ... gives the same Number as the written-out literal.
  Magnitude[word] = Number('1e' + (position + 1) * 3);
});

// Parser state shared between text2num and feach:
//   a - the words of the phrase currently being parsed
//   n - sum of the magnitude groups completed so far
//   g - value of the group that is still being built
var a, n, g;

/**
 * Converts a number written in English words into its numeric value.
 *
 * The phrase is split on whitespace and hyphens, and each word is handed to
 * feach, which accumulates the result in the shared variables n and g.
 *
 * @param {*} s - Phrase to convert; it is coerced to a string first.
 * @returns {number} The value of the phrase (0 when it contains no number word).
 */
function text2num(s) {
  // Lower-case first: the lookup tables only hold lower-case keys, so "Two"
  // or "TWO" would otherwise be ignored silently. String() is used instead of
  // s.toString() so that null/undefined give 0 rather than a TypeError.
  a = String(s).toLowerCase().split(/[\s-]+/);
  n = 0;
  g = 0;
  a.forEach(feach);
  return g + n;
}

/**
 * Folds one word of a spelled-out number into the shared parser state.
 *
 * - a word from Small is added to the current group g;
 * - "hundred" multiplies the current group by 100;
 * - a word from Magnitude closes the group: g times the magnitude is added
 *   to n and g starts again at 0;
 * - every other word (including "and" and empty tokens) is ignored.
 *
 * @param {string} w - A single word; the lookup tables hold lower-case keys.
 */
function feach(w) {
  // Own-property checks keep words such as "constructor" or "__proto__" from
  // resolving to members inherited from Object.prototype, which are not
  // numbers and would turn the accumulator into a string.
  if (Object.prototype.hasOwnProperty.call(Small, w)) {
    g = g + Small[w];
  } else if (w === "hundred") {
    g = g * 100;
  } else if (Object.prototype.hasOwnProperty.call(Magnitude, w)) {
    n = n + g * Magnitude[w];
    g = 0;
  }
}

// Matches one "$(...)" marker and captures the words between the parentheses.
// ")" is excluded from the capture so that each marker ends at its own closing
// parenthesis instead of at the last ")" found before the next "$".
var regex = /\$\(([^$)]+)\)/g;
var text = document.querySelectorAll('p')[0].innerHTML;

// Convert every marker in a single pass. A replacer callback is used instead of
// building a second RegExp from the marker text: backslash escapes written in a
// string literal are dropped, so '\$\(' would become "$(" - an end-of-input
// anchor followed by a group - and never match. Calling replace() directly is
// also safe when the text contains no marker at all.
text = text.replace(regex, function(marker, words) {
  return text2num(words);
});

document.body.innerHTML += "<br><br>";
document.body.innerHTML += text;
<p>No $(two) objects can occupy the same place at $(one) time
</p>

Version without the marker

I think I found a way to recognize the number among other words without needing the markers above; please check it for possible errors. I created the function replaceToNum(), which takes any string as an argument and returns the same string with the spelled-out numbers converted into numeric digits:

/**
 * Replaces every number spelled out in English words inside `text` with digits.
 *
 * A number phrase is a run of number words ("two", "hundred", "million", ...)
 * joined by spaces, hyphens and, between two number words, an optional "and",
 * e.g. "one hundred and fourteen thousand and sixty-nine". Matching is
 * case-insensitive and restricted to whole words, so "someone", "often" or a
 * free-standing "and" are left alone. Hyphenated compounds whose other half is
 * not a number word ("one-time", "non-zero") are left alone as well.
 *
 * Limitations: word boundaries are ASCII based, consecutive number words are
 * always read as one number, and results are ordinary Numbers, so values above
 * 2^53 may lose precision and values from 1e21 upwards print in exponent form.
 *
 * @param {*} text - Value to convert; it is coerced to a string first.
 * @returns {string} The text with each number phrase replaced by its value.
 */
function replaceToNum(text) {
  var Small = {
    'zero': 0, 'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5, 'six': 6, 'seven': 7, 'eight': 8, 'nine': 9,
    'ten': 10, 'eleven': 11, 'twelve': 12, 'thirteen': 13, 'fourteen': 14, 'fifteen': 15, 'sixteen': 16,
    'seventeen': 17, 'eighteen': 18, 'nineteen': 19,
    'twenty': 20, 'thirty': 30, 'forty': 40, 'fifty': 50, 'sixty': 60, 'seventy': 70, 'eighty': 80, 'ninety': 90
  };
  var Magnitude = {
    'thousand': 1000, 'million': 1000000, 'billion': 1000000000, 'trillion': 1000000000000,
    'quadrillion': 1000000000000000, 'quintillion': 1000000000000000000,
    'sextillion': 1000000000000000000000, 'septillion': 1000000000000000000000000,
    'octillion': 1000000000000000000000000000, 'nonillion': 1000000000000000000000000000000,
    'decillion': 1000000000000000000000000000000000
  };

  // Converts one matched, lower-cased phrase into its numeric value.
  function text2num(s) {
    var total = 0;            // sum of the magnitude groups completed so far
    var group = 0;            // value of the group still being built
    var groupStarted = false; // whether the current group has received a value
    s.split(/[\s-]+/).forEach(function(w) {
      if (Object.prototype.hasOwnProperty.call(Small, w)) {
        group = group + Small[w];
        groupStarted = true;
      } else if (w === "hundred") {
        // A bare "hundred" ("a hundred people") counts as one hundred, not 0.
        group = (groupStarted ? group : 1) * 100;
        groupStarted = true;
      } else if (Object.prototype.hasOwnProperty.call(Magnitude, w)) {
        // Likewise a bare "thousand", "million", ... counts as one of them.
        total = total + (groupStarted ? group : 1) * Magnitude[w];
        group = 0;
        groupStarted = false;
      }
      // "and" carries no value and is skipped.
    });
    return total + group;
  }

  var word = "(?:" + Object.keys(Magnitude).concat("hundred", Object.keys(Small)).join("|") + ")";
  // One number word, optionally followed by more number words, each joined by
  // spaces/hyphens and an optional "and". The trailing look-ahead rejects a
  // phrase that is the first half of a hyphenated compound ("one-time").
  var regex = new RegExp("\\b" + word + "(?:[ -]+(?:and[ -]+)?" + word + ")*\\b(?!-\\w)", "gi");

  // Replacing through a callback rewrites each phrase at the position where it
  // was matched, and simply returns the input when nothing matches.
  return String(text).replace(regex, function(phrase, offset, whole) {
    // Second half of a hyphenated compound ("non-zero"): leave it untouched.
    if (offset > 0 && whole.charAt(offset - 1) === "-") {
      return phrase;
    }
    return String(text2num(phrase.toLowerCase()));
  });
}
// Sample sentence mixing number words (in varying letter case) with ordinary text.
var text = "No two objects can occupy the same place one time. Bla bla bla five billion Two Million one hundred and fourteen thousand and sixty-nine. Four. Minha criatividade tá ZeRo";

// Demo output: two line breaks, then the converted sentence, appended to the page.
document.body.innerHTML = document.body.innerHTML + "<br><br>";
document.body.innerHTML = document.body.innerHTML + replaceToNum(text);
    
25.07.2016 / 04:05
3

Some time ago I worked on this idea in a project where I needed to filter client ages typed into an <input>. I ended up changing the approach and creating buttons to enter the numbers without having to filter them, but the idea was this:

// Number words per language code. Every list has the same layout: the words
// for 0 to 19, then the tens from 20 to 90, then the word for 100.
var numbers = {
    en: [
        'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine',
        'ten', 'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen', 'sixteen', 'seventeen', 'eighteen', 'nineteen',
        'twenty', 'thirty', 'forty', 'fifty', 'sixty', 'seventy', 'eighty', 'ninety', 'hundred'
    ],
    se: [
        'noll', 'ett', 'två', 'tre', 'fyra', 'fem', 'sex', 'sju', 'åtta', 'nio',
        'tio', 'elva', 'tolv', 'tretton', 'fjorton', 'femton', 'sexton', 'sjutton', 'arton', 'nitton',
        'tjugo', 'trettio', 'fyrtio', 'femtio', 'sextio', 'sjuttio', 'åttio', 'nittio', 'hundra'
    ],
    de: [
        'null', 'eins', 'zwei', 'drei', 'vier', 'fünf', 'sechs', 'sieben', 'acht', 'neun',
        'zehn', 'elf', 'zwölf', 'dreizehn', 'vierzehn', 'fünfzehn', 'sechzehn', 'siebzehn', 'achtzehn', 'neunzehn',
        'zwanzig', 'dreißig', 'vierzig', 'fünfzig', 'sechzig', 'siebzig', 'achtzig', 'neunzig', 'hundert'
    ],
    no: [
        'null', 'en', 'to', 'tre', 'fire', 'fem', 'seks', 'sju', 'åtte', 'ni',
        'ti', 'elleve', 'tolv', 'tretten', 'fjorten', 'femten', 'seksten', 'sytten', 'atten', 'nitten',
        'tyve', 'tretti', 'førti', 'femti', 'seksti', 'sytti', 'åtti', 'nitti', 'hundre'
    ]
};

// Numeric value for each position of the word lists above: 0-19, 20-90 in steps of ten, then 100.
var mapper = (function() {
    var values = [];
    var value;
    for (value = 0; value < 20; value++) {
        values.push(value);
    }
    for (value = 20; value <= 90; value += 10) {
        values.push(value);
    }
    values.push(100);
    return values;
})();

/**
 * Extracts the numbers mentioned in a string, whether written as words (in
 * any language listed in `numbers`) or as digits.
 *
 * For every language each number word is searched as a whole word, ignoring
 * case. A tens word (or the word for 100) directly followed by a units word
 * of the same language is merged into one value ("thirty seven" -> 37).
 * Digit sequences found in the string are appended at the end.
 *
 * Limitations: results follow the order of the word lists, not the order in
 * the text; a word is reported once per language however often it occurs; and
 * because all languages are tried, a word shared by two languages is reported
 * twice and short words of one language can match text written in another.
 *
 * @param {string} string - Text to scan.
 * @returns {number[]} The numbers found: word matches first, then digit matches.
 */
function findNumber(string) {
    var results = [];
    Object.keys(numbers).forEach(function(lang) {
        var nrs = numbers[lang];
        // Words for 1-9, used to recognise compounds such as "thirty seven".
        var units = nrs.slice(1, 10);
        nrs.forEach(function(nr, i) {
            // Backslashes are doubled because the pattern is built from string
            // literals: a single "\w" would reach the RegExp as a plain "w".
            // The word must sit between non-word, non-hyphen characters or at
            // either end of the string.
            var rexep = new RegExp('(?:^|[^\\w\\-])' + nr + '(?:[^\\w\\-]|$)', 'i');
            if (!rexep.test(string)) return;

            var mappedNumber = mapper[i];
            if (mappedNumber > 19) {
                var customRegexp = new RegExp('(?:^|[^\\w\\-])' + nr + '\\s+(' + units.join('|') + ')(?:[^\\w\\-]|$)', 'i');
                var wholeNumber = string.match(customRegexp);
                if (wholeNumber) {
                    var unit = mapper[units.indexOf(wholeNumber[1].toLowerCase()) + 1];
                    // The units word was already recorded on its own (units
                    // come first in the list); fold that entry into this one
                    // rather than popping whatever number happens to be last.
                    var recorded = results.lastIndexOf(unit);
                    if (recorded !== -1) results.splice(recorded, 1);
                    mappedNumber = mappedNumber + unit;
                }
            }
            results = results.concat(mappedNumber);
        });
    });
    var more = string.match(/\d+/g) || [];
    return results.concat(more).map(Number);
};

Basically this function compares text from different languages and extracts the numbers. An example would be:

// Demo: run findNumber over a few sample sentences and log the numbers found in each.
var testes = [
    'TWO WEEKS AGO',
    'two weeks ago',
    'I am thirty seven years old!',
    'I was the the number five in my class! Now I am number one!'
];
console.log(JSON.stringify(testes.map(function(sentence) {
    return findNumber(sentence);
}))); // output stated by the author: [[2],[2],[37],[1,5]]

I did not take this any further at the time. It may be useful to you as it is; things I remember wanting to do:

  • Respect the order found in the result
  • allow orders greater than 100 (logic could be with Math.floor(nr / 100) and search for the following N words)
  • filter the language first to avoid cases where words from other languages match.

If you want to use it you can see a live example here: link

    
25.07.2016 / 10:10