wordpos/src/util.js

67 lines
1.3 KiB
JavaScript
Raw Normal View History

2018-10-15 05:20:56 +00:00
/**
* util.js
*
* Copyright (c) 2012-2019 mooster@42at.com
* https://github.com/moos/wordpos
*
* Released under MIT license
*/
2018-10-13 03:35:11 +00:00
// Stopword list: the `words` export of ../lib/natural/util/stopwords.
// Presumably an array of strings, since it is passed to makeStopwordString below — confirm against that module.
let stopwords = require('../lib/natural/util/stopwords').words;
// Space-delimited form of the list, built once when this module loads.
// prepText falls back to it when options.stopwords is truthy but not a string.
// makeStopwordString is a function declaration defined further down; hoisting makes this call valid.
let stopwordsStr = makeStopwordString(stopwords);
/**
 * Packs a list of stopwords into a single space-delimited string.
 *
 * Entries are joined with one space and the result is wrapped in a leading
 * and a trailing space, so every entry appears as ` word ` inside the string.
 *
 * @param {string[]} stopwords - words to pack
 * @returns {string} e.g. ['a', 'the'] becomes ' a the '
 */
function makeStopwordString(stopwords) {
  const joined = stopwords.join(' ');
  return ` ${joined} `;
}
// Calls `fn` with the argument list `args`, if a callback was supplied.
// The call is made synchronously on the caller's stack; nothing is deferred.
// NOTE(review): the earlier comment here described setImmediate semantics
// (running the callback after promise handlers so its exceptions are not
// caught by a Promise). This implementation does not defer — confirm intent.
function nextTick(fn, args) {
  if (!fn) {
    return;
  }
  fn.apply(null, args);
}
/**
 * Lower-cases a word and replaces each run of whitespace with one underscore.
 *
 * @param {string} word - text to normalize
 * @returns {string} e.g. 'Hot  Dog' becomes 'hot_dog'
 */
function normalize(word) {
  const lowered = word.toLowerCase();
  return lowered.replace(/\s+/g, '_');
}
2018-10-15 05:20:56 +00:00
/**
 * Tests whether `word` occurs in a space-delimited stopword string.
 *
 * The word is wrapped in single spaces before searching, so only whole
 * entries match (e.g. 'th' does not match inside ' the '). The string is
 * therefore expected to have a space before and after every entry, which is
 * the format makeStopwordString produces. Matching is case-sensitive.
 *
 * @param {string} stopwordsStr - space-delimited stopwords, e.g. ' a the of '
 * @param {string} word - candidate word
 * @returns {boolean} true when the word is one of the entries
 */
function isStopword(stopwordsStr, word) {
  return stopwordsStr.includes(` ${word} `);
}
/**
 * Splits text into word tokens.
 *
 * The text is split on runs of non-word characters (`\W`, i.e. anything other
 * than ASCII letters, digits and underscore). Splitting alone produces
 * empty-string tokens when the text is empty or starts or ends with a
 * separator (e.g. 'bear.' gives ['bear', '']), so those are filtered out.
 *
 * @param {string} str - text to tokenize
 * @returns {string[]} non-empty tokens in their original order and case
 */
function tokenizer(str) {
  return str.split(/\W+/).filter(token => token !== '');
}
/**
 * Returns a new array with duplicate values removed.
 *
 * The first occurrence of each value is kept and relative order is preserved.
 * A Set is used so the work is linear in the array length, rather than the
 * quadratic cost of an indexOf scan per element.
 *
 * @param {Array} arr - values to de-duplicate (not modified)
 * @returns {Array} the distinct values in first-seen order
 */
function uniq(arr) {
  return [...new Set(arr)];
}
/**
 * Reports whether a value is a primitive string.
 *
 * Uses `typeof`, so String wrapper objects (`new String('x')`) return false.
 *
 * @param {*} s - value to test
 * @returns {boolean} true only for primitive strings
 */
function isString(s) {
  const kind = typeof s;
  return kind === 'string';
}
/**
 * Inverse of Array.prototype.filter: keeps the items for which `predicate`
 * returns a falsy value.
 *
 * The predicate is called with the item only (no index or array argument).
 *
 * @param {Array} arr - items to test (not modified)
 * @param {function(*): *} predicate - items yielding a truthy result are dropped
 * @returns {Array} new array of the items that were not rejected
 */
function reject(arr, predicate) {
  const shouldKeep = function (item) {
    return !predicate(item);
  };
  return arr.filter(shouldKeep);
}
/**
 * Prepares input text for processing.
 *
 * Must be called with `this` bound to an object exposing `options.stopwords`.
 *
 * - An array is returned unchanged, with no tokenizing or filtering.
 * - Any other input is tokenized and de-duplicated.
 * - If `options.stopwords` is falsy, the de-duplicated tokens are returned as is.
 * - Otherwise stopwords are removed: a string-valued `options.stopwords` is
 *   used directly as the space-delimited stopword string, and any other
 *   truthy value selects the module's default stopword string.
 *
 * @param {string|Array} text - raw text, or an already-prepared array
 * @returns {Array} the prepared tokens
 */
function prepText(text) {
  if (Array.isArray(text)) {
    return text;
  }

  const words = uniq(tokenizer(text));
  const stopwordOption = this.options.stopwords;
  if (!stopwordOption) {
    return words;
  }

  const haystack = isString(stopwordOption) ? stopwordOption : stopwordsStr;
  return reject(words, word => isStopword(haystack, word));
}
2018-10-15 05:20:56 +00:00
module.exports = {
2018-10-21 03:51:37 +00:00
isString,
2018-10-15 05:20:56 +00:00
stopwords,
2018-10-13 03:35:11 +00:00
nextTick,
normalize,
tokenizer,
prepText,
makeStopwordString
2018-10-21 03:51:37 +00:00
};