refactor rand() for both browser and node

This commit is contained in:
Moos 2018-11-02 18:48:52 -07:00
parent bf8957633f
commit 57c09957e8
11 changed files with 304 additions and 199 deletions

View File

@ -8,10 +8,11 @@
let fs = require('fs');
let path = require('path');
let outPath = './dict';
let testPath = './test/dict';
let outPath = './dict'; // browser-use files
let testPath = './test/dict'; // mocha files in CJS format
let posExt = ['adj', 'adv', 'noun', 'verb'];
let dictRoot = './node_modules/wordnet-db/dict/';
let dictRoot = './node_modules/wordnet-db/dict/'; // source files
const fileTypes = {
data: true,
index: true

View File

@ -15,15 +15,28 @@ class BaseFile {
*/
file = {};
constructor(type, dictPath, posName) {
/**
* constructor
* @param {type} type - 'index' or 'data'
* @param {string} dictPath - path to dict db
* @param {string} posName - one of 'noun', 'verb', 'adj', 'adv'
* @param {object} [options] - @see WordPOS options
*/
constructor(type, dictPath, posName, options) {
this.type = type;
this.filePath = `${dictPath}/${type}.${posName}.js`;
this.posName = posName;
this.loadError = null;
this.options = Object.assign({}, options);
}
load() {
if (this.loadError) return Promise.reject(this.loadError);
this.options.debug && console.time('index load ' + this.posName);
let promise = Promise.resolve(require(this.filePath));
this.options.debug && console.timeEnd('index load ' + this.posName)
return promise
.then(exports => {

View File

@ -9,6 +9,7 @@
const { stopwords, prepText, makeStopwordString } = require('../util');
const { is, get, getPOS, lookup, seek, lookupPOS } = require('../common');
const { randX, rand } = require('../rand');
const IndexFile = require('./indexFile');
const DataFile = require('./dataFile');
@ -22,7 +23,6 @@ const POS = {
class WordPOS {
options = {};
loaded = Promise.resolve();
constructor(config) {
this.options = Object.assign({}, WordPOS.defaults, config);
@ -31,17 +31,11 @@ class WordPOS {
if (Array.isArray(this.options.stopwords)) {
this.options.stopwords = makeStopwordString(this.options.stopwords);
}
// TODO rand()
}
/**
* Hand back the promise stored in this instance's `loaded` field.
* NOTE(review): presumably it settles once the dictionary files are loaded --
* confirm where `loaded` is reassigned.
*
* @returns {Promise} the current value of `this.loaded`
*/
ready() {
return this.loaded;
}
initFiles() {
const keys = Object.keys(POS);
const loadOne = (Comp, pos) => new Comp(this.options.dictPath, POS[pos]);
const loadOne = (Comp, pos) => new Comp(this.options.dictPath, POS[pos], this.options);
const loader = (Comp) => keys.map(loadOne.bind(null, Comp));
const reducer = (arr) => arr.reduce((coll, item, i) => (coll[keys[i]] = item, coll), {});
@ -118,6 +112,17 @@ class WordPOS {
lookupAdverb = lookup('r');
lookupNoun = lookup('n');
lookupVerb = lookup('v');
/**
* define rand() (all POS) and the per-POS randX() methods
* @see rand and randX in ../rand
*/
rand = rand;
randAdjective = randX('a');
randAdverb = randX('r');
randNoun = randX('n');
randVerb = randX('v');
}
WordPOS.defaults = {
@ -155,7 +160,13 @@ WordPOS.defaults = {
* include data files in preload
* @type {boolean}
*/
includeData: false
includeData: false,
/**
* set to true to enable debug logging
* @type {boolean}
*/
debug: false
};

View File

@ -8,7 +8,9 @@
*/
const { indexLookup } = require('../common');
const { sample } = require('../util');
const BaseFile = require('./baseFile');
const Trie = require('../../lib/natural/trie/trie');
/**
* find a search term in an index file (using fast index)
@ -43,16 +45,78 @@ function find(search, callback) {
}
/**
 * Select <count> words at random for POS
 *
 * With an empty `startsWith` the sample is drawn from all index keys.
 * Otherwise a Trie over the keys is built on first use, kept on `this.trie`,
 * and the sample is drawn from the keys sharing that prefix.
 *
 * @param {string} startsWith - string that results should start with
 * @param {integer} count - number of results to return
 * @param {Function} [callback] - receives (results, startsWith); may be omitted
 *   when only the returned promise is used
 * @return {Promise} receives results
 * @this IndexFile
 */
function rand(startsWith, count, callback) {
  const label = (name) => name + ' ' + this.posName;
  // timing output is only produced when the debug option is on
  const time = (name) => {
    this.options.debug && console.time(label(name));
  };
  const timeEnd = (name) => {
    this.options.debug && console.timeEnd(label(name));
  };

  const done = (res) => {
    // the callback is optional: promise-only callers must not hit a TypeError
    if (typeof callback === 'function') {
      callback(res, startsWith || '');
    }
    return Promise.resolve(res);
  };

  if (!startsWith) {
    return done(sample(this.getKeys(), count));
  }

  // calc trie if haven't done so yet
  if (!this.trie) {
    time('Trie');
    this.trie = new Trie();
    this.trie.addStrings(this.getKeys());
    timeEnd('Trie');
  }

  time('trie-withprefix');
  const keys = this.trie.keysWithPrefix(startsWith);
  timeEnd('trie-withprefix');

  // TODO cache results?
  return keys.length ? done(sample(keys, count)) : done([]);
}
/**
* IndexFile class
*/
class IndexFile extends BaseFile {
constructor(dictPath, posName) {
super('index', dictPath, posName);
keys = null;
/**
* Create the index-file object for one part of speech.
*
* @param dictPath {string} - WordNet db dict path
* @param posName {string} - name of index: noun, verb, adj, adv
* @param {object} [options] - @see WordPOS options
* @constructor
*/
constructor(dictPath, posName, options) {
// 'index' is the file type; the other arguments are passed through to BaseFile
super('index', dictPath, posName, options);
// NOTE(review): the BaseFile constructor appears to receive the same posName
// and options -- confirm whether these two assignments are still needed.
this.options = Object.assign({}, options);
this.posName = posName;
}
getKeys() {
return this.keys || (this.keys = Object.keys(this.file));
}
lookup() {
@ -62,6 +126,10 @@ class IndexFile extends BaseFile {
// Passes the module-level find() function and this call's arguments to this.ready().
// NOTE(review): ready() is not defined in the visible part of this class; presumably
// it is inherited and invokes find with `this` bound once the file is loaded -- confirm.
find() {
return this.ready(find, arguments);
}
// Passes the module-level rand() function and this call's arguments
// (startsWith, count, callback) to this.ready().
// NOTE(review): ready() is not defined in the visible part of this class; presumably
// it is inherited and invokes rand with `this` bound once the file is loaded -- confirm.
rand() {
return this.ready(rand, arguments);
}
}
module.exports = IndexFile;

View File

@ -9,7 +9,7 @@
* Released under MIT license
*/
var { normalize, nextTick, isString, uniq, diff, flat } = require('./util');
var { normalize, nextTick, isString, uniq, sample, diff, flat } = require('./util');
function error(err, callback) {
if (isString(err)) err = new RangeError(err);
@ -341,35 +341,6 @@ function seek(offset, pos, callback){
return data.lookup(offset, callback);
}
/**
 * Build the randX() method for one part of speech.
 *
 * The returned function accepts (opts, callback) or just (callback), asks the
 * index file for `pos` for random words, and calls back with
 * (record, startsWith[, elapsed ms when profiling]).
 *
 * @param pos {string} - a,r,n,v
 * @returns {Function} - rand function bound to an index file
 * @this WordPOS
 */
function makeRandX(pos) {
  return function (opts, callback, _noprofile) {
    // profiling is switched off when the caller passes _noprofile
    const profiling = this.options.profile && !_noprofile;
    const startedAt = profiling && new Date();
    const { index } = this.getFilesFor(pos);
    const startsWith = (opts && opts.startsWith) || '';
    const count = (opts && opts.count) || 1;
    // (callback) call shape: the first argument is the callback
    const notify = typeof opts === 'function' ? opts : callback;
    const args = [];

    return index.rand(startsWith, count, (record) => {
      args.push(record, startsWith);
      if (profiling) {
        args.push(new Date() - startedAt);
      }
      if (notify) {
        notify.apply(null, args);
      }
    });
  };
}
const LEX_NAMES = [
'adj.all',
'adj.pert',
@ -424,7 +395,6 @@ module.exports= {
get,
seek,
getPOS,
makeRandX,
lineDataToJSON,
LEX_NAMES,

View File

@ -60,7 +60,7 @@ var WordPOS = function(options) {
this.advData = new DataFile(dictPath, 'adv');
// define randX() functions
require('../rand').init(this);
require('./rand').init(this);
if (_.isArray(this.options.stopwords)) {
this.options.stopwords = makeStopwordString(this.options.stopwords);

View File

@ -1,5 +1,5 @@
/*!
* rand.js
* node/rand.js
*
* define rand() and randX() functions on wordpos
*
@ -10,13 +10,11 @@
*/
var _ = require('underscore')._,
util = require('util'),
Trie = require('../lib/natural/trie/trie'),
indexPath = process.browser ? 'browser' : 'node',
IndexFile = require(`./${indexPath}/indexFile`),
{ randX, rand } = require('../rand'),
Trie = require('../../lib/natural/trie/trie'),
IndexFile = require(`./indexFile`),
KEY_LENGTH = 3;
/**
* rand function (bound to index)
*
@ -26,15 +24,14 @@ var _ = require('underscore')._,
* @returns Promise
* @this IndexFile
*/
function rand(startsWith, num, callback){
function randomizer(startsWith, num, callback){
var self = this,
nextKey = null,
trie = this.fastIndex.trie,
key, keys;
return new Promise(function(resolve, reject) {
//console.log('-- ', startsWith, num, self.fastIndex.indexKeys.length);
// console.log('-- ', startsWith, num, self.fastIndex.indexKeys.length);
if (startsWith) {
key = startsWith.slice(0, KEY_LENGTH);
@ -45,10 +42,12 @@ function rand(startsWith, num, callback){
// calc trie if haven't done so yet
if (!trie) {
// console.time('trie');
trie = new Trie();
trie.addStrings(self.fastIndex.indexKeys);
self.fastIndex.trie = trie;
//console.log(' +++ Trie calc ');
// console.timeEnd('trie')
}
try {
@ -129,80 +128,6 @@ function rand(startsWith, num, callback){
}); // Promise
}
// relative weight of each POS word count (DB 3.1 numbers)
// randAll() looks a part up by name here and divides by Total to get the
// fraction of the requested count to ask that part for.
// NOTE(review): the four per-POS weights add up to 35 while Total is 37 --
// confirm that Total is intended.
var POS_factor = {
Noun: 26,
Verb: 3,
Adjective: 5,
Adverb: 1,
Total: 37
};
/**
 * rand() - for all Index files
 *
 * Goes through the parts of speech in the order produced by _.sample(), asks
 * each randX() for a weighted share of `count`, and stops once enough unique
 * words are collected or no part is left. The result is trimmed to `count`.
 *
 * @param {object|Function} [opts] - { startsWith, count } or the callback
 * @param {Function} [callback] - receives (results, startsWith[, elapsed ms])
 * @returns Promise
 */
function randAll(opts, callback) {
  const callbackOnly = typeof opts === 'function';
  if (callbackOnly) {
    callback = opts;
  }
  // work on a copy: opts.count is overwritten for every part below
  opts = callbackOnly ? {} : _.clone(opts || {});

  const profile = this.options.profile;
  const start = profile && new Date();
  const startsWith = (opts && opts.startsWith) || '';
  const count = (opts && opts.count) || 1;
  const posNames = ['Noun', 'Verb', 'Adjective', 'Adverb'];
  let collected = [];

  return new Promise((resolve) => {
    // select at random a POS to look at
    const remaining = _.sample(posNames, posNames.length);

    const finish = () => {
      const args = [collected, startsWith];
      if (profile) {
        args.push(new Date() - start);
      }
      if (callback) {
        callback.apply(null, args);
      }
      resolve(collected);
    };

    const onPart = (found) => {
      if (found) {
        // make sure it's unique!
        collected = _.uniq(collected.concat(found));
      }
      if (collected.length < count && remaining.length) {
        return askNextPart();
      }
      // final random and trim excess
      collected = _.sample(collected, count);
      finish();
    };

    const askNextPart = () => {
      const part = remaining.pop();
      const weight = POS_factor[part] / POS_factor.Total;
      // pick count according to relative weight; the 1.1 guards against dupes
      opts.count = Math.ceil(count * weight * 1.1);
      this[`rand${part}`](opts, onPart);
    };

    askNextPart();
  }); // Promise
}
}
/**
* bind rand() to index
*
@ -210,31 +135,30 @@ function randAll(opts, callback) {
* @returns {function} - bound rand function for index
*/
function randomify(index){
if (!index.fastIndex) throw 'rand requires fastIndex';
return _.bind(rand, index);
if (!index.fastIndex) throw new Error('rand requires fastIndex');
index.rand = _.bind(randomizer, index);
}
module.exports = {
init: function(wordposProto) {
wordposProto.nounIndex.rand = randomify(wordposProto.nounIndex);
wordposProto.verbIndex.rand = randomify(wordposProto.verbIndex);
wordposProto.adjIndex.rand = randomify(wordposProto.adjIndex);
wordposProto.advIndex.rand = randomify(wordposProto.advIndex);
randomify(wordposProto.nounIndex);
randomify(wordposProto.verbIndex);
randomify(wordposProto.adjIndex);
randomify(wordposProto.advIndex);
/**
* define rand()
* define rand() (all POS)
*/
wordposProto.rand = randAll;
wordposProto.rand = rand;
/**
* define randX()
*/
wordposProto.randAdjective = makeRandX('a');
wordposProto.randAdverb = makeRandX('r');
wordposProto.randNoun = makeRandX('n');
wordposProto.randVerb = makeRandX('v');
wordposProto.randAdjective = randX('a');
wordposProto.randAdverb = randX('r');
wordposProto.randNoun = randX('n');
wordposProto.randVerb = randX('v');
}
};

125
src/rand.js Normal file
View File

@ -0,0 +1,125 @@
/**
* rand.js
*
* Copyright (c) 2012-2019 mooster@42at.com
* https://github.com/moos/wordpos
*
* Released under MIT license
*/
var { uniq, sample } = require('./util');
/**
 * factory function for randX()
 *
 * The produced method accepts (opts, callback) or just (callback). It
 * forwards startsWith (default '') and count (default 1) to the rand() of the
 * index file for `pos`, and calls back with
 * (record, startsWith[, elapsed ms when profiling]).
 *
 * @param pos {string} - a,r,n,v
 * @returns {Function} - rand function bound to an index file
 * @this WordPOS
 */
function randX(pos) {
  return function (opts, callback, _noprofile) {
    // profiling is switched off when the caller passes _noprofile
    const profiling = this.options.profile && !_noprofile;
    const startedAt = profiling && new Date();
    const { index } = this.getFilesFor(pos);
    const startsWith = (opts && opts.startsWith) || '';
    const count = (opts && opts.count) || 1;
    // (callback) call shape: the first argument is the callback
    const notify = typeof opts === 'function' ? opts : callback;
    const args = [];

    return index.rand(startsWith, count, (record) => {
      args.push(record, startsWith);
      if (profiling) {
        args.push(new Date() - startedAt);
      }
      if (notify) {
        notify.apply(null, args);
      }
    });
  };
}
/**
 * rand() - for all Index files
 *
 * Visits the parts of speech in random order, asks each randX() for a
 * weighted share of `count`, and stops as soon as enough unique words are
 * collected or no part is left. The collected words are then sampled down to
 * `count`.
 *
 * @param [opts] {object} options (may be omitted: rand(callback))
 * @param opts.startsWith {string} string random words should start with (default '')
 * @param opts.count {integer} number of random words to return (default 1)
 * @param callback {function} - callback receives (results, startsWith, profile)
 * @returns {Promise} receives results
 * @this WordPOS
 */
function rand(opts, callback) {
  if (typeof opts === 'function') {
    callback = opts;
    opts = {};
  }
  // Defaults apply to every call shape. rand(callback) used to skip them, which
  // left count undefined and produced [undefined] as the result.
  // The copy also keeps the caller's object untouched (count is overwritten below).
  opts = Object.assign({ startsWith: '', count: 1 }, opts);
  if (opts.count == null) {
    opts.count = 1; // explicit undefined/null count
  }
  opts.startsWith = opts.startsWith || '';

  const profile = this.options.profile;
  const start = profile && new Date();
  const count = opts.count;
  const args = [null, opts.startsWith];
  const parts = 'Noun Verb Adjective Adverb'.split(' ');
  const self = this;
  let results = [];

  return new Promise(function (resolve) {
    // select at random a POS to look at
    const doParts = sample(parts, parts.length);
    tryPart();

    function tryPart() {
      const part = doParts.pop();
      const weight = POS_factor[part] / POS_factor.Total;

      // pick count according to relative weight
      opts.count = Math.ceil(count * weight * 1.1); // guard against dupes
      self['rand' + part](opts, partCallback);
    }

    function partCallback(result) {
      if (result) {
        results = uniq(results.concat(result)); // make sure it's unique!
      }
      // not enough yet: move on to the next part, if any is left
      if (results.length < count && doParts.length) {
        return tryPart();
      }
      // final random and trim excess
      results = sample(results, count);
      done();
    }

    function done() {
      profile && args.push(new Date() - start);
      args[0] = results;
      callback && callback.apply(null, args);
      resolve(results);
    }
  }); // Promise
}
// relative weight of each POS word count (DB 3.1 numbers)
// rand() looks a part up by name here and divides by Total to get the
// fraction of the requested count to ask that part for.
// NOTE(review): the four per-POS weights add up to 35 while Total is 37 --
// confirm that Total is intended.
const POS_factor = {
Noun: 26,
Verb: 3,
Adjective: 5,
Adverb: 1,
Total: 37
};
module.exports = {
randX,
rand
};

View File

@ -53,6 +53,24 @@ function flat(arr) {
return [].concat.apply([], arr);
}
/**
 * get random sample from array (note: count << array.length)
 * https://stackoverflow.com/a/37834217
 *
 * Picks distinct positions of `array` at random and returns the elements
 * found there; `array` itself is never modified. Instead of swapping elements
 * in place, the swaps are recorded in a side table.
 *
 * @param {Array} array - source values
 * @param {number} [count=1] - how many elements to pick; fractions are
 *   rounded down, negative or non-numeric values give an empty result, and
 *   values above array.length are capped at array.length
 * @returns {Array} the picked elements, at most array.length of them
 */
function sample(array, count = 1) {
  const wanted = Math.floor(count);
  // NaN and negatives fail the comparison and fall through to 0
  const size = wanted > 0 ? Math.min(wanted, array.length) : 0;
  // position -> position whose original element currently "sits" there
  const moved = new Map();
  const result = new Array(size);

  for (let i = 0; i < size; i++) {
    // random position among those not yet consumed: [i, array.length)
    const j = i + Math.floor(Math.random() * (array.length - i));
    result[i] = array[moved.has(j) ? moved.get(j) : j];
    // virtual swap: whatever sits at i moves to j, so it can still be picked
    moved.set(j, moved.has(i) ? moved.get(i) : i);
  }
  return result;
}
/**
 * Tell whether a value is a primitive string.
 *
 * @param {*} s - value to test
 * @returns {boolean} true only when `typeof s` is 'string'
 */
function isString(s) {
  const kind = typeof s;
  return kind === 'string';
}
@ -81,5 +99,6 @@ module.exports = {
makeStopwordString,
uniq,
diff,
flat
flat,
sample
};

View File

@ -1,7 +1,7 @@
/**
* wordpos_test.js
*
* test file for main wordpos functionality
* test file for main wordpos functionality (both node and browser)
*
* Usage:
* npm install mocha -g
@ -11,14 +11,12 @@
*
* npm test
*
* Copyright (c) 2012-2016 mooster@42at.com
* Copyright (c) 2012-2019 mooster@42at.com
* https://github.com/moos/wordpos
*
* Released under MIT license
*/
//import {describe, it} from 'mocha/lib/mocha.js';
var
chai = require('chai'),
_ = require('underscore'),
@ -29,7 +27,8 @@ var
dictPath = browser ? path.resolve('./test/dict') : undefined,
wordpos = new WordPOS({
profile: false,
dictPath: dictPath
dictPath: dictPath,
// debug: true
});
const assertNoData = (err) => {
@ -58,7 +57,6 @@ var str = "The angry bear chased the frightened little squirrel",
offset = 1285602;
describe('lookup', function() {
it('with callback', function () {
@ -301,73 +299,49 @@ describe('nested callbacks on same index key', function() {
describe('rand()...', function() {
it('should get random word', function(done) {
wordpos.rand(function(result) {
it('should get random word', function() {
return wordpos.rand(function(result) {
assert.equal(result.length, 1);
done();
});
});
it('should get N random words', function(done) {
wordpos.rand({count: 3}, function(result) {
it('should get N random words', function() {
return wordpos.rand({count: 3}, function(result) {
assert.equal(result.length, 3);
done();
});
});
it('should get random word starting with', function(done) {
wordpos.rand({startsWith: 'foo'}, function(result, startsWith) {
it('should get random word starting with', function() {
return wordpos.rand({startsWith: 'foo'}, function(result, startsWith) {
assert.equal(result[0].indexOf('foo'), 0);
assert.equal(startsWith, 'foo');
done();
});
});
it('should get nothing starting with not found', function(done) {
wordpos.rand({startsWith: 'zzzz'}, function(result) {
it('should get nothing starting with not found', function() {
return wordpos.rand({startsWith: 'zzzz'}, function(result) {
assert.equal(result.length, 0);
done();
});
});
});
describe('randX()...', function() {
it('should get random noun', function(done) {
wordpos.randNoun(function(result) {
assert.equal(result.length, 1);
done();
});
});
let assertOneResult = (res) => {
assert.equal(res.length, 1);
};
it('should get random verb', function(done) {
wordpos.randVerb(function(result) {
assert.equal(result.length, 1);
done();
});
});
it('should get random adjective', function(done) {
wordpos.randAdjective(function(result) {
assert.equal(result.length, 1);
done();
});
});
it('should get random adverb', function(done) {
wordpos.randAdverb(function(result) {
assert.equal(result.length, 1);
done();
});
});
it('should get random noun', () => wordpos.randNoun(assertOneResult));
it('should get random verb', () => wordpos.randVerb(assertOneResult));
it('should get random adjective', () => wordpos.randAdjective(assertOneResult));
it('should get random adverb', () => wordpos.randAdverb(assertOneResult));
// not found
it('should NOT get random noun starting with', function(done) {
wordpos.randNoun({startsWith: 'zzzz'},function(result, startsWith) {
assert.equal(result.length, 0);
done();
});
});
it('should NOT get random noun starting with', () =>
wordpos.randNoun({startsWith: 'zzzz'}, (result, startsWith) =>
assert.equal(result.length, 0)
)
);
});