first checkin for browser rework

This commit is contained in:
Moos 2018-10-12 20:35:11 -07:00
parent b972581640
commit 364b2648f7
14 changed files with 1221 additions and 4 deletions

4
.babelrc Normal file
View File

@ -0,0 +1,4 @@
{
"presets": ["env", "stage-2"],
"plugins": ["transform-class-properties"]
}

2
.gitignore vendored
View File

@ -2,3 +2,5 @@ dict
node_modules
.idea
*.iml
.cache
dist

View File

@ -1,6 +1,6 @@
{
"name": "wordpos",
"version": "1.2.0",
"version": "2.0.0-alpha",
"description": "wordpos is a set of part-of-speech utilities for Node.js using the WordNet database.",
"author": "Moos <mooster@42at.com>",
"keywords": [
@ -16,14 +16,26 @@
"engines": {
"node": ">=4"
},
"files": ["bench","bin","lib","src","test","tools"],
"files": [
"bench",
"bin",
"lib",
"src",
"test",
"tools"
],
"bin": "./bin/wordpos-cli.js",
"dependencies": {
"commander": "^2.0.0",
"dict": "^1.4.0",
"underscore": ">=1.3.1",
"wordnet-db": "^3.1.6"
},
"devDependencies": {
"babel-core": "^6.26.3",
"babel-plugin-transform-class-properties": "^6.24.1",
"babel-preset-env": "^1.7.0",
"babel-preset-stage-2": "^6.24.1",
"chai": "^4.0.2",
"mini-bench": "^1.0.0",
"mocha": "^5.2.0"
@ -35,7 +47,11 @@
"main": "./src/wordpos.js",
"scripts": {
"postinstall": "node tools/stat.js --no-stats index.adv index.adj index.verb index.noun",
"test": "mocha test"
"postinstall-web": "node scripts/makeJsonDict.js index data",
"test": "mocha test",
"start": "npm run start-self",
"start-self": "parcel samples/self-hosted/index.html",
"start-cdn": "parcel samples/cdn/index.html"
},
"license": "MIT"
}

View File

@ -0,0 +1,51 @@
<!doctype html>
<html>
<head>
<script src="./main.js"></script>
<script type="ignore-me">
// import IndexFile from "../../src/browser/indexFile.js";
console.log(333, IndexFile)
let posExt = ['adj', 'adv', 'noun', 'verb'];
let dictRoot = '../../dict/';
let files = {};
function loadPos(pos) {
return import(dictRoot + 'index.' + pos + '.js');
console.time('load-' + pos);
let get = (name) => {
let path = dictRoot + name + '.' + pos + '.json';
return fetch(path).then(res => res.json()).then(obj => {
// console.log(`got ${path}: `, text);
files[pos] = files[pos] || {};
files[pos][name] = obj;
console.timeEnd('load-' + pos);
});
};
// get('data');
return get('index');
}
let pos = 'adv';
loadPos(pos).then(result => {
console.log('got', pos ,result);
window.res = result.default;
});
</script>
</head>
<body>
<h1>Self-hosted WordPOS sample</h1>
<script>
</script>
</body>
</html>

View File

@ -0,0 +1,19 @@
// Self-hosted WordPOS browser sample (bundled by parcel, see package.json start-self).
import WordPOS from '../../src/browser';

// NOTE(review): __dirname is not defined in ES modules -- presumably the
// bundler shims it; confirm under parcel.
console.log(__dirname, WordPOS.defaults)

// expose the instance on window for console experimentation
let wordpos = window.wordpos = new WordPOS({
  // preload: true,
  dictPath: './dict',
  profile: true,
  // stopwords: false
});

// exercise the three API families: isX(), getX(), lookupX()
wordpos.isAdverb('likely').then(res => console.log('likely is adverb:', res));
// wordpos.isAdverb('likely', (res, ...profile) => console.log('likely callback', res, profile));

wordpos.getAdverbs('this is is likely a likely tricky business this is').then(
  res => console.log('getAdverb', res)
);

wordpos.lookupAdverb('likely').then(res => console.log('lookup ===', res))
// wordpos.lookup('likely').then(res, console.log('lookup ===', res))

85
scripts/makeJsonDict.js Normal file
View File

@ -0,0 +1,85 @@
#!/usr/bin/env node
/**
* takes original WordNet index & data files and converts to
* exported JSON format with lemma as the key.
*/
let fs = require('fs');
let path = require('path');
let outPath = './dict';
let posExt = ['adj', 'adv', 'noun', 'verb'];
let dictRoot = './node_modules/wordnet-db/dict/';
const fileTypes = {
data: true,
index: true
};
const [,, ...args] = process.argv;
if (!args.length || args.filter(p => !(p in fileTypes)).length) {
console.log('Converts wordnet-db index & data files to JSON format for use in the browser.');
console.log('\nUsage: makeJsonDict.js index|data');
process.exit(1);
}
/**
 * Returns a copy of arr with duplicates removed (first occurrence wins).
 * @param {Array} arr
 * @returns {Array}
 */
function uniq(arr) {
  return [...new Set(arr)];
}
console.time('Done');
// create out directory
try {
fs.statSync(outPath);
} catch (e) {
fs.mkdirSync(outPath);
}
/**
 * Converts one WordNet file type ('index' or 'data') for every POS:
 * reads the raw file from dictRoot (wordnet-db), keys each line by its
 * first token, and writes an ES-module JSON file into outPath.
 *
 * @param {string} name - file type: 'index' or 'data'
 */
function processFile(name) {
  // read the file as text
  function loadFile(pos) {
    console.time(' load');
    let inPath = path.resolve(dictRoot, name + '.' + pos);
    let text = fs.readFileSync(inPath, 'utf8');
    console.timeEnd(' load');
    return text;
  }

  // convert raw text to JSON and write to file
  function processText(pos, text) {
    let obj = {};
    let sp = ' ';
    console.time(' process');
    text.split('\n').forEach(line => {
      // skip blank lines and lines beginning with a space
      // (presumably the WordNet license header -- TODO confirm)
      if (!line || line[0] === sp) return;
      let spi = line.indexOf(sp);
      // first token (lemma/offset) becomes the JSON key...
      let key = line.substr(0, spi);
      // ...and is stripped from the stored value; the tail is cut at the
      // last double-space (assumes WordNet lines end with "  "-padded
      // trailer -- TODO confirm against the file layout)
      line = line.substring(1 + spi, line.lastIndexOf(sp + sp))
      obj[key] = line;
    });
    console.timeEnd(' process');
    return obj;
  }

  // serialize obj as an importable module: "export default {...}"
  function writeFile(pos, obj) {
    console.time(' write');
    let text = JSON.stringify(obj);
    text = 'export default ' + text;
    fs.writeFileSync(path.resolve(outPath, name + '.' + pos + '.js'), text);
    console.timeEnd(' write');
  }

  // run the load -> convert -> write pipeline for each POS
  posExt.forEach(pos => {
    console.log('\n', name, pos, ':');
    let text = loadFile(pos);
    let obj = processText(pos, text);
    writeFile(pos, obj);
  });
}
uniq(args).forEach(processFile);
console.log('\nWritten to', path.resolve(outPath));
console.timeEnd('Done');

30
src/browser/baseFile.js Normal file
View File

@ -0,0 +1,30 @@
/**
 * BaseFile -- common loader for the JSON-converted WordNet files
 * (index.<pos>.js / data.<pos>.js produced by scripts/makeJsonDict.js).
 */
class BaseFile {
  /**
   * file contents (populated by load())
   * @type {Object}
   */
  file = {};

  /**
   * @param {string} type - file type: 'index' or 'data'
   * @param {string} dictPath - path to the dict folder
   * @param {string} posName - POS name: noun, verb, adj, adv
   */
  constructor(type, dictPath, posName) {
    this.type = type;
    this.filePath = `${dictPath}/${type}.${posName}.js`;
  }

  /**
   * dynamically imports the file and caches its default export
   * @returns {Promise<Object>} resolves with the file contents
   */
  load() {
    return import(this.filePath)
      .then(mod => this.file = mod.default)
      .catch(err => {
        console.error(`Error loading ${this.type} file for ${this.filePath}.`, err);
        throw err;
      });
  }

  /**
   * runs fn(...args) bound to this instance once the file is loaded
   */
  ready(fn, args) {
    return this.load().then(() => fn.apply(this, args));
  }
}

export default BaseFile;

92
src/browser/dataFile.js Normal file
View File

@ -0,0 +1,92 @@
/*!
* dataFile.js
*
* Copyright (c) 2012-2019 mooster@42at.com
* https://github.com/moos/wordpos
*
* Portions: Copyright (c) 2011, Chris Umbel
*
* Released under MIT license
*/
import { lineDataToJSON, LEX_NAMES } from '../common';
import BaseFile from './baseFile';
/**
 * get parsed line from data file
 *
 * @param {string} offset The offset key
 * @return {object} Data record object (empty object when offset not found)
 * @this DataFile
 */
function seek(offset) {
  const line = this.file[offset];
  if (!line) return {};
  // the offset was stripped off as the JSON key -- restore it before parsing
  return lineDataToJSON(`${offset} ${line}`);
}
/**
 * lookup offsets in data file
 *
 * @param offsets {array|string} - offset(s) to look up (obtained from index.find())
 * @param callback {function} (optional) - callback, receives (err, results)
 * @returns {Promise.[<Object>]} array of or single data record
 * @this DataFile
 */
function lookup(offsets, callback) {
  const single = !Array.isArray(offsets);
  const list = single ? [offsets] : offsets;
  const readLine = seek.bind(this);
  const filePath = this.filePath;

  return new Promise((resolve, reject) => {
    // parse each offset; records without a POS are misses
    const results = list.map(readLine).filter(record => record.pos);

    if (!results.length) {
      const err = new RangeError(`No data at offsets ${list.join()} in ${filePath}.`);
      callback && callback(err, single ? {} :[]);
      return reject(err);
    }

    const payload = single ? results[0] : results;
    callback && callback(null, payload);
    resolve(payload);
  });
}
/**
 * DataFile class
 *
 * Lookup into a JSON-converted WordNet data.<pos> file, loaded on
 * demand via BaseFile.
 *
 * @param dictPath {string} - path to dict folder
 * @param name {string} - POS name
 * @constructor
 */
class DataFile extends BaseFile {
  constructor(dictPath, posName) {
    super('data', dictPath, posName);
  }

  // resolves with data record(s) for the given offset(s) -- see lookup()
  lookup() {
    return this.ready(lookup, arguments);
  }

  // not implemented for the browser build yet
  seek() {
    // return this.ready(find, arguments);
  }
}
/**
* map of lexFilenum to lex names
*
* @see https://wordnet.princeton.edu/wordnet/man/lexnames.5WN.html
* @type {string[]}
*/
DataFile.LEX_NAMES = LEX_NAMES;
export default DataFile;

165
src/browser/index.js Normal file
View File

@ -0,0 +1,165 @@
import { stopwords, prepText, makeStopwordString } from '../util';
import { is, get, lookup } from '../common';
import IndexFile from './indexFile';
import DataFile from './dataFile';
const POS = {
n: 'noun',
v: 'verb',
a: 'adj',
r: 'adv'
};
/**
 * WordPOS -- browser implementation backed by JSON-converted WordNet files.
 */
class WordPOS {
  // effective options: WordPOS.defaults merged with the user config
  options = {};

  // resolves once preloading (if requested) has finished
  loaded = Promise.resolve();

  /**
   * @param {object} [config] - overrides for WordPOS.defaults
   */
  constructor(config) {
    this.options = Object.assign({}, WordPOS.defaults, config);
    this.initFiles();
    // normalize a user-supplied stopword array into the lookup string form
    if (Array.isArray(this.options.stopwords)) {
      this.options.stopwords = makeStopwordString(this.options.stopwords);
    }
    // TODO rand()
  }

  /**
   * @returns {Promise} resolves when requested preloading is complete
   */
  ready() {
    return this.loaded;
  }

  /**
   * creates an IndexFile & DataFile instance per POS and kicks off
   * preloading when options.preload is set
   */
  initFiles() {
    const keys = Object.keys(POS);
    const loadOne = (Comp, pos) => new Comp(this.options.dictPath, POS[pos]);
    const loader = (Comp) => keys.map(loadOne.bind(null, Comp));
    const reducer = (arr) => arr.reduce((coll, item, i) => (coll[keys[i]] = item, coll), {});

    this.indexFiles = reducer(loader(IndexFile));
    this.dataFiles = reducer(loader(DataFile));

    if (this.options.preload) {
      this.loaded = this.preloadIndexes(this.options.preload);
    }
  }

  /**
   * @param {string} pos - single-letter POS (n/v/a/r)
   * @returns {{index: IndexFile, data: DataFile}}
   */
  getFilesFor(pos) {
    return {
      index: this.indexFiles[pos],
      data: this.dataFiles[pos]
    };
  }

  /**
   * loads index files
   *
   * @param {string|Array|boolean} [pos] POS to load (true/undefined: all)
   * @return {Promise.<index data>}
   */
  preloadIndexes(pos) {
    // FIX: previously read `this.indexFile[pos]` (non-existent property --
    // it's `indexFiles`) and reused that single file for every POS, so
    // "preload all" loaded nothing useful.
    const known = p => p in this.indexFiles;
    const load = p => this.indexFiles[p].load();

    if (!pos || pos === true) { // preload all
      return Promise.all(Object.keys(POS).map(load));
    }
    if (typeof pos === 'string' && known(pos)) {
      return load(pos);
    }
    if (Array.isArray(pos) && pos.every(known)) {
      // FIX: was pos.forEach(...), whose undefined return caused a
      // spurious rejection even for valid POS arrays
      return Promise.all(pos.map(load));
    }
    // TODO includeData
    return Promise.reject(new RangeError(`Unknown POS "${pos}" for preload.`));
  }

  // tokenizes text, removing duplicates & (optionally) stopwords
  parse = prepText;

  /**
   * isX() - Test if word is given POS
   * @see is
   */
  isAdjective = is('a');
  isAdverb = is('r');
  isNoun = is('n');
  isVerb = is('v');

  /**
   * getX() - Find all words in string that are given POS
   * @see get
   */
  getAdjectives = get('isAdjective');
  getAdverbs = get('isAdverb');
  getNouns = get('isNoun');
  getVerbs = get('isVerb');

  /**
   * lookupX() - Lookup word definition if already know POS
   * @see lookup
   */
  lookupAdjective = lookup('a');
  lookupAdverb = lookup('r');
  lookupNoun = lookup('n');
  lookupVerb = lookup('v');
}
WordPOS.defaults = {
/**
* path to WordNet data (override only if not using wordnet-db)
* @type {string}
*/
dictPath: '',
/**
* enable profiling, time in msec returned as second argument in callback
* @type {boolean}
*/
profile: false,
/**
* if true, exclude standard stopwords.
* if array, stopwords to exclude, eg, ['all','of','this',...]
* if false, do not filter any stopwords.
* @type {boolean}
*/
stopwords: true,
/**
* preload files.
* true - preload all POS
* false - do not preload any POS
* 'a' - preload adj
* ['a','v'] - preload adj & verb
* @type {boolean|string|Array}
*/
preload: false,
/**
* include data files in preload
* @type {boolean}
*/
includeData: false
};
/**
* access to WordNet DB
* @type {object}
*/
// WordPOS.WNdb = WNdb;
/**
* access to stopwords
* @type {Array}
*/
WordPOS.stopwords = stopwords;
export default WordPOS;

71
src/browser/indexFile.js Normal file
View File

@ -0,0 +1,71 @@
/*!
* indexFile.js
*
* implements fast index lookup of WordNet's index files
*
* Copyright (c) 2012-2019 mooster@42at.com
* https://github.com/moos/wordpos
*
* Portions: Copyright (c) 2011, Chris Umbel
*
* Released under MIT license
*/
import { indexLookup } from '../common';
import BaseFile from './baseFile';
/**
 * find a search term in a loaded index file
 *
 * @param search {string} - word to search for
 * @param callback {function} - receives {status: 'miss'} on a miss, or
 *   {status: 'hit', key, line, tokens} on a hit
 * @returns none
 * @this IndexFile
 */
function find(search, callback) {
  if (!(search in this.file)) {
    callback({ status: 'miss' });
    return;
  }

  const line = this.file[search];
  // the key was stripped when the index was converted to JSON --
  // put it back at the front of the token list
  const tokens = line.split(/\s+/);
  tokens.unshift(search);

  callback({
    status: 'hit',
    key: search,
    line: line,
    tokens: tokens
  });
}
/**
* IndexFile class
*
* @param dictPath {string} - WordNet db dict path
* @param name {string} - name of index: noun, verb, adj, adv
* @constructor
*/
class IndexFile extends BaseFile {
constructor(dictPath, posName) {
super('index', dictPath, posName);
}
lookup() {
return this.ready(indexLookup, arguments);
}
find() {
return this.ready(find, arguments);
}
}
export default IndexFile;

82
src/browser/piper.js Normal file
View File

@ -0,0 +1,82 @@
/*!
* piper.js
*
* executes multiple async i/o tasks and pools similar callbacks,
* calling i/o open/close when all incoming tasks are done.
*
* Copyright (c) 2012-2016 mooster@42at.com
* https://github.com/moos/wordpos
*
* Released under MIT license
*/
var _ = require('underscore')._,
util = require('util'),
fs = require('fs');
/**
 * run single 'task' method sharing callbacks. Method MUST take callback as LAST arg.
 * piper is bound to an IndexFile.
 *
 * NOTE(review): this module still uses fs via require -- it appears carried
 * over unchanged from the Node implementation and is not browser-ready yet.
 *
 * @param task {string} - task name unique to method!
 * @param method {function} - method to execute, gets (args, ... , callback)
 * @param args {Array} - args to pass to method
 * @param context {object} - other params to remember and sent to callback
 * @param callback {function} - result callback
 */
function piper(task, method, args, context, callback){
  var readCallbacks = this.callbackQueue,
    // everything after (task, method) is memoized for replay in the wrapper
    memoArgs = _.rest(arguments, 2),
    wrappedCallback;

  //console.log('piper', task, [method]);

  // queue up if already reading file for this task
  if (task in readCallbacks){
    readCallbacks[task].push(memoArgs);
    return;
  }
  readCallbacks[task] = [memoArgs];

  // lazily open the file on first use
  if (!this.fd) {
    //console.log(' ... opening', this.filePath);
    this.fd = fs.openSync(this.filePath, 'r');
  }

  // ref count so we know when to close the main index file
  ++this.refcount;

  wrappedCallback = _.partial(piper.wrapper, this, task);

  // call method -- replace original callback (last arg) with wrapped one
  method.apply(null, [].concat( args, wrappedCallback ));
}
// result is the *same* for same task
// Fans the single i/o result out to every callback queued under `task`,
// then closes the file descriptor once all tasks have drained.
piper.wrapper = function(self, task /*, result...*/){
  var readCallbacks = self.callbackQueue,
    result = _.rest(arguments, 2),
    callback, args;

  // live access callbacks cache in case nested cb's
  // add to the array.
  while (args = readCallbacks[task].shift()) {
    callback = args.pop(); // last arg MUST be callback
    // console.log('>>>> pper wrapper', self.fastIndex.name, task, result.toString())
    callback.apply(null, [].concat(_.flatten(args, /*shallow*/true), result));
  }

  // now done - delete cb cache
  delete readCallbacks[task];

  // last outstanding task: release the shared fd
  if (--self.refcount === 0) {
    //console.log(' ... closing', self.filePath);
    fs.closeSync(self.fd);
    self.fd = null;
  }
};
module.exports = piper;

267
src/browser/rand.js Normal file
View File

@ -0,0 +1,267 @@
/*!
* rand.js
*
* define rand() and randX() functions on wordpos
*
* Copyright (c) 2012-2016 mooster@42at.com
* https://github.com/moos/wordpos
*
* Released under MIT license
*/
var _ = require('underscore')._,
util = require('util'),
Trie = require('../lib/natural/trie/trie'),
IndexFile = require('./indexFile'),
KEY_LENGTH = 3;
/**
 * factory function for randX()
 *
 * @param pos {string} - a,r,n,v
 * @returns {Function} - rand function bound to an index file
 */
function makeRandX(pos){
  return function(opts, callback, _noprofile) {
    // profiling is disabled when called internally (e.g. by rand())
    const profile = this.options.profile && !_noprofile;
    const started = profile && new Date();
    const cbArgs = [];
    const index = this.getFilesFor(pos).index;
    const startsWith = opts && opts.startsWith || '';
    const count = opts && opts.count || 1;

    // randX(callback) form: opts was really the callback
    if (typeof opts === 'function') {
      callback = opts;
    }

    return index.rand(startsWith, count, function (record) {
      cbArgs.push(record, startsWith);
      profile && cbArgs.push(new Date() - started);
      callback && callback.apply(null, cbArgs);
    });
  };
}
/**
 * rand function (bound to index)
 *
 * NOTE(review): depends on this.fastIndex, this.piper and
 * IndexFile.readIndexBetweenKeys -- all Node-only facilities not present in
 * the browser BaseFile/IndexFile above. Carried over from the Node build;
 * needs porting before rand() works in the browser.
 *
 * @param startsWith {string} - get random word(s) that start with this, or ''
 * @param num {number} - number of words to return
 * @param callback {function} - callback function, receives words array and startsWith
 * @returns Promise
 */
function rand(startsWith, num, callback){
  var self = this,
    nextKey = null,
    trie = this.fastIndex.trie,
    key, keys;

  return new Promise(function(resolve, reject) {
    //console.log('-- ', startsWith, num, self.fastIndex.indexKeys.length);
    if (startsWith) {
      // bucket key is the first KEY_LENGTH chars of the prefix
      key = startsWith.slice(0, KEY_LENGTH);

      /**
       * if key is 'a' or 'ab' (<3 chars), search for ALL keys starting with that.
       */
      if (key.length < KEY_LENGTH) {
        // calc trie if haven't done so yet
        if (!trie) {
          trie = new Trie();
          trie.addStrings(self.fastIndex.indexKeys);
          self.fastIndex.trie = trie;
          //console.log(' +++ Trie calc ');
        }

        try {
          // trie throws if not found!!!!!
          keys = trie.keysWithPrefix(startsWith);
        } catch (e) {
          keys = [];
        }

        // read all keys then select random word.
        // May be large disk read!
        key = keys[0];
        nextKey = _.last(keys);
      }

      // prefix maps to no known bucket: resolve empty
      if (!key || !(key in self.fastIndex.offsets)) {
        callback && callback([], startsWith);
        resolve([]);
      }
    } else {
      // no startWith given - random select among keys
      keys = _.sample(self.fastIndex.indexKeys, num);

      // if num > 1, run each key independently and collect results
      if (num > 1) {
        var results = [], ii = 0;
        _(keys).each(function (startsWith) {
          self.rand(startsWith, 1, function (result) {
            results.push(result[0]);
            if (++ii == num) {
              callback && callback(results, '');
              resolve(results);
            }
          });
        });
        return;
      }
      key = keys;
    }

    // prepare the piper
    var args = [key, nextKey, self],
      task = 'rand:' + key + nextKey,
      context = [startsWith, num, callback]; // last arg MUST be callback

    // pay the piper
    self.piper(task, IndexFile.readIndexBetweenKeys, args, context, collector);

    // receives the raw buffer of index lines between key & nextKey
    function collector(key, nextKey, index, startsWith, num, callback, buffer) {
      var lines = buffer.toString().split('\n'),
        matches = lines.map(function (line) {
          return line.substring(0, line.indexOf(' '));
        });
      //console.log(' got lines for key ', key, lines.length);

      // we got bunch of matches for key - now search within for startsWith
      if (startsWith !== key) {
        // binary search for startsWith within set of matches
        var ind = _.sortedIndex(matches, startsWith);
        if (ind >= lines.length || matches[ind].indexOf(startsWith) === -1) {
          callback && callback([], startsWith);
          resolve([]);
          return;
        }

        var trie = new Trie();
        trie.addStrings(matches);
        //console.log('Trie > ', trie.matchesWithPrefix( startsWith ));
        matches = trie.keysWithPrefix(startsWith);
      }

      var words = _.sample(matches, num);
      callback && callback(words, startsWith);
      resolve(words);
    }
  }); // Promise
}
// relative weight of each POS word count (DB 3.1 numbers)
var POS_factor = {
Noun: 26,
Verb: 3,
Adjective: 5,
Adverb: 1,
Total: 37
};
/**
 * rand() - for all Index files
 *
 * Draws candidate batches from randomly-ordered POS files (batch size
 * weighted by each POS's relative word count) until `count` unique words
 * are collected.
 *
 * @param {object|function} [opts] - {startsWith, count} options, or callback
 * @param {function} [callback] - receives (words, startsWith [, profile msec])
 * @returns Promise
 */
function randAll(opts, callback) {
  if (typeof opts === 'function') {
    callback = opts;
    opts = {};
  } else {
    // clone: opts.count is mutated per-POS below
    opts = _.clone(opts || {});
  }

  var
    profile = this.options.profile,
    start = profile && new Date(),
    results = [],
    startsWith = opts && opts.startsWith || '',
    count = opts && opts.count || 1,
    args = [null, startsWith],
    parts = 'Noun Verb Adjective Adverb'.split(' '),
    self = this;

  return new Promise(function(resolve, reject) {
    // select at random a POS to look at
    var doParts = _.sample(parts, parts.length);
    tryPart();

    // draw a weighted batch from the next POS via its randX() method
    function tryPart() {
      var part = doParts.pop(),
        rand = 'rand' + part,
        factor = POS_factor[part],
        weight = factor / POS_factor.Total;

      // pick count according to relative weight
      opts.count = Math.ceil(count * weight * 1.1); // guard against dupes
      self[rand](opts, partCallback);
    }

    // accumulate unique words; try remaining POS while short of count
    function partCallback(result) {
      if (result) {
        results = _.uniq(results.concat(result)); // make sure it's unique!
      }
      if (results.length < count && doParts.length) {
        return tryPart();
      }
      // final random and trim excess
      results = _.sample(results, count);
      done();
    }

    function done() {
      profile && (args.push(new Date() - start));
      args[0] = results;
      callback && callback.apply(null, args);
      resolve(results);
    }
  }); // Promise
}
/**
 * bind rand() to index
 *
 * @param index {object} - the IndexFile instance
 * @returns {function} - bound rand function for index
 * @throws {string} when the index has no fastIndex (required by rand)
 */
function randomify(index){
  if (!index.fastIndex) throw 'rand requires fastIndex';
  return _.bind(rand, index);
}

module.exports = {
  // mixes rand()/randX() methods into the WordPOS prototype;
  // expects the prototype's per-POS index files to already exist
  init: function(wordposProto) {
    wordposProto.nounIndex.rand = randomify(wordposProto.nounIndex);
    wordposProto.verbIndex.rand = randomify(wordposProto.verbIndex);
    wordposProto.adjIndex.rand = randomify(wordposProto.adjIndex);
    wordposProto.advIndex.rand = randomify(wordposProto.advIndex);

    /**
     * define rand()
     */
    wordposProto.rand = randAll;

    /**
     * define randX()
     */
    wordposProto.randAdjective = makeRandX('a');
    wordposProto.randAdverb = makeRandX('r');
    wordposProto.randNoun = makeRandX('n');
    wordposProto.randVerb = makeRandX('v');
  }
};

277
src/common.js Normal file
View File

@ -0,0 +1,277 @@
import { normalize, nextTick } from './util';
/**
 * factory for main lookup function
 *
 * Looks the word up in the POS index; on a hit, fetches the full data
 * record(s) at the index record's synset offsets.
 *
 * @param pos {string} - n/v/a/r
 * @returns {Function} - lookup function bound to POS
 * @this WordPOS
 */
function lookup(pos) {
  return function(word, callback) {
    var profile = this.options.profile,
      start = profile && new Date(),
      files = this.getFilesFor(pos),
      args = [];

    word = normalize(word);

    // lookup index
    return files.index.lookup(word)
      .then(function(result) {
        if (result) {
          // lookup data
          return files.data.lookup(result.synsetOffset).then(done);
        } else {
          // not found in index
          return done([]);
        }
      })
      .catch(done);

    // done() doubles as success and error handler: an Error yields an
    // empty result set for the callback but is passed through unchanged
    // to the promise chain.
    function done(results) {
      if (results instanceof Error) {
        args.push([], word);
      } else {
        args.push(results, word);
      }
      //console.log(3333, args)
      profile && args.push(new Date() - start);
      nextTick(callback, args);
      return results;
    }
  };
}
/**
 * find a word and prepare its lexical record
 *
 * @param word {string} - search word
 * @param callback {function} - callback function receives result
 * @returns {Promise.<IndexRecord>} resolves with the record, or null on miss
 * @this IndexFile
 *
 * Credit for this routine to https://github.com/NaturalNode/natural
 */
function indexLookup(word, callback) {
  var self = this;
  return new Promise(function(resolve, reject){
    self.find(word, function (record) {
      var indexRecord = null,
        i;
      if (record.status == 'hit') {
        var ptrs = [], offsets = [];

        // index line layout (with the search key restored at [0]):
        // lemma pos synset_cnt p_cnt [ptr_symbol...] sense_cnt tagsense_cnt [synset_offset...]
        let n = parseInt(record.tokens[3], 10);
        for (i = 0; i < n; i++) {
          // FIX: was record.tokens[i], which collected lemma/pos/counts
          // instead of the pointer symbols starting at token 4
          ptrs.push(record.tokens[i + 4]);
        }

        n = parseInt(record.tokens[2], 10);
        for (i = 0; i < n; i++) {
          offsets.push(record.tokens[ptrs.length + 6 + i]);
        }

        indexRecord = {
          lemma : record.tokens[0],
          pos : record.tokens[1],
          ptrSymbol : ptrs,
          senseCnt : parseInt(record.tokens[ptrs.length + 4], 10),
          tagsenseCnt : parseInt(record.tokens[ptrs.length + 5], 10),
          synsetOffset: offsets
        };
      }
      callback && callback(indexRecord);
      resolve(indexRecord);
    });
  });
}
/**
 * getX() factory function
 *
 * Builds a function that runs the named isX() predicate over every word
 * in the text and collects the matches.
 *
 * @param isFn {string} - name of an isX() method on WordPOS
 * @returns {Function}
 * @this WordPOS
 */
function get(isFn) {
  return function(text, callback, _noprofile) {
    const profile = this.options.profile && !_noprofile;
    const started = profile && new Date();
    const words = this.parse(text);
    const matches = [];
    const self = this;

    // test one word (profiling suppressed for the internal call)
    const testWord = word =>
      self[isFn].call(self, word, null, /*_noprofile*/ true)
        .then(hit => { hit && matches.push(word); });

    return Promise.all(words.map(testWord)).then(() => {
      const args = [matches];
      profile && args.push(new Date() - started);
      nextTick(callback, args);
      return matches;
    });
  };
}
/**
 * isX() factory function
 *
 * Builds a predicate resolving true when the word appears in the index
 * file for the given POS.
 *
 * @param pos {string} - n/v/a/r
 * @returns {Function}
 * @this WordPOS
 */
function is(pos){
  return function(word, callback, _noprofile) {
    // disable profiling when isX() used internally
    const profile = this.options.profile && !_noprofile;
    const started = profile && new Date();
    const index = this.getFilesFor(pos).index;
    const normalized = normalize(word);

    return index
      .lookup(normalized)
      .then(record => {
        const found = !!record;
        const args = [found, normalized];
        profile && args.push(new Date() - started);
        nextTick(callback, args);
        return found;
      });
  };
}
/**
 * parse a single data file line, returning data object
 *
 * @param line {string} - a single line from WordNet data file
 * @param location {number} - (unused) line offset, kept for dataCheck
 * @returns {object}
 *
 * Credit for this routine to https://github.com/NaturalNode/natural
 */
function lineDataToJSON(line, location) {
  // if (!dataCheck(line, location)) return new Error('Bad data at location ' + location);

  const parts = line.split('| ');
  const tokens = parts[0].split(/\s+/);

  // word count is hexadecimal per the WordNet data-file format
  const wCnt = parseInt(tokens[3], 16);
  const synonyms = [];
  for (let i = 0; i < wCnt; i++) {
    synonyms.push(tokens[4 + i * 2]);
  }

  // pointer section starts right after the word/lex_id pairs
  const ptrOffset = (wCnt - 1) * 2 + 6;
  const ptrCount = parseInt(tokens[ptrOffset], 10);
  const ptrs = [];
  for (let i = 0; i < ptrCount; i++) {
    const base = ptrOffset + i * 4;
    ptrs.push({
      pointerSymbol: tokens[base + 1],
      synsetOffset: tokens[base + 2],
      pos: tokens[base + 3],
      sourceTarget: tokens[base + 4]
    });
  }

  // break "gloss" into definition vs. examples
  const gloss = parts[1];
  const [definition, ...examples] = gloss.split('; ');
  const cleanedExamples = examples.map(
    ex => ex.replace(/\"/g, '').replace(/\s\s+/g, '')
  );

  const lexFilenum = parseInt(tokens[1], 10);

  return {
    synsetOffset: tokens[0],
    lexFilenum: lexFilenum,
    lexName: LEX_NAMES[ lexFilenum ],
    pos: tokens[2],
    wCnt: wCnt,
    lemma: tokens[4],
    synonyms: synonyms,
    lexId: tokens[5],
    ptrs: ptrs,
    gloss: gloss,
    def: definition,
    exp: cleanedExamples
  };
}

/**
 * map of lexFilenum to lex names
 *
 * @see https://wordnet.princeton.edu/wordnet/man/lexnames.5WN.html
 * @type {string[]}
 */
const LEX_NAMES = [
  'adj.all',
  'adj.pert',
  'adv.all',
  'noun.Tops',
  'noun.act',
  'noun.animal',
  'noun.artifact',
  'noun.attribute',
  'noun.body',
  'noun.cognition',
  'noun.communication',
  'noun.event',
  'noun.feeling',
  'noun.food',
  'noun.group',
  'noun.location',
  'noun.motive',
  'noun.object',
  'noun.person',
  'noun.phenomenon',
  'noun.plant',
  'noun.possession',
  'noun.process',
  'noun.quantity',
  'noun.relation',
  'noun.shape',
  'noun.state',
  'noun.substance',
  'noun.time',
  'verb.body',
  'verb.change',
  'verb.cognition',
  'verb.communication',
  'verb.competition',
  'verb.consumption',
  'verb.contact',
  'verb.creation',
  'verb.emotion',
  'verb.motion',
  'verb.perception',
  'verb.possession',
  'verb.social',
  'verb.stative',
  'verb.weather',
  'adj.ppl'
];
export {
indexLookup,
is,
get,
lineDataToJSON,
LEX_NAMES,
lookup
}

56
src/util.js Normal file
View File

@ -0,0 +1,56 @@
let stopwords = require('../lib/natural/util/stopwords').words;
let stopwordsStr = makeStopwordString(stopwords);
/**
 * Builds a space-delimited stopword string (" a about ... ") suitable for
 * fast indexOf/includes membership tests (see isStopword).
 * @param {string[]} stopwords
 * @returns {string}
 */
function makeStopwordString(stopwords) {
  return ` ${stopwords.join(' ')} `;
}
// setImmediate executes callback AFTER promise handlers.
// Without it, exceptions in callback may be caught by Promise.
/**
 * Invokes fn(...args) deferred outside the current promise chain so an
 * exception thrown by the callback is not swallowed as a rejection.
 * Falls back to setTimeout(0) where setImmediate is unavailable (browsers).
 */
function nextTick(fn, args) {
  if (!fn) return;
  // FIX: was a synchronous fn.apply(), contradicting the comment above --
  // a callback throwing inside a .then() handler was caught by the Promise.
  const defer = typeof setImmediate === 'function'
    ? setImmediate
    : (f) => setTimeout(f, 0);
  defer(() => fn.apply(null, args));
}
/**
 * Normalizes a word for index lookup: lowercase, whitespace runs -> "_".
 * @param {string} word
 * @returns {string}
 */
function normalize(word) {
  const lowered = word.toLowerCase();
  return lowered.replace(/\s+/g, '_');
}
/**
 * True when word occurs in a space-delimited stopword string
 * (see makeStopwordString).
 * @param {string} stopwords - " w1 w2 ... " string
 * @param {string} word
 * @returns {boolean}
 */
function isStopword(stopwords, word) {
  return stopwords.includes(` ${word} `);
}
/**
 * Splits a string into word tokens on runs of non-word characters.
 * Note: leading/trailing non-word chars yield empty-string tokens.
 * @param {string} str
 * @returns {string[]}
 */
function tokenizer(str) {
  return str.split(/\W+/);
}
/**
 * Returns a copy of arr with duplicates removed (first occurrence wins).
 * @param {Array} arr
 * @returns {Array}
 */
function uniq(arr) {
  return [...new Set(arr)];
}
/**
 * @param {*} s
 * @returns {boolean} true when s is a primitive string
 */
function isString(s) {
  return typeof s === 'string';
}
/**
 * Inverse of filter: keeps the items for which predicate is falsy.
 * @param {Array} arr
 * @param {Function} predicate
 * @returns {Array}
 */
function reject(arr, predicate) {
  const keep = item => !predicate(item);
  return arr.filter(keep);
}
/**
 * Tokenizes text into unique words, removing stopwords per this.options.
 * An array argument is returned as-is (no dedupe or stopword filtering).
 * @param {string|Array} text
 * @returns {Array} word tokens
 * @this WordPOS
 */
function prepText(text) {
  if (Array.isArray(text)) return text;
  const words = uniq(tokenizer(text));
  const stops = this.options.stopwords;
  if (!stops) return words;
  const stopString = isString(stops) ? stops : stopwordsStr;
  return reject(words, isStopword.bind(null, stopString));
}
export {
nextTick,
normalize,
tokenizer,
prepText,
makeStopwordString
}