update to babel@7, run tests for both node & browser
This commit is contained in:
parent
890f3dd353
commit
09e774de06
36
.babelrc
36
.babelrc
|
@ -1,4 +1,36 @@
|
||||||
{
|
{
|
||||||
"presets": ["env", "stage-1"],
|
"ignore": [
|
||||||
"plugins": ["transform-class-properties"]
|
"./test/dict"
|
||||||
|
],
|
||||||
|
"presets": [
|
||||||
|
"@babel/preset-env"
|
||||||
|
],
|
||||||
|
"plugins": [
|
||||||
|
"@babel/plugin-proposal-class-properties",
|
||||||
|
"babel-plugin-dynamic-import-node",
|
||||||
|
"@babel/plugin-syntax-dynamic-import",
|
||||||
|
"@babel/plugin-syntax-import-meta",
|
||||||
|
"@babel/plugin-proposal-json-strings",
|
||||||
|
[
|
||||||
|
"@babel/plugin-proposal-decorators",
|
||||||
|
{
|
||||||
|
"legacy": true
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"@babel/plugin-proposal-function-sent",
|
||||||
|
"@babel/plugin-proposal-export-namespace-from",
|
||||||
|
"@babel/plugin-proposal-numeric-separator",
|
||||||
|
"@babel/plugin-proposal-throw-expressions",
|
||||||
|
"@babel/plugin-proposal-export-default-from",
|
||||||
|
"@babel/plugin-proposal-logical-assignment-operators",
|
||||||
|
"@babel/plugin-proposal-optional-chaining",
|
||||||
|
[
|
||||||
|
"@babel/plugin-proposal-pipeline-operator",
|
||||||
|
{
|
||||||
|
"proposal": "minimal"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"@babel/plugin-proposal-nullish-coalescing-operator",
|
||||||
|
"@babel/plugin-proposal-do-expressions"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
35
package.json
35
package.json
|
@ -29,15 +29,30 @@
|
||||||
"bin": "./bin/wordpos-cli.js",
|
"bin": "./bin/wordpos-cli.js",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"commander": "^2.0.0",
|
"commander": "^2.0.0",
|
||||||
|
"symlink-dir": "1.1.3",
|
||||||
"underscore": ">=1.3.1",
|
"underscore": ">=1.3.1",
|
||||||
"wordnet-db": "^3.1.6",
|
"wordnet-db": "^3.1.6"
|
||||||
"symlink-dir": "1.1.3"
|
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"babel-core": "^6.26.3",
|
"@babel/core": "^7.0.0",
|
||||||
"babel-plugin-transform-class-properties": "^6.24.1",
|
"@babel/plugin-proposal-class-properties": "^7.0.0",
|
||||||
"babel-preset-env": "^1.7.0",
|
"@babel/plugin-proposal-decorators": "^7.0.0",
|
||||||
"babel-preset-stage-2": "^6.24.1",
|
"@babel/plugin-proposal-do-expressions": "^7.0.0",
|
||||||
|
"@babel/plugin-proposal-export-default-from": "^7.0.0",
|
||||||
|
"@babel/plugin-proposal-export-namespace-from": "^7.0.0",
|
||||||
|
"@babel/plugin-proposal-function-sent": "^7.0.0",
|
||||||
|
"@babel/plugin-proposal-json-strings": "^7.0.0",
|
||||||
|
"@babel/plugin-proposal-logical-assignment-operators": "^7.0.0",
|
||||||
|
"@babel/plugin-proposal-nullish-coalescing-operator": "^7.0.0",
|
||||||
|
"@babel/plugin-proposal-numeric-separator": "^7.0.0",
|
||||||
|
"@babel/plugin-proposal-optional-chaining": "^7.0.0",
|
||||||
|
"@babel/plugin-proposal-pipeline-operator": "^7.0.0",
|
||||||
|
"@babel/plugin-proposal-throw-expressions": "^7.0.0",
|
||||||
|
"@babel/plugin-syntax-dynamic-import": "^7.0.0",
|
||||||
|
"@babel/plugin-syntax-import-meta": "^7.0.0",
|
||||||
|
"@babel/preset-env": "^7.0.0",
|
||||||
|
"@babel/register": "^7.0.0",
|
||||||
|
"babel-plugin-dynamic-import-node": "^2.2.0",
|
||||||
"chai": "^4.0.2",
|
"chai": "^4.0.2",
|
||||||
"mini-bench": "^1.0.0",
|
"mini-bench": "^1.0.0",
|
||||||
"mocha": "^5.2.0"
|
"mocha": "^5.2.0"
|
||||||
|
@ -52,14 +67,12 @@
|
||||||
"postinstall": "npm run postinstall-web && npm run postinstall-node",
|
"postinstall": "npm run postinstall-web && npm run postinstall-node",
|
||||||
"postinstall-node": "node tools/stat.js --no-stats index.adv index.adj index.verb index.noun",
|
"postinstall-node": "node tools/stat.js --no-stats index.adv index.adj index.verb index.noun",
|
||||||
"postinstall-web": "node scripts/makeJsonDict.js index data",
|
"postinstall-web": "node scripts/makeJsonDict.js index data",
|
||||||
|
|
||||||
"build": "parcel build --detailed-report -o wordpos.min.js --global WordPOS -t browser src/browser/index.js",
|
"build": "parcel build --detailed-report -o wordpos.min.js --global WordPOS -t browser src/browser/index.js",
|
||||||
"test": "mocha test",
|
"test": "npm run test-node && npm run test-browser",
|
||||||
"test-browser": "mocha test- # TODO",
|
"test-node": "mocha test",
|
||||||
|
"test-browser": "mocha test/wordpos_test --require @babel/register",
|
||||||
"prestart": "symlink-dir dict samples/self-hosted/dict",
|
"prestart": "symlink-dir dict samples/self-hosted/dict",
|
||||||
"start": "npm run build && http-server",
|
"start": "npm run build && http-server",
|
||||||
|
|
||||||
"prestart-dev": "rm -rf build && mkdir build && symlink-dir dict build/dict && cp samples/self-hosted/main.js build/main.txt",
|
"prestart-dev": "rm -rf build && mkdir build && symlink-dir dict build/dict && cp samples/self-hosted/main.js build/main.txt",
|
||||||
"start-dev": "npm run start-self -- -d build",
|
"start-dev": "npm run start-self -- -d build",
|
||||||
"start-self": "parcel samples/self-hosted/index.html",
|
"start-self": "parcel samples/self-hosted/index.html",
|
||||||
|
|
|
@ -9,6 +9,7 @@ let fs = require('fs');
|
||||||
let path = require('path');
|
let path = require('path');
|
||||||
|
|
||||||
let outPath = './dict';
|
let outPath = './dict';
|
||||||
|
let testPath = './test/dict';
|
||||||
let posExt = ['adj', 'adv', 'noun', 'verb'];
|
let posExt = ['adj', 'adv', 'noun', 'verb'];
|
||||||
let dictRoot = './node_modules/wordnet-db/dict/';
|
let dictRoot = './node_modules/wordnet-db/dict/';
|
||||||
const fileTypes = {
|
const fileTypes = {
|
||||||
|
@ -30,11 +31,16 @@ function uniq(arr) {
|
||||||
console.time('Done');
|
console.time('Done');
|
||||||
|
|
||||||
// create out directory
|
// create out directory
|
||||||
try {
|
const ensurePath = (path) => {
|
||||||
fs.statSync(outPath);
|
try {
|
||||||
} catch (e) {
|
fs.statSync(path);
|
||||||
fs.mkdirSync(outPath);
|
} catch (e) {
|
||||||
}
|
fs.mkdirSync(path);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
ensurePath(outPath);
|
||||||
|
ensurePath(testPath);
|
||||||
|
|
||||||
function processFile(name) {
|
function processFile(name) {
|
||||||
|
|
||||||
|
@ -66,8 +72,13 @@ function processFile(name) {
|
||||||
function writeFile(pos, obj) {
|
function writeFile(pos, obj) {
|
||||||
console.time(' write');
|
console.time(' write');
|
||||||
let text = JSON.stringify(obj);
|
let text = JSON.stringify(obj);
|
||||||
text = 'export default ' + text;
|
fs.writeFileSync(path.resolve(outPath, name + '.' + pos + '.js'),
|
||||||
fs.writeFileSync(path.resolve(outPath, name + '.' + pos + '.js'), text);
|
'export default ' + text);
|
||||||
|
|
||||||
|
// also write for mocha tests
|
||||||
|
fs.writeFileSync(path.resolve(testPath, name + '.' + pos + '.js'),
|
||||||
|
'module.exports.default = ' + text);
|
||||||
|
|
||||||
console.timeEnd(' write');
|
console.timeEnd(' write');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -23,8 +23,12 @@ class BaseFile {
|
||||||
|
|
||||||
load() {
|
load() {
|
||||||
if (this.loadError) return Promise.reject(this.loadError);
|
if (this.loadError) return Promise.reject(this.loadError);
|
||||||
return import(this.filePath)
|
let promise = Promise.resolve(require(this.filePath));
|
||||||
.then(exports => this.file = exports.default)
|
|
||||||
|
return promise
|
||||||
|
.then(exports => {
|
||||||
|
this.file = exports.default
|
||||||
|
})
|
||||||
.catch(err => {
|
.catch(err => {
|
||||||
console.error(`Error loading "${this.type}" file ${this.filePath}.`, err);
|
console.error(`Error loading "${this.type}" file ${this.filePath}.`, err);
|
||||||
this.loadError = err;
|
this.loadError = err;
|
||||||
|
@ -37,4 +41,5 @@ class BaseFile {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export default BaseFile;
|
// export default BaseFile;
|
||||||
|
module.exports = BaseFile;
|
||||||
|
|
|
@ -9,8 +9,9 @@
|
||||||
* Released under MIT license
|
* Released under MIT license
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { lineDataToJSON, LEX_NAMES } from '../common';
|
const { lineDataToJSON, LEX_NAMES } = require('../common');
|
||||||
import BaseFile from './baseFile';
|
const { zeroPad } = require('../util');
|
||||||
|
const BaseFile = require('./baseFile');
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* get parsed line from data file
|
* get parsed line from data file
|
||||||
|
@ -43,7 +44,10 @@ function lookup(offsets, callback) {
|
||||||
|
|
||||||
if (single) offsets = [offsets];
|
if (single) offsets = [offsets];
|
||||||
return new Promise(function(resolve, reject) {
|
return new Promise(function(resolve, reject) {
|
||||||
results = offsets.map(readLine).filter(valid);
|
results = offsets
|
||||||
|
.map(zeroPad)
|
||||||
|
.map(readLine)
|
||||||
|
.filter(valid);
|
||||||
|
|
||||||
if (!results.length) {
|
if (!results.length) {
|
||||||
let err = new RangeError(`No data at offsets ${offsets.join()} in ${self.filePath}.`);
|
let err = new RangeError(`No data at offsets ${offsets.join()} in ${self.filePath}.`);
|
||||||
|
@ -83,4 +87,4 @@ class DataFile extends BaseFile {
|
||||||
*/
|
*/
|
||||||
DataFile.LEX_NAMES = LEX_NAMES;
|
DataFile.LEX_NAMES = LEX_NAMES;
|
||||||
|
|
||||||
export default DataFile;
|
module.exports = DataFile;
|
||||||
|
|
|
@ -7,12 +7,10 @@
|
||||||
* Released under MIT license
|
* Released under MIT license
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { stopwords, prepText, makeStopwordString } from '../util';
|
const { stopwords, prepText, makeStopwordString } = require('../util');
|
||||||
import { is, get, lookup, seek } from '../common';
|
const { is, get, getPOS, lookup, seek, lookupPOS } = require('../common');
|
||||||
import IndexFile from './indexFile';
|
const IndexFile = require('./indexFile');
|
||||||
import DataFile from './dataFile';
|
const DataFile = require('./dataFile');
|
||||||
|
|
||||||
console.log(4545, ' borwser index')
|
|
||||||
|
|
||||||
const POS = {
|
const POS = {
|
||||||
n: 'noun',
|
n: 'noun',
|
||||||
|
@ -105,6 +103,7 @@ class WordPOS {
|
||||||
* getX() - Find all words in string that are given POS
|
* getX() - Find all words in string that are given POS
|
||||||
* @see get
|
* @see get
|
||||||
*/
|
*/
|
||||||
|
getPOS = getPOS;
|
||||||
getAdjectives = get('isAdjective');
|
getAdjectives = get('isAdjective');
|
||||||
getAdverbs = get('isAdverb');
|
getAdverbs = get('isAdverb');
|
||||||
getNouns = get('isNoun');
|
getNouns = get('isNoun');
|
||||||
|
@ -114,6 +113,7 @@ class WordPOS {
|
||||||
* lookupX() - Lookup word definition if already know POS
|
* lookupX() - Lookup word definition if already know POS
|
||||||
* @see lookup
|
* @see lookup
|
||||||
*/
|
*/
|
||||||
|
lookup = lookupPOS;
|
||||||
lookupAdjective = lookup('a');
|
lookupAdjective = lookup('a');
|
||||||
lookupAdverb = lookup('r');
|
lookupAdverb = lookup('r');
|
||||||
lookupNoun = lookup('n');
|
lookupNoun = lookup('n');
|
||||||
|
@ -159,7 +159,6 @@ WordPOS.defaults = {
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* access to WordNet DB
|
* access to WordNet DB
|
||||||
* @type {object}
|
* @type {object}
|
||||||
|
|
|
@ -7,8 +7,8 @@
|
||||||
* Released under MIT license
|
* Released under MIT license
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { indexLookup } from '../common';
|
const { indexLookup } = require('../common');
|
||||||
import BaseFile from './baseFile';
|
const BaseFile = require('./baseFile');
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* find a search term in an index file (using fast index)
|
* find a search term in an index file (using fast index)
|
||||||
|
@ -64,4 +64,4 @@ class IndexFile extends BaseFile {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export default IndexFile;
|
module.exports = IndexFile;
|
||||||
|
|
128
src/common.js
128
src/common.js
|
@ -9,10 +9,10 @@
|
||||||
* Released under MIT license
|
* Released under MIT license
|
||||||
*/
|
*/
|
||||||
|
|
||||||
var { normalize, nextTick, isString } = require('./util');
|
var { normalize, nextTick, isString, uniq, diff, flat } = require('./util');
|
||||||
|
|
||||||
function error(err, callback) {
|
function error(err, callback) {
|
||||||
if (isString(err)) err = new Error(err);
|
if (isString(err)) err = new RangeError(err);
|
||||||
callback && callback(err, {});
|
callback && callback(err, {});
|
||||||
return Promise.reject(err);
|
return Promise.reject(err);
|
||||||
}
|
}
|
||||||
|
@ -105,6 +105,47 @@ function indexLookup(word, callback) {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* lookup a word in all indexes
|
||||||
|
*
|
||||||
|
* @param word {string} - search word
|
||||||
|
* @param callback {Function} (optional) - callback with (results, word) signature
|
||||||
|
* @returns {Promise}
|
||||||
|
* @this WordPOS
|
||||||
|
*/
|
||||||
|
function lookupPOS(word, callback) {
|
||||||
|
var self = this,
|
||||||
|
results = [],
|
||||||
|
profile = this.options.profile,
|
||||||
|
start = profile && new Date(),
|
||||||
|
methods = ['lookupAdverb', 'lookupAdjective', 'lookupVerb', 'lookupNoun'];
|
||||||
|
|
||||||
|
return Promise
|
||||||
|
.all(methods.map(exec))
|
||||||
|
.then(done)
|
||||||
|
.catch(error);
|
||||||
|
|
||||||
|
function exec(method) {
|
||||||
|
return self[ method ]
|
||||||
|
.call(self, word)
|
||||||
|
.then(function collect(result){
|
||||||
|
results = results.concat(result);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
function done() {
|
||||||
|
var args = [results, word];
|
||||||
|
profile && args.push(new Date() - start);
|
||||||
|
nextTick(callback, args);
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
function error(err) {
|
||||||
|
nextTick(callback, [[], word]);
|
||||||
|
throw err;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* getX() factory function
|
* getX() factory function
|
||||||
*
|
*
|
||||||
|
@ -127,7 +168,7 @@ function get(isFn) {
|
||||||
.then(() => Promise.all(words.map(exec)))
|
.then(() => Promise.all(words.map(exec)))
|
||||||
.then(done)
|
.then(done)
|
||||||
.catch(err => {
|
.catch(err => {
|
||||||
done(); // callback signature is same!
|
// done(); // callback signature is same! // FIXME
|
||||||
return Promise.reject(err);
|
return Promise.reject(err);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -148,6 +189,53 @@ function get(isFn) {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* getPOS() - Find all POS for all words in given string
|
||||||
|
*
|
||||||
|
* @param text {string} - words to lookup for POS
|
||||||
|
* @param callback {function} (optional) - receives object with words broken into POS or 'rest', ie,
|
||||||
|
* Object: {nouns:[], verbs:[], adjectives:[], adverbs:[], rest:[]}
|
||||||
|
* @return Promise - resolve function receives data object
|
||||||
|
*/
|
||||||
|
function getPOS(text, callback) {
|
||||||
|
var self = this,
|
||||||
|
data = {nouns:[], verbs:[], adjectives:[], adverbs:[], rest:[]},
|
||||||
|
profile = this.options.profile,
|
||||||
|
start = profile && new Date(),
|
||||||
|
words = this.parse(text),
|
||||||
|
methods = ['getAdverbs', 'getAdjectives', 'getVerbs', 'getNouns'];
|
||||||
|
|
||||||
|
return Promise
|
||||||
|
.all(methods.map(exec))
|
||||||
|
.then(done)
|
||||||
|
.catch(error);
|
||||||
|
|
||||||
|
function exec(method) {
|
||||||
|
return self[ method ]
|
||||||
|
.call(self, text, null, true)
|
||||||
|
.then(function collect(results) {
|
||||||
|
// getAdjectives --> adjectives
|
||||||
|
var pos = method.replace('get','').toLowerCase();
|
||||||
|
data[ pos ] = results;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
function done() {
|
||||||
|
var args = [data];
|
||||||
|
var matches = uniq(flat(Object.values(data)));
|
||||||
|
data.rest = diff(words, matches);
|
||||||
|
|
||||||
|
profile && args.push(new Date() - start);
|
||||||
|
nextTick(callback, args);
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
|
||||||
|
function error(err) {
|
||||||
|
nextTick(callback, []);
|
||||||
|
throw err;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* isX() factory function
|
* isX() factory function
|
||||||
*
|
*
|
||||||
|
@ -253,6 +341,35 @@ function seek(offset, pos, callback){
|
||||||
return data.lookup(offset, callback);
|
return data.lookup(offset, callback);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* factory function for randX()
|
||||||
|
*
|
||||||
|
* @param pos {string} - a,r,n,v
|
||||||
|
* @returns {Function} - rand function bound to an index file
|
||||||
|
* @this WordPOS
|
||||||
|
*/
|
||||||
|
function makeRandX(pos){
|
||||||
|
return function(opts, callback, _noprofile) {
|
||||||
|
// disable profiling when isX() used internally
|
||||||
|
var profile = this.options.profile && !_noprofile,
|
||||||
|
start = profile && new Date(),
|
||||||
|
args = [],
|
||||||
|
index = this.getFilesFor(pos).index,
|
||||||
|
startsWith = opts && opts.startsWith || '',
|
||||||
|
count = opts && opts.count || 1;
|
||||||
|
|
||||||
|
if (typeof opts === 'function') {
|
||||||
|
callback = opts;
|
||||||
|
}
|
||||||
|
|
||||||
|
return index.rand(startsWith, count, function (record) {
|
||||||
|
args.push(record, startsWith);
|
||||||
|
profile && args.push(new Date() - start);
|
||||||
|
callback && callback.apply(null, args);
|
||||||
|
});
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
const LEX_NAMES = [
|
const LEX_NAMES = [
|
||||||
'adj.all',
|
'adj.all',
|
||||||
'adj.pert',
|
'adj.pert',
|
||||||
|
@ -306,8 +423,11 @@ module.exports= {
|
||||||
is,
|
is,
|
||||||
get,
|
get,
|
||||||
seek,
|
seek,
|
||||||
|
getPOS,
|
||||||
|
makeRandX,
|
||||||
|
|
||||||
lineDataToJSON,
|
lineDataToJSON,
|
||||||
LEX_NAMES,
|
LEX_NAMES,
|
||||||
lookup
|
lookup,
|
||||||
|
lookupPOS
|
||||||
}
|
}
|
||||||
|
|
|
@ -12,6 +12,7 @@
|
||||||
var fs = require('fs'),
|
var fs = require('fs'),
|
||||||
path = require('path'),
|
path = require('path'),
|
||||||
_ = require('underscore'),
|
_ = require('underscore'),
|
||||||
|
{ zeroPad } = require('../util'),
|
||||||
{
|
{
|
||||||
lineDataToJSON,
|
lineDataToJSON,
|
||||||
LEX_NAMES
|
LEX_NAMES
|
||||||
|
@ -24,9 +25,7 @@ var fs = require('fs'),
|
||||||
* @return {boolean} true if line data is good
|
* @return {boolean} true if line data is good
|
||||||
*/
|
*/
|
||||||
function dataCheck(line, location) {
|
function dataCheck(line, location) {
|
||||||
var pad = '00000000', // 8 zeros
|
return line.indexOf(zeroPad(location)) === 0;
|
||||||
padded = String(pad + location).slice( - pad.length);
|
|
||||||
return line.indexOf(padded) === 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -52,13 +51,13 @@ function readLocation(location, callback) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
//console.log(' read %d bytes at <%d>', count, location);
|
//console.log(' read %d bytes at <%d>', count, location);
|
||||||
if (!dataCheck(str, location)) return callback(new Error('Bad data at location ' + location));
|
if (!dataCheck(str, location)) return callback(new RangeError('No data at offset ' + location));
|
||||||
|
|
||||||
callback(null, lineDataToJSON(str, location));
|
callback(null, lineDataToJSON(str, location));
|
||||||
});
|
});
|
||||||
|
|
||||||
function readChunk(pos, cb) {
|
function readChunk(pos, cb) {
|
||||||
var nonDataErr = new Error('no data at offset ' + pos);
|
var nonDataErr = new RangeError('No data at offset ' + pos);
|
||||||
|
|
||||||
fs.read(file.fd, buffer, 0, len, pos, function (err, count) {
|
fs.read(file.fd, buffer, 0, len, pos, function (err, count) {
|
||||||
if (!count) return cb(nonDataErr, count);
|
if (!count) return cb(nonDataErr, count);
|
||||||
|
|
|
@ -27,8 +27,10 @@ var
|
||||||
{
|
{
|
||||||
is,
|
is,
|
||||||
get,
|
get,
|
||||||
|
getPOS,
|
||||||
seek,
|
seek,
|
||||||
lookup
|
lookup,
|
||||||
|
lookupPOS
|
||||||
} = require('../common');
|
} = require('../common');
|
||||||
|
|
||||||
stopwordsStr = makeStopwordString(stopwords);
|
stopwordsStr = makeStopwordString(stopwords);
|
||||||
|
@ -58,7 +60,7 @@ var WordPOS = function(options) {
|
||||||
this.advData = new DataFile(dictPath, 'adv');
|
this.advData = new DataFile(dictPath, 'adv');
|
||||||
|
|
||||||
// define randX() functions
|
// define randX() functions
|
||||||
require('../rand').init(this); // FIXME
|
require('../rand').init(this);
|
||||||
|
|
||||||
if (_.isArray(this.options.stopwords)) {
|
if (_.isArray(this.options.stopwords)) {
|
||||||
this.options.stopwords = makeStopwordString(this.options.stopwords);
|
this.options.stopwords = makeStopwordString(this.options.stopwords);
|
||||||
|
@ -94,39 +96,7 @@ var wordposProto = WordPOS.prototype;
|
||||||
* @param callback {Function} (optional) - callback with (results, word) signature
|
* @param callback {Function} (optional) - callback with (results, word) signature
|
||||||
* @returns {Promise}
|
* @returns {Promise}
|
||||||
*/
|
*/
|
||||||
wordposProto.lookup = function(word, callback) {
|
wordposProto.lookup = lookupPOS;
|
||||||
var self = this,
|
|
||||||
results = [],
|
|
||||||
profile = this.options.profile,
|
|
||||||
start = profile && new Date(),
|
|
||||||
methods = ['lookupAdverb', 'lookupAdjective', 'lookupVerb', 'lookupNoun'];
|
|
||||||
|
|
||||||
return Promise
|
|
||||||
.all(methods.map(exec))
|
|
||||||
.then(done)
|
|
||||||
.catch(error);
|
|
||||||
|
|
||||||
function exec(method) {
|
|
||||||
return self[ method ]
|
|
||||||
.call(self, word)
|
|
||||||
.then(function collect(result){
|
|
||||||
results = results.concat(result);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
function done() {
|
|
||||||
var args = [results, word];
|
|
||||||
profile && args.push(new Date() - start);
|
|
||||||
nextTick(callback, args);
|
|
||||||
return results;
|
|
||||||
}
|
|
||||||
|
|
||||||
function error(err) {
|
|
||||||
nextTick(callback, [[], word]);
|
|
||||||
throw err;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* getPOS() - Find all POS for all words in given string
|
* getPOS() - Find all POS for all words in given string
|
||||||
|
@ -136,49 +106,7 @@ wordposProto.lookup = function(word, callback) {
|
||||||
* Object: {nouns:[], verbs:[], adjectives:[], adverbs:[], rest:[]}
|
* Object: {nouns:[], verbs:[], adjectives:[], adverbs:[], rest:[]}
|
||||||
* @return Promise - resolve function receives data object
|
* @return Promise - resolve function receives data object
|
||||||
*/
|
*/
|
||||||
wordposProto.getPOS = function(text, callback) {
|
wordposProto.getPOS = getPOS;
|
||||||
var self = this,
|
|
||||||
data = {nouns:[], verbs:[], adjectives:[], adverbs:[], rest:[]},
|
|
||||||
profile = this.options.profile,
|
|
||||||
start = profile && new Date(),
|
|
||||||
words = this.parse(text),
|
|
||||||
methods = ['getAdverbs', 'getAdjectives', 'getVerbs', 'getNouns'];
|
|
||||||
|
|
||||||
return Promise
|
|
||||||
.all(methods.map(exec))
|
|
||||||
.then(done)
|
|
||||||
.catch(error);
|
|
||||||
|
|
||||||
function exec(method) {
|
|
||||||
return self[ method ]
|
|
||||||
.call(self, text, null, true)
|
|
||||||
.then(function collect(results) {
|
|
||||||
// getAdjectives --> adjectives
|
|
||||||
var pos = method.replace('get','').toLowerCase();
|
|
||||||
data[ pos ] = results;
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
function done() {
|
|
||||||
var matches = _(data).chain()
|
|
||||||
.values()
|
|
||||||
.flatten()
|
|
||||||
.uniq()
|
|
||||||
.value(),
|
|
||||||
args = [data];
|
|
||||||
|
|
||||||
data.rest = _(words).difference(matches);
|
|
||||||
|
|
||||||
profile && args.push(new Date() - start);
|
|
||||||
nextTick(callback, args);
|
|
||||||
return data;
|
|
||||||
}
|
|
||||||
|
|
||||||
function error(err) {
|
|
||||||
nextTick(callback, []);
|
|
||||||
throw err;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* get index and data files for given pos
|
* get index and data files for given pos
|
||||||
|
|
39
src/rand.js
39
src/rand.js
|
@ -12,45 +12,11 @@
|
||||||
var _ = require('underscore')._,
|
var _ = require('underscore')._,
|
||||||
util = require('util'),
|
util = require('util'),
|
||||||
Trie = require('../lib/natural/trie/trie'),
|
Trie = require('../lib/natural/trie/trie'),
|
||||||
|
indexPath = process.browser ? 'browser' : 'node',
|
||||||
|
IndexFile = require(`./${indexPath}/indexFile`),
|
||||||
// FIXME
|
|
||||||
IndexFile = require('./node/indexFile'),
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
KEY_LENGTH = 3;
|
KEY_LENGTH = 3;
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* factory function for randX()
|
|
||||||
*
|
|
||||||
* @param pos {string} - a,r,n,v
|
|
||||||
* @returns {Function} - rand function bound to an index file
|
|
||||||
*/
|
|
||||||
function makeRandX(pos){
|
|
||||||
return function(opts, callback, _noprofile) {
|
|
||||||
// disable profiling when isX() used internally
|
|
||||||
var profile = this.options.profile && !_noprofile,
|
|
||||||
start = profile && new Date(),
|
|
||||||
args = [],
|
|
||||||
index = this.getFilesFor(pos).index,
|
|
||||||
startsWith = opts && opts.startsWith || '',
|
|
||||||
count = opts && opts.count || 1;
|
|
||||||
|
|
||||||
if (typeof opts === 'function') {
|
|
||||||
callback = opts;
|
|
||||||
}
|
|
||||||
|
|
||||||
return index.rand(startsWith, count, function (record) {
|
|
||||||
args.push(record, startsWith);
|
|
||||||
profile && args.push(new Date() - start);
|
|
||||||
callback && callback.apply(null, args);
|
|
||||||
});
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* rand function (bound to index)
|
* rand function (bound to index)
|
||||||
*
|
*
|
||||||
|
@ -58,6 +24,7 @@ function makeRandX(pos){
|
||||||
* @param num {number} - number of words to return
|
* @param num {number} - number of words to return
|
||||||
* @param callback {function} - callback function, receives words array and startsWith
|
* @param callback {function} - callback function, receives words array and startsWith
|
||||||
* @returns Promise
|
* @returns Promise
|
||||||
|
* @this IndexFile
|
||||||
*/
|
*/
|
||||||
function rand(startsWith, num, callback){
|
function rand(startsWith, num, callback){
|
||||||
var self = this,
|
var self = this,
|
||||||
|
|
21
src/util.js
21
src/util.js
|
@ -22,6 +22,12 @@ function nextTick(fn, args) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// offsets must be zero-padded to 8 chars
|
||||||
|
function zeroPad(str) {
|
||||||
|
var pad = '00000000'; // 8 zeros
|
||||||
|
return String(pad + str).slice(-pad.length);
|
||||||
|
}
|
||||||
|
|
||||||
function normalize(word) {
|
function normalize(word) {
|
||||||
return word.toLowerCase().replace(/\s+/g, '_');
|
return word.toLowerCase().replace(/\s+/g, '_');
|
||||||
}
|
}
|
||||||
|
@ -38,6 +44,15 @@ function uniq(arr) {
|
||||||
return arr.filter((v, i) => arr.indexOf(v) === i);
|
return arr.filter((v, i) => arr.indexOf(v) === i);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function diff(arr, subArr) {
|
||||||
|
return arr.filter(x => !subArr.includes(x));
|
||||||
|
}
|
||||||
|
|
||||||
|
// flatten an array - 1-deep only!
|
||||||
|
function flat(arr) {
|
||||||
|
return [].concat.apply([], arr);
|
||||||
|
}
|
||||||
|
|
||||||
function isString(s) {
|
function isString(s) {
|
||||||
return typeof s === 'string';
|
return typeof s === 'string';
|
||||||
}
|
}
|
||||||
|
@ -57,10 +72,14 @@ function prepText(text) {
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
isString,
|
isString,
|
||||||
|
zeroPad,
|
||||||
stopwords,
|
stopwords,
|
||||||
nextTick,
|
nextTick,
|
||||||
normalize,
|
normalize,
|
||||||
tokenizer,
|
tokenizer,
|
||||||
prepText,
|
prepText,
|
||||||
makeStopwordString
|
makeStopwordString,
|
||||||
|
uniq,
|
||||||
|
diff,
|
||||||
|
flat
|
||||||
};
|
};
|
||||||
|
|
|
@ -9,9 +9,7 @@
|
||||||
* Released under MIT license
|
* Released under MIT license
|
||||||
*/
|
*/
|
||||||
|
|
||||||
console.log(333, process.browser);
|
if (process.browser) {
|
||||||
|
|
||||||
if (11 || process.browser) {
|
|
||||||
module.exports = require('./browser');
|
module.exports = require('./browser');
|
||||||
} else {
|
} else {
|
||||||
module.exports = require('./node');
|
module.exports = require('./node');
|
||||||
|
|
|
@ -23,9 +23,27 @@ var
|
||||||
chai = require('chai'),
|
chai = require('chai'),
|
||||||
_ = require('underscore'),
|
_ = require('underscore'),
|
||||||
assert = chai.assert,
|
assert = chai.assert,
|
||||||
|
browser = process.browser = process.argv.includes('@babel/register'),
|
||||||
WordPOS = require('../src/wordpos'),
|
WordPOS = require('../src/wordpos'),
|
||||||
wordpos = new WordPOS({profile: false});
|
path = require('path'),
|
||||||
|
dictPath = browser ? path.resolve('./test/dict') : undefined,
|
||||||
|
wordpos = new WordPOS({
|
||||||
|
profile: false,
|
||||||
|
dictPath: dictPath
|
||||||
|
});
|
||||||
|
|
||||||
|
const assertNoData = (err) => {
|
||||||
|
assert(err instanceof RangeError);
|
||||||
|
assert(/No data at offset/.test(err.message));
|
||||||
|
};
|
||||||
|
|
||||||
|
const assertOffsetErr = (err) => {
|
||||||
|
assert(err instanceof RangeError);
|
||||||
|
assert.equal(err.message, 'Offset must be valid positive number: foobar');
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
console.log('Running', browser ? 'browser' : 'node', 'tests');
|
||||||
chai.config.showDiff = true;
|
chai.config.showDiff = true;
|
||||||
|
|
||||||
var str = "The angry bear chased the frightened little squirrel",
|
var str = "The angry bear chased the frightened little squirrel",
|
||||||
|
@ -42,23 +60,22 @@ var str = "The angry bear chased the frightened little squirrel",
|
||||||
|
|
||||||
|
|
||||||
describe('lookup', function() {
|
describe('lookup', function() {
|
||||||
it('with callback', function (done) {
|
|
||||||
wordpos.lookup('hegemony', function (result) {
|
it('with callback', function () {
|
||||||
|
return wordpos.lookup('hegemony', function (result) {
|
||||||
assert.equal(result.length, 1);
|
assert.equal(result.length, 1);
|
||||||
assert.equal(result[0].pos, 'n');
|
assert.equal(result[0].pos, 'n');
|
||||||
assert.equal(result[0].lemma, 'hegemony');
|
assert.equal(result[0].lemma, 'hegemony');
|
||||||
assert.equal(result[0].synonyms.length, 1);
|
assert.equal(result[0].synonyms.length, 1);
|
||||||
done();
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
it('with Promise', function (done) {
|
it('with Promise', function () {
|
||||||
wordpos.lookup('hegemony').then(function (result) {
|
return wordpos.lookup('hegemony').then(function (result) {
|
||||||
assert.equal(result.length, 1);
|
assert.equal(result.length, 1);
|
||||||
assert.equal(result[0].pos, 'n');
|
assert.equal(result[0].pos, 'n');
|
||||||
assert.equal(result[0].lemma, 'hegemony');
|
assert.equal(result[0].lemma, 'hegemony');
|
||||||
assert.equal(result[0].synonyms.length, 1);
|
assert.equal(result[0].synonyms.length, 1);
|
||||||
done();
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
@ -83,42 +100,38 @@ describe('options passed to constructor', function() {
|
||||||
|
|
||||||
|
|
||||||
describe('getX()...', function() {
|
describe('getX()...', function() {
|
||||||
it('should get all POS', function(done) {
|
|
||||||
wordpos.getPOS(str, function(result) {
|
it('should get all POS', function() {
|
||||||
|
return wordpos.getPOS(str, function(result) {
|
||||||
assert.sameMembers(result.nouns, expected.nouns);
|
assert.sameMembers(result.nouns, expected.nouns);
|
||||||
assert.sameMembers(result.verbs, expected.verbs);
|
assert.sameMembers(result.verbs, expected.verbs);
|
||||||
assert.sameMembers(result.adjectives, expected.adjectives);
|
assert.sameMembers(result.adjectives, expected.adjectives);
|
||||||
assert.sameMembers(result.adverbs, expected.adverbs);
|
assert.sameMembers(result.adverbs, expected.adverbs);
|
||||||
assert.sameMembers(result.rest, expected.rest);
|
assert.sameMembers(result.rest, expected.rest);
|
||||||
done();
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
it.only('should get nouns', function(done) {
|
it('should get nouns', function() {
|
||||||
wordpos.getNouns('foot bar', function(result) {
|
return wordpos.getNouns(str, function(result) {
|
||||||
assert.sameMembers(result, expected.nouns);
|
assert.sameMembers(result, expected.nouns);
|
||||||
done();
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should get verbs', function(done) {
|
it('should get verbs', function() {
|
||||||
wordpos.getVerbs(str, function(result) {
|
return wordpos.getVerbs(str, function(result) {
|
||||||
assert.sameMembers(result, expected.verbs);
|
assert.sameMembers(result, expected.verbs);
|
||||||
done();
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should get adjectives', function(done) {
|
it('should get adjectives', function() {
|
||||||
wordpos.getAdjectives(str, function(result) {
|
return wordpos.getAdjectives(str, function(result) {
|
||||||
assert.sameMembers(result, expected.adjectives);
|
assert.sameMembers(result, expected.adjectives);
|
||||||
done();
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should get adverbs', function(done) {
|
it('should get adverbs', function() {
|
||||||
wordpos.getAdverbs(str, function(result) {
|
return wordpos.getAdverbs(str, function(result) {
|
||||||
assert.sameMembers(result, expected.adverbs);
|
assert.sameMembers(result, expected.adverbs);
|
||||||
done();
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
@ -223,7 +236,7 @@ describe('lookupX()...', function() {
|
||||||
|
|
||||||
|
|
||||||
describe('profile option', function() {
|
describe('profile option', function() {
|
||||||
var wp = new WordPOS({profile : true});
|
var wp = new WordPOS({profile : true, dictPath: dictPath});
|
||||||
|
|
||||||
it('should return time argument for isX()', function(done){
|
it('should return time argument for isX()', function(done){
|
||||||
wp.isNoun(garble, function(result, word, time) {
|
wp.isNoun(garble, function(result, word, time) {
|
||||||
|
@ -248,7 +261,7 @@ describe('profile option', function() {
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should disable stopword filtering', function(done){
|
it('should disable stopword filtering', function(done){
|
||||||
var wp = new WordPOS({stopwords : false}),
|
var wp = new WordPOS({stopwords : false, dictPath: dictPath}),
|
||||||
strWithStopwords = 'about after all'; // 3 adjective stopwords
|
strWithStopwords = 'about after all'; // 3 adjective stopwords
|
||||||
wp.getAdjectives(strWithStopwords, function(result){
|
wp.getAdjectives(strWithStopwords, function(result){
|
||||||
assert.equal(result.length, 3);
|
assert.equal(result.length, 3);
|
||||||
|
@ -257,7 +270,7 @@ describe('profile option', function() {
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should use custom stopwords', function(done){
|
it('should use custom stopwords', function(done){
|
||||||
var wp = new WordPOS({stopwords : ['all']}),
|
var wp = new WordPOS({stopwords : ['all'], dictPath: dictPath}),
|
||||||
strWithStopwords = 'about after all'; // 3 adjective stopwords
|
strWithStopwords = 'about after all'; // 3 adjective stopwords
|
||||||
// 'all' should be filtered
|
// 'all' should be filtered
|
||||||
wp.getAdjectives(strWithStopwords, function(result){
|
wp.getAdjectives(strWithStopwords, function(result){
|
||||||
|
@ -269,7 +282,7 @@ describe('profile option', function() {
|
||||||
|
|
||||||
|
|
||||||
describe('nested callbacks on same index key', function() {
|
describe('nested callbacks on same index key', function() {
|
||||||
var wp = new WordPOS(),
|
var wp = new WordPOS({dictPath: dictPath}),
|
||||||
word1 = 'head',
|
word1 = 'head',
|
||||||
word2 = word1 + 'er';
|
word2 = word1 + 'er';
|
||||||
|
|
||||||
|
@ -360,56 +373,38 @@ describe('randX()...', function() {
|
||||||
|
|
||||||
describe('seek()...', function() {
|
describe('seek()...', function() {
|
||||||
|
|
||||||
it('should seek offset', function(done) {
|
it('should seek offset', function() {
|
||||||
wordpos.seek(offset, 'a', function(err, result) {
|
return wordpos.seek(offset, 'a', function(err, result) {
|
||||||
assert.equal(result.synsetOffset, offset);
|
assert.equal(result.synsetOffset, offset);
|
||||||
assert.equal(result.pos, 's');
|
assert.equal(result.pos, 's');
|
||||||
assert.equal(result.lemma, 'amazing');
|
assert.equal(result.lemma, 'amazing');
|
||||||
done();
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should handle bad offset', function(done) {
|
it('should handle bad offset', function() {
|
||||||
wordpos.seek('foobar', 'a', function(err, result){
|
return wordpos.seek('foobar', 'a', assertOffsetErr).catch(assertOffsetErr);
|
||||||
assert(err instanceof Error);
|
|
||||||
assert.equal(err.message, 'Offset must be valid positive number: foobar');
|
|
||||||
done();
|
|
||||||
}).catch(_.noop); // UnhandledPromiseRejectionWarning
|
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should handle wrong offset', function(done) {
|
it('should handle wrong offset', function() {
|
||||||
var bad_offset = offset + 1;
|
const bad_offset = offset + 1;
|
||||||
wordpos.seek(bad_offset, 'a', function(err, result) {
|
return wordpos.seek(bad_offset, 'a', assertNoData).catch(assertNoData);
|
||||||
assert(err instanceof Error);
|
|
||||||
assert.equal(err.message, 'Bad data at location ' + bad_offset);
|
|
||||||
assert.deepEqual(result, {});
|
|
||||||
done();
|
|
||||||
}).catch(_.noop); // UnhandledPromiseRejectionWarning;
|
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should handle very large offset', function(done) {
|
it('should handle very large offset', function() {
|
||||||
var bad_offset = offset + 100000000;
|
const bad_offset = offset + 999999999;
|
||||||
wordpos.seek(bad_offset, 'a', function(err, result) {
|
return wordpos.seek(bad_offset, 'a', assertNoData).catch(assertNoData);
|
||||||
assert(err instanceof Error);
|
|
||||||
assert.equal(err.message, 'no data at offset ' + bad_offset);
|
|
||||||
assert.deepEqual(result, {});
|
|
||||||
done();
|
|
||||||
}).catch(_.noop); // UnhandledPromiseRejectionWarning;
|
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should handle bad POS', function(done) {
|
it('should handle bad POS', function() {
|
||||||
wordpos.seek(offset, 'g', function(err, result) {
|
const assertErr = err => {
|
||||||
assert(err instanceof Error);
|
assert(err instanceof Error);
|
||||||
assert(/Incorrect POS/.test(err.message));
|
assert(/Incorrect POS/.test(err.message));
|
||||||
done();
|
};
|
||||||
}).catch(_.noop); // UnhandledPromiseRejectionWarning;
|
return wordpos.seek(offset, 'g', assertErr).catch(assertErr);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should handle wrong POS', function(done) {
|
it('should handle wrong POS', function() {
|
||||||
wordpos.seek(offset, 'v', function(err, result){
|
return wordpos.seek(offset, 'v', assertNoData).catch(assertNoData);
|
||||||
assert.equal(err.message, 'Bad data at location ' + offset);
|
|
||||||
}).catch(_.noop); // UnhandledPromiseRejectionWarning;
|
|
||||||
done();
|
|
||||||
});
|
});
|
||||||
|
|
||||||
});
|
});
|
||||||
|
@ -489,17 +484,11 @@ describe('Promise pattern', function() {
|
||||||
});
|
});
|
||||||
|
|
||||||
it('seek() - wrong offset', function () {
|
it('seek() - wrong offset', function () {
|
||||||
return wordpos.seek(offset + 1, 'a').catch(function (err) {
|
return wordpos.seek(offset + 1, 'a').catch(assertNoData);
|
||||||
assert(err instanceof Error);
|
|
||||||
assert.equal(err.message, 'Bad data at location ' + (offset+1));
|
|
||||||
});
|
|
||||||
});
|
});
|
||||||
|
|
||||||
it('seek() - bad offset', function () {
|
it('seek() - bad offset', function () {
|
||||||
return wordpos.seek('foobar', 'a').catch(function (err) {
|
return wordpos.seek('foobar', 'a').catch(assertOffsetErr);
|
||||||
assert(err instanceof Error);
|
|
||||||
assert.equal(err.message, 'Offset must be valid positive number: foobar');
|
|
||||||
});
|
|
||||||
});
|
});
|
||||||
|
|
||||||
});
|
});
|
||||||
|
|
Loading…
Reference in New Issue