Added seek() method and lexName property. Bump to 1.1.
This commit is contained in:
parent
57c3340130
commit
0abe5a9010
29
README.md
29
README.md
|
@ -29,7 +29,7 @@ wordpos.isAdjective('awesome', function(result){
|
||||||
// true 'awesome'
|
// true 'awesome'
|
||||||
```
|
```
|
||||||
|
|
||||||
Command-line: (see [CLI](bin))
|
Command-line: (see [CLI](bin) for full command list)
|
||||||
```bash
|
```bash
|
||||||
$ wordpos def git
|
$ wordpos def git
|
||||||
git
|
git
|
||||||
|
@ -71,7 +71,7 @@ WordPOS.defaults = {
|
||||||
stopwords: true
|
stopwords: true
|
||||||
};
|
};
|
||||||
```
|
```
|
||||||
To override, pass an options hash to the constructor. With the `profile` option, all callbacks receive a last argument that is the execution time in msec of the call.
|
To override, pass an options hash to the constructor. With the `profile` option, most callbacks receive a last argument that is the execution time in msec of the call.
|
||||||
|
|
||||||
```js
|
```js
|
||||||
wordpos = new WordPOS({profile: true});
|
wordpos = new WordPOS({profile: true});
|
||||||
|
@ -165,19 +165,33 @@ Example:
|
||||||
```js
|
```js
|
||||||
wordpos.lookupAdjective('awesome', console.log);
|
wordpos.lookupAdjective('awesome', console.log);
|
||||||
// output:
|
// output:
|
||||||
[ { synsetOffset: 1282510,
|
[ { synsetOffset: 1285602,
|
||||||
lexFilenum: 0,
|
lexFilenum: 0,
|
||||||
|
lexName: 'adj.all',
|
||||||
pos: 's',
|
pos: 's',
|
||||||
wCnt: 5,
|
wCnt: 5,
|
||||||
lemma: 'amazing',
|
lemma: 'amazing',
|
||||||
synonyms: [ 'amazing', 'awe-inspiring', 'awesome', 'awful', 'awing' ],
|
synonyms: [ 'amazing', 'awe-inspiring', 'awesome', 'awful', 'awing' ],
|
||||||
lexId: '0',
|
lexId: '0',
|
||||||
ptrs: [],
|
ptrs: [],
|
||||||
gloss: 'inspiring awe or admiration or wonder; <snip> awing majesty, so vast, so high, so silent" '
|
gloss: 'inspiring awe or admiration or wonder; [...] awing majesty, so vast, so high, so silent" '
|
||||||
|
def: 'inspiring awe or admiration or wonder',
|
||||||
|
...
|
||||||
} ], 'awesome'
|
} ], 'awesome'
|
||||||
```
|
```
|
||||||
In this case only one lookup was found, but there could be several.
|
In this case only one lookup was found, but there could be several.
|
||||||
|
|
||||||
|
Version 1.1 adds the `lexName` property, which maps the lexFilenum to one of [45 lexicographer domains](https://wordnet.princeton.edu/wordnet/man/lexnames.5WN.html).
|
||||||
|
|
||||||
|
|
||||||
|
#### seek(offset, pos, callback)
|
||||||
|
Version 1.1 introduces the seek method to look up a record directly from the synsetOffset for a given POS. Unlike other methods, the callback (if provided) receives `(err, result)` arguments.
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
```js
|
||||||
|
wordpos.seek(1285602, 'a').then(console.log)
|
||||||
|
// logs the same record as wordpos.lookupAdjective('awesome', console.log), as a single object rather than an array
|
||||||
|
```
|
||||||
|
|
||||||
#### rand(options, callback)
|
#### rand(options, callback)
|
||||||
#### randNoun(options, callback)
|
#### randNoun(options, callback)
|
||||||
|
@ -214,6 +228,7 @@ wordpos.rand({starsWith: 'zzz'}, console.log)
|
||||||
|
|
||||||
Getting random POS (`randNoun()`, etc.) is generally faster than `rand()`, which may look at multiple POS files until `count` requirement is met.
|
Getting random POS (`randNoun()`, etc.) is generally faster than `rand()`, which may look at multiple POS files until `count` requirement is met.
|
||||||
|
|
||||||
|
|
||||||
#### parse(text)
|
#### parse(text)
|
||||||
Returns tokenized array of words in `text`, less duplicates and stopwords. This method is called on all getX() calls internally.
|
Returns tokenized array of words in `text`, less duplicates and stopwords. This method is called on all getX() calls internally.
|
||||||
|
|
||||||
|
@ -274,6 +289,10 @@ See [bench/README](bench).
|
||||||
|
|
||||||
## Changes
|
## Changes
|
||||||
|
|
||||||
|
1.1 -
|
||||||
|
- added seek() method
|
||||||
|
- added lexName property
|
||||||
|
|
||||||
1.0.1
|
1.0.1
|
||||||
- Removed npm dependency on Natural. Certain modules are included in /lib.
|
- Removed npm dependency on Natural. Certain modules are included in /lib.
|
||||||
- Add support for ES6 Promises.
|
- Add support for ES6 Promises.
|
||||||
|
|
|
@ -19,6 +19,8 @@ $ wordpos
|
||||||
syn lookup synonyms
|
syn lookup synonyms
|
||||||
|
|
||||||
exp lookup examples
|
exp lookup examples
|
||||||
|
|
||||||
|
seek get record at synset offset. Must include one of POS -n, -a, -v, -r
|
||||||
|
|
||||||
rand get random words (starting with [word]). If first arg is a number, returns
|
rand get random words (starting with [word]). If first arg is a number, returns
|
||||||
that many random words. Valid options are -b, -f, -j, -s, -i.
|
that many random words. Valid options are -b, -f, -j, -s, -i.
|
||||||
|
@ -222,6 +224,49 @@ $ wordpos rand --adj foot
|
||||||
foot-shaped
|
foot-shaped
|
||||||
```
|
```
|
||||||
|
|
||||||
|
#### Seek a synset offset
|
||||||
|
Seek offset as adjective:
|
||||||
|
```sh
|
||||||
|
$ wordpos seek 1285602 -a
|
||||||
|
{ '1285602':
|
||||||
|
{ synsetOffset: 1285602,
|
||||||
|
lexFilenum: 0,
|
||||||
|
lexName: 'adj.all',
|
||||||
|
pos: 's',
|
||||||
|
wCnt: 5,
|
||||||
|
lemma: 'amazing',
|
||||||
|
synonyms: [ 'amazing', 'awe-inspiring', 'awesome', 'awful', 'awing' ],
|
||||||
|
lexId: '0',
|
||||||
|
ptrs:
|
||||||
|
[ { pointerSymbol: '&',
|
||||||
|
synsetOffset: 1285124,
|
||||||
|
pos: 'a',
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
Same offset as verb (not found!):
|
||||||
|
```sh
|
||||||
|
$ wordpos seek 1285602 -v
|
||||||
|
{ '1285602': {} }
|
||||||
|
```
|
||||||
|
|
||||||
|
Multiple offsets from same POS:
|
||||||
|
```sh
|
||||||
|
$ wordpos seek 1285602 1285124 -a
|
||||||
|
{ '1285124':
|
||||||
|
{ synsetOffset: 1285124,
|
||||||
|
lexFilenum: 0,
|
||||||
|
...
|
||||||
|
},
|
||||||
|
'1285602':
|
||||||
|
{ synsetOffset: 1285602,
|
||||||
|
lexFilenum: 0,
|
||||||
|
...
|
||||||
|
}
|
||||||
|
```
|
||||||
|
Note that results are always returned in `--full` format. To get compact JSON format, add the `-j` option.
|
||||||
|
|
||||||
|
|
||||||
#### Stopwords
|
#### Stopwords
|
||||||
List stopwords (brief):
|
List stopwords (brief):
|
||||||
```bash
|
```bash
|
||||||
|
|
|
@ -5,9 +5,9 @@
|
||||||
* command-line interface to wordpos
|
* command-line interface to wordpos
|
||||||
*
|
*
|
||||||
* Usage:
|
* Usage:
|
||||||
* wordpos [options] <get|parse|def|rand|syn|exp> <stdin|words*>
|
* wordpos [options] <get|parse|def|rand|syn|exp|seek> <stdin|words*>
|
||||||
*
|
*
|
||||||
* Copyright (c) 2012 mooster@42at.com
|
* Copyright (c) 2012, 2016 mooster@42at.com
|
||||||
* https://github.com/moos/wordpos
|
* https://github.com/moos/wordpos
|
||||||
*
|
*
|
||||||
* Released under MIT license
|
* Released under MIT license
|
||||||
|
@ -17,6 +17,7 @@ var program = require('commander'),
|
||||||
_ = require('underscore')._,
|
_ = require('underscore')._,
|
||||||
fs = require('fs'),
|
fs = require('fs'),
|
||||||
POS = {noun:'Noun', adj:'Adjective', verb:'Verb', adv:'Adverb'},
|
POS = {noun:'Noun', adj:'Adjective', verb:'Verb', adv:'Adverb'},
|
||||||
|
POS_abbr = {noun:'n', adj:'a', verb:'v', adv:'r'},
|
||||||
version = JSON.parse(fs.readFileSync(__dirname + '/../package.json', 'utf8')).version,
|
version = JSON.parse(fs.readFileSync(__dirname + '/../package.json', 'utf8')).version,
|
||||||
rawCmd = '',
|
rawCmd = '',
|
||||||
RAND_PLACEHOLDER = '__',
|
RAND_PLACEHOLDER = '__',
|
||||||
|
@ -67,6 +68,19 @@ program.command('exp')
|
||||||
exec.apply(this, arguments);
|
exec.apply(this, arguments);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
program.command('seek')
|
||||||
|
.description('get record at synset offset. Must include one of POS -n, -a, -v, -r')
|
||||||
|
.action(function(){
|
||||||
|
var one = _.chain(program).pick('noun adj adv verb'.split(' ')).countBy().value().true;
|
||||||
|
if (!one || one > 1) {
|
||||||
|
console.error('Must include one and only one of -n, -a, -v, -r');
|
||||||
|
process.exit(-1);
|
||||||
|
}
|
||||||
|
// force full output mode
|
||||||
|
program.full = 1;
|
||||||
|
exec.apply(this, arguments);
|
||||||
|
});
|
||||||
|
|
||||||
program.command('rand')
|
program.command('rand')
|
||||||
.description('get random words (starting with [word]). If first arg is a number, returns ' +
|
.description('get random words (starting with [word]). If first arg is a number, returns ' +
|
||||||
'that many random words. Valid options are -b, -f, -j, -s, -i.')
|
'that many random words. Valid options are -b, -f, -j, -s, -i.')
|
||||||
|
@ -80,12 +94,10 @@ program.command('rand')
|
||||||
args.shift();
|
args.shift();
|
||||||
program.num = num;
|
program.num = num;
|
||||||
}
|
}
|
||||||
|
|
||||||
// no startsWith given, add a placeholder
|
// no startsWith given, add a placeholder
|
||||||
if (args.length === 1){
|
if (args.length === 1){
|
||||||
args.unshift(RAND_PLACEHOLDER);
|
args.unshift(RAND_PLACEHOLDER);
|
||||||
}
|
}
|
||||||
|
|
||||||
exec.apply(this, args);
|
exec.apply(this, args);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -150,23 +162,24 @@ function read_stdin(callback) {
|
||||||
}
|
}
|
||||||
|
|
||||||
function optToFn() {
|
function optToFn() {
|
||||||
var fns = _.reject(POS, function(fn, opt) { return !program[opt] });
|
var
|
||||||
|
map = cmd === 'seek' ? POS_abbr : POS,
|
||||||
|
fns = _.reject(map, function(fn, opt) { return !program[opt] });
|
||||||
if (!fns.length && cmd === 'rand') return fns = ['']; // run rand()
|
if (!fns.length && cmd === 'rand') return fns = ['']; // run rand()
|
||||||
if (!fns.length) fns = _.values(POS); //default to all if no POS given
|
if (!fns.length) fns = _.values(map); //default to all if no POS given
|
||||||
return fns;
|
return fns;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
function run(data) {
|
function run(data) {
|
||||||
var
|
var
|
||||||
opts = {stopwords: !program.withStopwords},
|
opts = {stopwords: !program.withStopwords},
|
||||||
wordpos = new WordPos(opts),
|
wordpos = new WordPos(opts),
|
||||||
words = wordpos.parse(data),
|
seek = cmd === 'seek',
|
||||||
|
words = seek ? data.split(' ') : wordpos.parse(data),
|
||||||
fns = optToFn(),
|
fns = optToFn(),
|
||||||
plural = (cmd=='get' ? 's':''),
|
plural = (cmd === 'get' ? 's':''),
|
||||||
results = {},
|
results = {},
|
||||||
finale = _.after(
|
finale = _.after(plural ? fns.length : words.length * fns.length,
|
||||||
plural ? fns.length : words.length * fns.length,
|
|
||||||
_.bind(output, null, results)),
|
_.bind(output, null, results)),
|
||||||
collect = function(what, result, word){
|
collect = function(what, result, word){
|
||||||
if (word) { // lookup
|
if (word) { // lookup
|
||||||
|
@ -184,13 +197,20 @@ function run(data) {
|
||||||
_(fns).each(function(fn){
|
_(fns).each(function(fn){
|
||||||
var method = cmd + fn + plural,
|
var method = cmd + fn + plural,
|
||||||
cb = _.bind(collect, null, fn);
|
cb = _.bind(collect, null, fn);
|
||||||
if (cmd == 'get') {
|
if (cmd === 'get') {
|
||||||
wordpos[method](words, cb);
|
wordpos[method](words, cb);
|
||||||
} else if (cmd == 'rand') {
|
} else if (cmd === 'rand') {
|
||||||
if (words[0] === RAND_PLACEHOLDER) words[0] = '';
|
if (words[0] === RAND_PLACEHOLDER) words[0] = '';
|
||||||
words.forEach(function(word){
|
words.forEach(function(word){
|
||||||
wordpos[method]({startsWith: word, count: program.num || 1}, cb);
|
wordpos[method]({startsWith: word, count: program.num || 1}, cb);
|
||||||
});
|
});
|
||||||
|
} else if (seek) {
|
||||||
|
words.forEach(function(offset){
|
||||||
|
wordpos.seek(offset, fn, function(err, result){
|
||||||
|
results[offset.trim()] = result;
|
||||||
|
finale();
|
||||||
|
});
|
||||||
|
});
|
||||||
} else {
|
} else {
|
||||||
words.forEach(function(word){
|
words.forEach(function(word){
|
||||||
wordpos[method](word, cb);
|
wordpos[method](word, cb);
|
||||||
|
@ -227,8 +247,9 @@ function sprint(results) {
|
||||||
}, '');
|
}, '');
|
||||||
default:
|
default:
|
||||||
return _.reduce(results, function(memo, v, k){
|
return _.reduce(results, function(memo, v, k){
|
||||||
var pre = program.brief ? '' : util.format('# %s %d:%s', k, v.length, sep);
|
var pre = program.brief ? '' : util.format('# %s %d:%s', k, v.length, sep),
|
||||||
return memo + (v.length && util.format('%s%s%s\n', pre, v.join(sep), sep) || '');
|
res = v.length ? v.join(sep) : '';
|
||||||
|
return memo + (v.length && util.format('%s%s%s\n', pre, res, sep) || '');
|
||||||
}, '');
|
}, '');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -11,7 +11,7 @@
|
||||||
"verbs"
|
"verbs"
|
||||||
],
|
],
|
||||||
"description": "wordpos is a set of part-of-speech utilities for Node.js using the WordNet database.",
|
"description": "wordpos is a set of part-of-speech utilities for Node.js using the WordNet database.",
|
||||||
"version": "1.0.1",
|
"version": "1.1.0",
|
||||||
"homepage": "https://github.com/moos/wordpos",
|
"homepage": "https://github.com/moos/wordpos",
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=0.12"
|
"node": ">=0.12"
|
||||||
|
|
|
@ -13,6 +13,17 @@ var fs = require('fs'),
|
||||||
path = require('path'),
|
path = require('path'),
|
||||||
_ = require('underscore');
|
_ = require('underscore');
|
||||||
|
|
||||||
|
/**
 * sanity check read data - line must start with zero-padded location
 *
 * The location is left-padded with zeros to 8 digits before comparing.
 * A location that is already longer than 8 digits is compared as-is
 * (never truncated), so it cannot accidentally match a line that merely
 * shares its last 8 digits.
 *
 * @param line {string} - line data read
 * @param location {number|string} - location the line was read from
 * @return {boolean} true if line data is good
 */
function dataCheck(line, location) {
  var pad = '00000000', // 8 zeros
    loc = String(location),
    // only pad short values; slicing a longer value would drop leading digits
    padded = loc.length >= pad.length ? loc : (pad + loc).slice(-pad.length);

  // a missing/non-string line is simply "bad data" rather than a TypeError
  return typeof line === 'string' && line.indexOf(padded) === 0;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* parse a single data file line, returning data object
|
* parse a single data file line, returning data object
|
||||||
|
@ -22,7 +33,9 @@ var fs = require('fs'),
|
||||||
*
|
*
|
||||||
* Credit for this routine to https://github.com/NaturalNode/natural
|
* Credit for this routine to https://github.com/NaturalNode/natural
|
||||||
*/
|
*/
|
||||||
function lineDataToJSON(line) {
|
function lineDataToJSON(line, location) {
|
||||||
|
if (!dataCheck(line, location)) return new Error('Bad data at location ' + location);
|
||||||
|
|
||||||
var data = line.split('| '),
|
var data = line.split('| '),
|
||||||
tokens = data[0].split(/\s+/),
|
tokens = data[0].split(/\s+/),
|
||||||
ptrs = [],
|
ptrs = [],
|
||||||
|
@ -48,6 +61,7 @@ function lineDataToJSON(line) {
|
||||||
var glossArray = data[1].split("; ");
|
var glossArray = data[1].split("; ");
|
||||||
var definition = glossArray[0];
|
var definition = glossArray[0];
|
||||||
var examples = glossArray.slice(1);
|
var examples = glossArray.slice(1);
|
||||||
|
var lexFilenum = parseInt(tokens[1], 10);
|
||||||
|
|
||||||
for (var k = 0; k < examples.length; k++) {
|
for (var k = 0; k < examples.length; k++) {
|
||||||
examples[k] = examples[k].replace(/\"/g,'').replace(/\s\s+/g,'');
|
examples[k] = examples[k].replace(/\"/g,'').replace(/\s\s+/g,'');
|
||||||
|
@ -55,7 +69,8 @@ function lineDataToJSON(line) {
|
||||||
|
|
||||||
return {
|
return {
|
||||||
synsetOffset: parseInt(tokens[0], 10),
|
synsetOffset: parseInt(tokens[0], 10),
|
||||||
lexFilenum: parseInt(tokens[1], 10),
|
lexFilenum: lexFilenum,
|
||||||
|
lexName: DataFile.LEX_NAMES[ lexFilenum ],
|
||||||
pos: tokens[2],
|
pos: tokens[2],
|
||||||
wCnt: wCnt,
|
wCnt: wCnt,
|
||||||
lemma: tokens[4],
|
lemma: tokens[4],
|
||||||
|
@ -85,12 +100,12 @@ function readLocation(location, callback) {
|
||||||
|
|
||||||
readChunk(location, function(err, count) {
|
readChunk(location, function(err, count) {
|
||||||
if (err) {
|
if (err) {
|
||||||
console.log(err);
|
//console.log(err);
|
||||||
callback(err);
|
callback(err);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
//console.log(' read %d bytes at <%d>', count, location);
|
//console.log(' read %d bytes at <%d>', count, location);
|
||||||
callback(null, lineDataToJSON(str));
|
callback(null, lineDataToJSON(str, location));
|
||||||
});
|
});
|
||||||
|
|
||||||
function readChunk(pos, cb) {
|
function readChunk(pos, cb) {
|
||||||
|
@ -98,12 +113,13 @@ function readLocation(location, callback) {
|
||||||
str += buffer.toString('ascii');
|
str += buffer.toString('ascii');
|
||||||
var eol = str.indexOf('\n');
|
var eol = str.indexOf('\n');
|
||||||
//console.log(' -- read %d bytes at <%d>', count, pos, eol);
|
//console.log(' -- read %d bytes at <%d>', count, pos, eol);
|
||||||
if (eol === -1 && len < file.maxLineLength) {
|
if (count && eol === -1 && len < file.maxLineLength) {
|
||||||
// continue reading
|
// continue reading
|
||||||
return readChunk(pos + count, cb);
|
return readChunk(pos + count, cb);
|
||||||
}
|
}
|
||||||
|
|
||||||
str = str.substr(0, eol);
|
str = str.substr(0, eol);
|
||||||
|
if (str === '' && !err) err = new Error('no data at offset ' + pos);
|
||||||
cb(err, count);
|
cb(err, count);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -112,15 +128,16 @@ function readLocation(location, callback) {
|
||||||
/**
|
/**
|
||||||
* main lookup function
|
* main lookup function
|
||||||
*
|
*
|
||||||
* @param record {object} - record to lookup, obtained from index.find()
|
* @param offsets {array} - array of offsets to lookup (obtained from index.find())
|
||||||
* @param callback{function} (optional) - callback function
|
* @param callback{function} (optional) - callback function
|
||||||
* @returns {Promise}
|
* @returns {Promise}
|
||||||
*/
|
*/
|
||||||
function lookup(record, callback) {
|
function lookup(offsets, callback) {
|
||||||
var results = [],
|
var results = [],
|
||||||
self = this,
|
self = this,
|
||||||
offsets = record.synsetOffset;
|
single = !_.isArray(offsets);
|
||||||
|
|
||||||
|
if (single) offsets = [offsets];
|
||||||
return new Promise(function(resolve, reject) {
|
return new Promise(function(resolve, reject) {
|
||||||
offsets
|
offsets
|
||||||
.map(function (offset) {
|
.map(function (offset) {
|
||||||
|
@ -134,9 +151,10 @@ function lookup(record, callback) {
|
||||||
function done(lastResult) {
|
function done(lastResult) {
|
||||||
closeFile();
|
closeFile();
|
||||||
if (lastResult instanceof Error) {
|
if (lastResult instanceof Error) {
|
||||||
callback && callback(lastResult, []);
|
callback && callback(lastResult, single ? {} :[]);
|
||||||
reject(lastResult);
|
reject(lastResult);
|
||||||
} else {
|
} else {
|
||||||
|
if (single) results = results[0];
|
||||||
callback && callback(null, results);
|
callback && callback(null, results);
|
||||||
resolve(results);
|
resolve(results);
|
||||||
}
|
}
|
||||||
|
@ -233,5 +251,58 @@ DataFile.MAX_LINE_LENGTH = {
|
||||||
adv: 638
|
adv: 638
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* map of lexFilenum to lex names
|
||||||
|
*
|
||||||
|
* @see https://wordnet.princeton.edu/wordnet/man/lexnames.5WN.html
|
||||||
|
* @type {string[]}
|
||||||
|
*/
|
||||||
|
DataFile.LEX_NAMES = [
|
||||||
|
'adj.all',
|
||||||
|
'adj.pert',
|
||||||
|
'adv.all',
|
||||||
|
'noun.Tops',
|
||||||
|
'noun.act',
|
||||||
|
'noun.animal',
|
||||||
|
'noun.artifact',
|
||||||
|
'noun.attribute',
|
||||||
|
'noun.body',
|
||||||
|
'noun.cognition',
|
||||||
|
'noun.communication',
|
||||||
|
'noun.event',
|
||||||
|
'noun.feeling',
|
||||||
|
'noun.food',
|
||||||
|
'noun.group',
|
||||||
|
'noun.location',
|
||||||
|
'noun.motive',
|
||||||
|
'noun.object',
|
||||||
|
'noun.person',
|
||||||
|
'noun.phenomenon',
|
||||||
|
'noun.plant',
|
||||||
|
'noun.possession',
|
||||||
|
'noun.process',
|
||||||
|
'noun.quantity',
|
||||||
|
'noun.relation',
|
||||||
|
'noun.shape',
|
||||||
|
'noun.state',
|
||||||
|
'noun.substance',
|
||||||
|
'noun.time',
|
||||||
|
'verb.body',
|
||||||
|
'verb.change',
|
||||||
|
'verb.cognition',
|
||||||
|
'verb.communication',
|
||||||
|
'verb.competition',
|
||||||
|
'verb.consumption',
|
||||||
|
'verb.contact',
|
||||||
|
'verb.creation',
|
||||||
|
'verb.emotion',
|
||||||
|
'verb.motion',
|
||||||
|
'verb.perception',
|
||||||
|
'verb.possession',
|
||||||
|
'verb.social',
|
||||||
|
'verb.stative',
|
||||||
|
'verb.weather',
|
||||||
|
'adj.ppl'
|
||||||
|
];
|
||||||
|
|
||||||
module.exports = DataFile;
|
module.exports = DataFile;
|
||||||
|
|
|
@ -63,7 +63,7 @@ function lookup(pos) {
|
||||||
.then(function(result) {
|
.then(function(result) {
|
||||||
if (result) {
|
if (result) {
|
||||||
// lookup data
|
// lookup data
|
||||||
return files.data.lookup(result).then(done);
|
return files.data.lookup(result.synsetOffset).then(done);
|
||||||
} else {
|
} else {
|
||||||
// not found in index
|
// not found in index
|
||||||
return done([]);
|
return done([]);
|
||||||
|
@ -362,6 +362,31 @@ wordposProto.getVerbs = get('isVerb');
|
||||||
wordposProto.parse = prepText;
|
wordposProto.parse = prepText;
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * seek - get record at offset for pos
 *
 * Validation failures (bad offset, unknown POS) are reported both through
 * the callback, as (err, {}), and through the returned rejected Promise.
 *
 * @param offset {number} - synset offset (a numeric string is also accepted)
 * @param pos {string} - POS a/r/n/v
 * @param callback {function} - optional callback, receives (err, result)
 * @returns Promise
 */
wordposProto.seek = function(offset, pos, callback){
  offset = Number(offset);
  // the offset is used as a read position in the data file, so reject
  // NaN, +/-Infinity, fractions and non-positive values up front
  if (!isFinite(offset) || offset <= 0 || Math.floor(offset) !== offset) {
    return error('offset must be valid positive number.');
  }

  var data = this.getFilesFor(pos).data;
  if (!data) return error('Incorrect POS - 2nd argument must be a, r, n or v.');

  return data.lookup(offset, callback);

  // report the failure through both the callback and a rejected Promise
  function error(msg) {
    var err = new Error(msg);
    callback && callback(err, {});
    return Promise.reject(err);
  }
};
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* access to WordNet DB
|
* access to WordNet DB
|
||||||
* @type {object}
|
* @type {object}
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
|
|
||||||
var
|
var
|
||||||
chai = require('chai'),
|
chai = require('chai'),
|
||||||
|
_ = require('underscore'),
|
||||||
assert = chai.assert,
|
assert = chai.assert,
|
||||||
WordPOS = require('../src/wordpos'),
|
WordPOS = require('../src/wordpos'),
|
||||||
wordpos = new WordPOS({profile: false});
|
wordpos = new WordPOS({profile: false});
|
||||||
|
@ -35,7 +36,9 @@ var str = "The angry bear chased the frightened little squirrel",
|
||||||
adverbs: [ 'little' ],
|
adverbs: [ 'little' ],
|
||||||
rest: [ 'The' ]
|
rest: [ 'The' ]
|
||||||
},
|
},
|
||||||
garble = 'garblegarble'; // expect not to find word
|
garble = 'garblegarble', // expect not to find word
|
||||||
|
offset = 1285602,
|
||||||
|
offset_pos ='a';
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -356,6 +359,62 @@ describe('randX()...', function() {
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
||||||
|
describe('seek()...', function() {
|
||||||
|
|
||||||
|
it('should handle bad offset', function(done) {
|
||||||
|
wordpos.seek('foobar', 'a', function(err, result){
|
||||||
|
assert(err instanceof Error);
|
||||||
|
assert.equal(err.message, 'offset must be valid positive number.');
|
||||||
|
done();
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should handle wrong offset', function(done) {
|
||||||
|
var bad_offset = offset + 1;
|
||||||
|
wordpos.seek(bad_offset, offset_pos, function(err, result) {
|
||||||
|
assert(err instanceof Error);
|
||||||
|
assert.equal(err.message, 'Bad data at location ' + bad_offset);
|
||||||
|
assert.deepEqual(result, {});
|
||||||
|
done();
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should handle very large offset', function(done) {
|
||||||
|
var bad_offset = offset + 100000000;
|
||||||
|
wordpos.seek(bad_offset, offset_pos, function(err, result) {
|
||||||
|
assert(err instanceof Error);
|
||||||
|
assert.equal(err.message, 'no data at offset ' + bad_offset);
|
||||||
|
assert.deepEqual(result, {});
|
||||||
|
done();
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should handle bad pos', function(done) {
|
||||||
|
wordpos.seek(offset, 'g', function(err, result) {
|
||||||
|
assert(err instanceof Error);
|
||||||
|
assert(/Incorrect POS/.test(err.message));
|
||||||
|
done();
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should handle wrong pos', function(done) {
  wordpos.seek(offset, 'v', function(err, result){
    assert.equal(err.message, 'Bad data at location ' + offset);
    // signal completion only after the assertion has run; calling done()
    // outside this callback lets the test pass before seek() reports back
    done();
  });
});
|
||||||
|
|
||||||
|
it('should seek offset', function(done) {
|
||||||
|
wordpos.seek(offset, offset_pos, function(err, result) {
|
||||||
|
assert.equal(result.synsetOffset, offset);
|
||||||
|
assert.equal(result.pos, 's');
|
||||||
|
assert.equal(result.lemma, 'amazing');
|
||||||
|
done();
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
describe('Promise pattern', function() {
|
describe('Promise pattern', function() {
|
||||||
|
|
||||||
|
@ -413,4 +472,28 @@ describe('Promise pattern', function() {
|
||||||
assert.equal(result[0].indexOf('foo'), 0);
|
assert.equal(result[0].indexOf('foo'), 0);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('seek()', function () {
|
||||||
|
return wordpos.seek(offset, offset_pos).then(function (result) {
|
||||||
|
assert.equal(result.synsetOffset, offset);
|
||||||
|
assert.equal(result.pos, 's');
|
||||||
|
assert.equal(result.lemma, 'amazing');
|
||||||
|
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('seek() - wrong offset', function () {
  return wordpos.seek(offset + 1, offset_pos).then(function () {
    // a .catch()-only chain would pass vacuously if seek() resolved
    throw new Error('expected seek() to reject');
  }, function (err) {
    assert(err instanceof Error);
    assert.equal(err.message, 'Bad data at location ' + (offset+1));
  });
});
|
||||||
|
|
||||||
|
it('seek() - bad offset', function () {
  return wordpos.seek('foobar', offset_pos).then(function () {
    // a .catch()-only chain would pass vacuously if seek() resolved
    throw new Error('expected seek() to reject');
  }, function (err) {
    assert(err instanceof Error);
    assert.equal(err.message, 'offset must be valid positive number.');
  });
});
|
||||||
|
|
||||||
});
|
});
|
Loading…
Reference in New Issue