diff --git a/samples/self-hosted/index.html b/samples/self-hosted/index.html index 05fee23..e71dc4d 100644 --- a/samples/self-hosted/index.html +++ b/samples/self-hosted/index.html @@ -1,51 +1,42 @@
+ + + + - +
+ var a = "foo"
+
+
+
+
diff --git a/samples/self-hosted/main.js b/samples/self-hosted/main.js
index 4e5db59..a57c33e 100644
--- a/samples/self-hosted/main.js
+++ b/samples/self-hosted/main.js
@@ -1,4 +1,4 @@
-import WordPOS from '../../src/browser';
+import WordPOS from '../../src/wordpos';
console.log(__dirname, WordPOS.defaults)
@@ -9,11 +9,35 @@ let wordpos = window.wordpos = new WordPOS({
// stopwords: false
});
-wordpos.isAdverb('likely').then(res => console.log('likely is adverb:', res));
-// wordpos.isAdverb('likely', (res, ...profile) => console.log('likely callback', res, profile));
-wordpos.getAdverbs('this is is likely a likely tricky business this is').then(
- res => console.log('getAdverb', res)
-);
+let assertLikely = (r) => { // checks r against the expected adverb record for 'likely'; console.assert only logs on failure, it does not throw
+ console.assert(r.def === 'with considerable certainty');
+ console.assert(r.pos === 'r'); // 'r' is the adverb POS code
+ console.assert(r.synsetOffset === '00139421'); // compared as a zero-padded string, not a number
+};
-wordpos.lookupAdverb('likely').then(res => console.log('lookup ===', res))
+console.group('Likely');
+wordpos.isAdverb('likely').then(res => console.assert(res));
+wordpos.isAdverb('likely', (res, ...profile) => console.log('callback with profile', res, profile));
+
+wordpos.getAdverbs('this is is lately a likely tricky business this is')
+ .then(res => {
+ console.log('getAdverbs:', res);
+ console.assert(res[0] === 'lately');
+ console.assert(res[1] === 'likely');
+ });
+
+wordpos.lookupAdverb('likely')
+ .then(res => {
+ console.log('lookupAdverb:', res);
+ assertLikely(res[0]);
+
+ });
// wordpos.lookup('likely').then(res, console.log('lookup ===', res))
+
+wordpos.seek('00139421', 'r')
+ .then(res => {
+ console.log('seek:', res);
+ assertLikely(res);
+ });
+
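+// TODO: call console.groupEnd() only after the promises above settle (they log asynchronously); note groupEnd() takes no label argument.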
diff --git a/samples/self-hosted/main.txt b/samples/self-hosted/main.txt
new file mode 120000
index 0000000..82df346
--- /dev/null
+++ b/samples/self-hosted/main.txt
@@ -0,0 +1 @@
+main.js
\ No newline at end of file
diff --git a/src/browser/baseFile.js b/src/browser/baseFile.js
index 68af93b..acb56d1 100644
--- a/src/browser/baseFile.js
+++ b/src/browser/baseFile.js
@@ -1,16 +1,23 @@
-
+/**
+ * browser/baseFile.js
+ *
+ * Copyright (c) 2012-2019 mooster@42at.com
+ * https://github.com/moos/wordpos
+ *
+ * Released under MIT license
+ */
class BaseFile {
/**
- * file contents
+ * file contents - in browser it's just a string & not a file!
* @type {Object}
*/
file = {};
constructor(type, dictPath, posName) {
- this.filePath = `${dictPath}/${type}.${posName}.js`;
this.type = type;
+ this.filePath = `${dictPath}/${type}.${posName}.js`;
}
load() {
diff --git a/src/browser/dataFile.js b/src/browser/dataFile.js
index 6c72327..1238423 100644
--- a/src/browser/dataFile.js
+++ b/src/browser/dataFile.js
@@ -1,5 +1,5 @@
-/*!
- * dataFile.js
+/**
+ * browser/dataFile.js
*
* Copyright (c) 2012-2019 mooster@42at.com
* https://github.com/moos/wordpos
@@ -61,7 +61,7 @@ function lookup(offsets, callback) {
* DataFile class
*
* @param dictPath {string} - path to dict folder
- * @param name {string} - POS name
+ * @param posName {string} - POS name
* @constructor
*/
class DataFile extends BaseFile {
@@ -73,14 +73,8 @@ class DataFile extends BaseFile {
lookup() {
return this.ready(lookup, arguments);
}
-
- seek() {
- // return this.ready(find, arguments);
- }
-
}
-
/**
* map of lexFilenum to lex names
*
diff --git a/src/browser/index.js b/src/browser/index.js
index 344b549..4bd8d21 100644
--- a/src/browser/index.js
+++ b/src/browser/index.js
@@ -1,5 +1,14 @@
+/**
+* browser/index.js
+*
+* Copyright (c) 2012-2019 mooster@42at.com
+* https://github.com/moos/wordpos
+*
+* Released under MIT license
+*/
+
import { stopwords, prepText, makeStopwordString } from '../util';
-import { is, get, lookup } from '../common';
+import { is, get, lookup, seek } from '../common';
import IndexFile from './indexFile';
import DataFile from './dataFile';
@@ -10,7 +19,6 @@ const POS = {
r: 'adv'
};
-
class WordPOS {
options = {};
@@ -18,7 +26,6 @@ class WordPOS {
constructor(config) {
this.options = Object.assign({}, WordPOS.defaults, config);
- console.log('wpos ctor -- ', this.options)
this.initFiles();
if (Array.isArray(this.options.stopwords)) {
@@ -81,6 +88,8 @@ class WordPOS {
parse = prepText;
+ seek = seek;
+
/**
* isX() - Test if word is given POS
* @see is
@@ -144,7 +153,6 @@ WordPOS.defaults = {
* include data files in preload
* @type {boolean}
*/
-
includeData: false
};
@@ -154,7 +162,7 @@ WordPOS.defaults = {
* access to WordNet DB
* @type {object}
*/
-// WordPOS.WNdb = WNdb;
+// WordPOS.WNdb = WNdb; // FIXME
/**
* access to stopwords
diff --git a/src/browser/indexFile.js b/src/browser/indexFile.js
index 88d0166..2c0308a 100644
--- a/src/browser/indexFile.js
+++ b/src/browser/indexFile.js
@@ -1,13 +1,9 @@
-/*!
- * indexFile.js
- *
- * implements fast index lookup of WordNet's index files
+/**
+ * browser/indexFile.js
*
* Copyright (c) 2012-2019 mooster@42at.com
* https://github.com/moos/wordpos
*
- * Portions: Copyright (c) 2011, Chris Umbel
- *
* Released under MIT license
*/
@@ -50,7 +46,7 @@ function find(search, callback) {
* IndexFile class
*
* @param dictPath {string} - WordNet db dict path
- * @param name {string} - name of index: noun, verb, adj, adv
+ * @param posName {string} - name of index: noun, verb, adj, adv
* @constructor
*/
class IndexFile extends BaseFile {
diff --git a/src/browser/piper.js b/src/browser/piper.js
deleted file mode 100644
index c0985de..0000000
--- a/src/browser/piper.js
+++ /dev/null
@@ -1,82 +0,0 @@
-/*!
- * piper.js
- *
- * executes multiple async i/o tasks and pools similar callbacks,
- * calling i/o open/close when all incoming tasks are done.
- *
- * Copyright (c) 2012-2016 mooster@42at.com
- * https://github.com/moos/wordpos
- *
- * Released under MIT license
- */
-
-var _ = require('underscore')._,
- util = require('util'),
- fs = require('fs');
-
-/**
- * run single 'task' method sharing callbacks. Method MUST take callback as LAST arg.
- * piper is bound to an IndexFile.
- *
- * @param task {string} - task name unique to method!
- * @param method {function} - method to execute, gets (args, ... , callback)
- * @param args {Array} - args to pass to method
- * @param context {object} - other params to remember and sent to callback
- * @param callback {function} - result callback
- */
-function piper(task, method, args, context, callback){
- var readCallbacks = this.callbackQueue,
- memoArgs = _.rest(arguments, 2),
- wrappedCallback;
-
- //console.log('piper', task, [method]);
-
- // queue up if already reading file for this task
- if (task in readCallbacks){
- readCallbacks[task].push(memoArgs);
- return;
- }
- readCallbacks[task] = [memoArgs];
-
- if (!this.fd) {
- //console.log(' ... opening', this.filePath);
- this.fd = fs.openSync(this.filePath, 'r');
- }
-
- // ref count so we know when to close the main index file
- ++this.refcount;
-
- wrappedCallback = _.partial(piper.wrapper, this, task);
-
- // call method -- replace original callback (last arg) with wrapped one
- method.apply(null, [].concat( args, wrappedCallback ));
-}
-
-// result is the *same* for same task
-piper.wrapper = function(self, task /*, result...*/){
- var readCallbacks = self.callbackQueue,
- result = _.rest(arguments, 2),
- callback, args;
-
- // live access callbacks cache in case nested cb's
- // add to the array.
- while (args = readCallbacks[task].shift()) {
- callback = args.pop(); // last arg MUST be callback
-
-// console.log('>>>> pper wrapper', self.fastIndex.name, task, result.toString())
- callback.apply(null, [].concat(_.flatten(args, /*shallow*/true), result));
- }
-
- // now done - delete cb cache
- delete readCallbacks[task];
-
- if (--self.refcount === 0) {
- //console.log(' ... closing', self.filePath);
- fs.closeSync(self.fd);
- self.fd = null;
- }
-};
-
-
-module.exports = piper;
-
diff --git a/src/browser/rand.js b/src/browser/rand.js
deleted file mode 100644
index 17808c8..0000000
--- a/src/browser/rand.js
+++ /dev/null
@@ -1,267 +0,0 @@
-/*!
- * rand.js
- *
- * define rand() and randX() functions on wordpos
- *
- * Copyright (c) 2012-2016 mooster@42at.com
- * https://github.com/moos/wordpos
- *
- * Released under MIT license
- */
-
-var _ = require('underscore')._,
- util = require('util'),
- Trie = require('../lib/natural/trie/trie'),
- IndexFile = require('./indexFile'),
- KEY_LENGTH = 3;
-
-
-/**
- * factory function for randX()
- *
- * @param pos {string} - a,r,n,v
- * @returns {Function} - rand function bound to an index file
- */
-function makeRandX(pos){
- return function(opts, callback, _noprofile) {
- // disable profiling when isX() used internally
- var profile = this.options.profile && !_noprofile,
- start = profile && new Date(),
- args = [],
- index = this.getFilesFor(pos).index,
- startsWith = opts && opts.startsWith || '',
- count = opts && opts.count || 1;
-
- if (typeof opts === 'function') {
- callback = opts;
- }
-
- return index.rand(startsWith, count, function (record) {
- args.push(record, startsWith);
- profile && args.push(new Date() - start);
- callback && callback.apply(null, args);
- });
- };
-}
-
-/**
- * rand function (bound to index)
- *
- * @param startsWith {string} - get random word(s) that start with this, or ''
- * @param num {number} - number of words to return
- * @param callback {function} - callback function, receives words array and startsWith
- * @returns Promise
- */
-function rand(startsWith, num, callback){
- var self = this,
- nextKey = null,
- trie = this.fastIndex.trie,
- key, keys;
-
- return new Promise(function(resolve, reject) {
-
- //console.log('-- ', startsWith, num, self.fastIndex.indexKeys.length);
- if (startsWith) {
- key = startsWith.slice(0, KEY_LENGTH);
-
- /**
- * if key is 'a' or 'ab' (<3 chars), search for ALL keys starting with that.
- */
- if (key.length < KEY_LENGTH) {
-
- // calc trie if haven't done so yet
- if (!trie) {
- trie = new Trie();
- trie.addStrings(self.fastIndex.indexKeys);
- self.fastIndex.trie = trie;
- //console.log(' +++ Trie calc ');
- }
-
- try {
- // trie throws if not found!!!!!
- keys = trie.keysWithPrefix(startsWith);
- } catch (e) {
- keys = [];
- }
-
- // read all keys then select random word.
- // May be large disk read!
- key = keys[0];
- nextKey = _.last(keys);
- }
-
- if (!key || !(key in self.fastIndex.offsets)) {
- callback && callback([], startsWith);
- resolve([]);
- }
-
- } else {
- // no startWith given - random select among keys
- keys = _.sample(self.fastIndex.indexKeys, num);
-
- // if num > 1, run each key independently and collect results
- if (num > 1) {
- var results = [], ii = 0;
- _(keys).each(function (startsWith) {
- self.rand(startsWith, 1, function (result) {
- results.push(result[0]);
- if (++ii == num) {
- callback && callback(results, '');
- resolve(results);
- }
- });
- });
- return;
- }
- key = keys;
- }
-
- // prepare the piper
- var args = [key, nextKey, self],
- task = 'rand:' + key + nextKey,
- context = [startsWith, num, callback]; // last arg MUST be callback
-
- // pay the piper
- self.piper(task, IndexFile.readIndexBetweenKeys, args, context, collector);
-
- function collector(key, nextKey, index, startsWith, num, callback, buffer) {
- var lines = buffer.toString().split('\n'),
- matches = lines.map(function (line) {
- return line.substring(0, line.indexOf(' '));
- });
- //console.log(' got lines for key ', key, lines.length);
-
- // we got bunch of matches for key - now search within for startsWith
- if (startsWith !== key) {
- // binary search for startsWith within set of matches
- var ind = _.sortedIndex(matches, startsWith);
- if (ind >= lines.length || matches[ind].indexOf(startsWith) === -1) {
- callback && callback([], startsWith);
- resolve([]);
- return;
- }
-
- var trie = new Trie();
- trie.addStrings(matches);
- //console.log('Trie > ', trie.matchesWithPrefix( startsWith ));
- matches = trie.keysWithPrefix(startsWith);
- }
-
- var words = _.sample(matches, num);
- callback && callback(words, startsWith);
- resolve(words);
- }
-
- }); // Promise
-}
-
-// relative weight of each POS word count (DB 3.1 numbers)
-var POS_factor = {
- Noun: 26,
- Verb: 3,
- Adjective: 5,
- Adverb: 1,
- Total: 37
-};
-
-/**
- * rand() - for all Index files
- * @returns Promise
- */
-function randAll(opts, callback) {
-
- if (typeof opts === 'function') {
- callback = opts;
- opts = {};
- } else {
- opts = _.clone(opts || {});
- }
-
- var
- profile = this.options.profile,
- start = profile && new Date(),
- results = [],
- startsWith = opts && opts.startsWith || '',
- count = opts && opts.count || 1,
- args = [null, startsWith],
- parts = 'Noun Verb Adjective Adverb'.split(' '),
- self = this;
-
-
-
- return new Promise(function(resolve, reject) {
- // select at random a POS to look at
- var doParts = _.sample(parts, parts.length);
- tryPart();
-
- function tryPart() {
- var part = doParts.pop(),
- rand = 'rand' + part,
- factor = POS_factor[part],
- weight = factor / POS_factor.Total;
-
- // pick count according to relative weight
- opts.count = Math.ceil(count * weight * 1.1); // guard against dupes
- self[rand](opts, partCallback);
- }
-
- function partCallback(result) {
- if (result) {
- results = _.uniq(results.concat(result)); // make sure it's unique!
- }
-
- if (results.length < count && doParts.length) {
- return tryPart();
- }
-
- // final random and trim excess
- results = _.sample(results, count);
- done();
- }
-
- function done() {
- profile && (args.push(new Date() - start));
- args[0] = results;
- callback && callback.apply(null, args);
- resolve(results);
- }
-
- }); // Promise
-}
-
-/**
- * bind rand() to index
- *
- * @param index {object} - the IndexFile instance
- * @returns {function} - bound rand function for index
- */
-function randomify(index){
- if (!index.fastIndex) throw 'rand requires fastIndex';
- return _.bind(rand, index);
-}
-
-
-
-module.exports = {
-
- init: function(wordposProto) {
- wordposProto.nounIndex.rand = randomify(wordposProto.nounIndex);
- wordposProto.verbIndex.rand = randomify(wordposProto.verbIndex);
- wordposProto.adjIndex.rand = randomify(wordposProto.adjIndex);
- wordposProto.advIndex.rand = randomify(wordposProto.advIndex);
-
- /**
- * define rand()
- */
- wordposProto.rand = randAll;
-
- /**
- * define randX()
- */
- wordposProto.randAdjective = makeRandX('a');
- wordposProto.randAdverb = makeRandX('r');
- wordposProto.randNoun = makeRandX('n');
- wordposProto.randVerb = makeRandX('v');
- }
-};
-
diff --git a/src/common.js b/src/common.js
index a405af2..057df38 100644
--- a/src/common.js
+++ b/src/common.js
@@ -1,6 +1,15 @@
-import { normalize, nextTick } from './util';
-
+/**
+* common.js
+*
+* Copyright (c) 2012-2019 mooster@42at.com
+* https://github.com/moos/wordpos
+*
+* Portions: Copyright (c) 2011, Chris Umbel
+*
+* Released under MIT license
+*/
+var { normalize, nextTick } = require('./util');
/**
* factory for main lookup function
@@ -57,7 +66,6 @@ function lookup(pos) {
*/
function indexLookup(word, callback) {
var self = this;
-
return new Promise(function(resolve, reject){
self.find(word, function (record) {
var indexRecord = null,
@@ -91,8 +99,6 @@ function indexLookup(word, callback) {
});
}
-
-
/**
* getX() factory function
*
@@ -129,7 +135,6 @@ function get(isFn) {
};
}
-
/**
* isX() factory function
*
@@ -158,7 +163,6 @@ function is(pos){
};
}
-
/**
* parse a single data file line, returning data object
*
@@ -218,6 +222,32 @@ function lineDataToJSON(line, location) {
};
}
+
+/**
+ * seek - look up the data-file record stored at a synset offset for one POS.
+ * Fails (callback(err, {}) plus a rejected Promise) when Number(offset) is NaN or <= 0, or when getFilesFor(pos).data is falsy.
+ * @param offset {number|string} - synset offset; validated via Number() but handed to lookup() as given
+ * @param pos {string} - POS a/r/n/v
+ * @param callback {function} - optional; gets (err, {}) on a validation failure, otherwise is forwarded to data.lookup()
+ * @returns {Promise} - the value returned by data.lookup(offset, callback), or a Promise rejected with the validation Error
+ * @this WordPOS
+ */
+function seek(offset, pos, callback){
+ var offsetTmp = Number(offset);
+ if (isNaN(offsetTmp) || offsetTmp <= 0) return error('Offset must be valid positive number: ' + offset);
+
+ var data = this.getFilesFor(pos).data;
+ if (!data) return error('Incorrect POS - 2nd argument must be a, r, n or v.');
+
+ return data.lookup(offset, callback); // the caller's original offset is passed through, not offsetTmp
+
+ function error(msg) { // function declaration is hoisted, so the early returns above can call it
+ var err = new Error(msg);
+ callback && callback(err, {});
+ return Promise.reject(err);
+ }
+}
+
const LEX_NAMES = [
'adj.all',
'adj.pert',
@@ -266,10 +296,12 @@ const LEX_NAMES = [
'adj.ppl'
];
-export {
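+// NOTE: CommonJS export (replaces the former ES `export {}` block); src/browser/index.js still pulls these names in with ES `import` syntax.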
+module.exports= {
indexLookup,
is,
get,
+ seek,
lineDataToJSON,
LEX_NAMES,
diff --git a/src/util.js b/src/util.js
index 0b2d7ba..28e6718 100644
--- a/src/util.js
+++ b/src/util.js
@@ -1,7 +1,15 @@
+/**
+* util.js
+*
+* Copyright (c) 2012-2019 mooster@42at.com
+* https://github.com/moos/wordpos
+*
+* Released under MIT license
+*/
+
let stopwords = require('../lib/natural/util/stopwords').words;
let stopwordsStr = makeStopwordString(stopwords);
-
function makeStopwordString(stopwords) {
return ' ' + stopwords.join(' ') + ' ';
}
@@ -18,8 +26,8 @@ function normalize(word) {
return word.toLowerCase().replace(/\s+/g, '_');
}
-function isStopword(stopwords, word) {
- return stopwords.indexOf(' '+word+' ') >= 0;
+function isStopword(stopwordsStr, word) {
+ return stopwordsStr.indexOf(' '+word+' ') >= 0;
}
function tokenizer(str) {
@@ -47,7 +55,8 @@ function prepText(text) {
));
}
-export {
+module.exports = {
+ stopwords,
nextTick,
normalize,
tokenizer,