diff --git a/samples/self-hosted/index.html b/samples/self-hosted/index.html index 05fee23..e71dc4d 100644 --- a/samples/self-hosted/index.html +++ b/samples/self-hosted/index.html @@ -1,51 +1,42 @@ + + + + - +

Self-hosted WordPOS sample

- + Open console to see results. + +

+   var a = "foo"
+ 
+ + + diff --git a/samples/self-hosted/main.js b/samples/self-hosted/main.js index 4e5db59..a57c33e 100644 --- a/samples/self-hosted/main.js +++ b/samples/self-hosted/main.js @@ -1,4 +1,4 @@ -import WordPOS from '../../src/browser'; +import WordPOS from '../../src/wordpos'; console.log(__dirname, WordPOS.defaults) @@ -9,11 +9,35 @@ let wordpos = window.wordpos = new WordPOS({ // stopwords: false }); -wordpos.isAdverb('likely').then(res => console.log('likely is adverb:', res)); -// wordpos.isAdverb('likely', (res, ...profile) => console.log('likely callback', res, profile)); -wordpos.getAdverbs('this is is likely a likely tricky business this is').then( - res => console.log('getAdverb', res) -); +let assertLikely = (r) => { + console.assert(r.def === 'with considerable certainty'); + console.assert(r.pos === 'r'); + console.assert(r.synsetOffset === '00139421'); +}; -wordpos.lookupAdverb('likely').then(res => console.log('lookup ===', res)) +console.group('Likely'); +wordpos.isAdverb('likely').then(res => console.assert(res)); +wordpos.isAdverb('likely', (res, ...profile) => console.log('callback with profile', res, profile)); + +wordpos.getAdverbs('this is is lately a likely tricky business this is') + .then(res => { + console.log('getAdverbs:', res); + console.assert(res[0] === 'lately'); + console.assert(res[1] === 'likely'); + }); + +wordpos.lookupAdverb('likely') + .then(res => { + console.log('lookupAdverb:', res); + assertLikely(res[0]); + + }); // wordpos.lookup('likely').then(res, console.log('lookup ===', res)) + +wordpos.seek('00139421', 'r') + .then(res => { + console.log('seek:', res); + assertLikely(res); + }); + +// console.groupEnd('Likely'); diff --git a/samples/self-hosted/main.txt b/samples/self-hosted/main.txt new file mode 120000 index 0000000..82df346 --- /dev/null +++ b/samples/self-hosted/main.txt @@ -0,0 +1 @@ +main.js \ No newline at end of file diff --git a/src/browser/baseFile.js b/src/browser/baseFile.js index 68af93b..acb56d1 100644 --- a/src/browser/baseFile.js +++ b/src/browser/baseFile.js @@ -1,16 +1,23 @@ - +/** + * browser/baseFile.js + * + * Copyright (c) 2012-2019 mooster@42at.com + * https://github.com/moos/wordpos + * + * Released under MIT license + */ class BaseFile { /** - * file contents + * file contents - in browser it's just a string & not a file! * @type {Object} */ file = {}; constructor(type, dictPath, posName) { - this.filePath = `${dictPath}/${type}.${posName}.js`; this.type = type; + this.filePath = `${dictPath}/${type}.${posName}.js`; } load() { diff --git a/src/browser/dataFile.js b/src/browser/dataFile.js index 6c72327..1238423 100644 --- a/src/browser/dataFile.js +++ b/src/browser/dataFile.js @@ -1,5 +1,5 @@ -/*! - * dataFile.js +/** + * browser/dataFile.js * * Copyright (c) 2012-2019 mooster@42at.com * https://github.com/moos/wordpos @@ -61,7 +61,7 @@ function lookup(offsets, callback) { * DataFile class * * @param dictPath {string} - path to dict folder - * @param name {string} - POS name + * @param posName {string} - POS name * @constructor */ class DataFile extends BaseFile { @@ -73,14 +73,8 @@ class DataFile extends BaseFile { lookup() { return this.ready(lookup, arguments); } - - seek() { - // return this.ready(find, arguments); - } - } - /** * map of lexFilenum to lex names * diff --git a/src/browser/index.js b/src/browser/index.js index 344b549..4bd8d21 100644 --- a/src/browser/index.js +++ b/src/browser/index.js @@ -1,5 +1,14 @@ +/** +* browser/index.js +* +* Copyright (c) 2012-2019 mooster@42at.com +* https://github.com/moos/wordpos +* +* Released under MIT license +*/ + import { stopwords, prepText, makeStopwordString } from '../util'; -import { is, get, lookup } from '../common'; +import { is, get, lookup, seek } from '../common'; import IndexFile from './indexFile'; import DataFile from './dataFile'; @@ -10,7 +19,6 @@ const POS = { r: 'adv' }; - class WordPOS { options = {}; @@ -18,7 +26,6 @@ class WordPOS { constructor(config) { this.options = Object.assign({}, WordPOS.defaults, config); - console.log('wpos ctor -- ', this.options) this.initFiles(); if (Array.isArray(this.options.stopwords)) { @@ -81,6 +88,8 @@ class WordPOS { parse = prepText; + seek = seek; + /** * isX() - Test if word is given POS * @see is @@ -144,7 +153,6 @@ WordPOS.defaults = { * include data files in preload * @type {boolean} */ - includeData: false }; @@ -154,7 +162,7 @@ WordPOS.defaults = { * access to WordNet DB * @type {object} */ -// WordPOS.WNdb = WNdb; +// WordPOS.WNdb = WNdb; // FIXME /** * access to stopwords diff --git a/src/browser/indexFile.js b/src/browser/indexFile.js index 88d0166..2c0308a 100644 --- a/src/browser/indexFile.js +++ b/src/browser/indexFile.js @@ -1,13 +1,9 @@ -/*! - * indexFile.js - * - * implements fast index lookup of WordNet's index files +/** + * browser/indexFile.js * * Copyright (c) 2012-2019 mooster@42at.com * https://github.com/moos/wordpos * - * Portions: Copyright (c) 2011, Chris Umbel - * * Released under MIT license */ @@ -50,7 +46,7 @@ function find(search, callback) { * IndexFile class * * @param dictPath {string} - WordNet db dict path - * @param name {string} - name of index: noun, verb, adj, adv + * @param posName {string} - name of index: noun, verb, adj, adv * @constructor */ class IndexFile extends BaseFile { diff --git a/src/browser/piper.js b/src/browser/piper.js deleted file mode 100644 index c0985de..0000000 --- a/src/browser/piper.js +++ /dev/null @@ -1,82 +0,0 @@ -/*! - * piper.js - * - * executes multiple async i/o tasks and pools similar callbacks, - * calling i/o open/close when all incoming tasks are done. - * - * Copyright (c) 2012-2016 mooster@42at.com - * https://github.com/moos/wordpos - * - * Released under MIT license - */ - -var _ = require('underscore')._, - util = require('util'), - fs = require('fs'); - -/** - * run single 'task' method sharing callbacks. Method MUST take callback as LAST arg. - * piper is bound to an IndexFile. - * - * @param task {string} - task name unique to method! - * @param method {function} - method to execute, gets (args, ... , callback) - * @param args {Array} - args to pass to method - * @param context {object} - other params to remember and sent to callback - * @param callback {function} - result callback - */ -function piper(task, method, args, context, callback){ - var readCallbacks = this.callbackQueue, - memoArgs = _.rest(arguments, 2), - wrappedCallback; - - //console.log('piper', task, [method]); - - // queue up if already reading file for this task - if (task in readCallbacks){ - readCallbacks[task].push(memoArgs); - return; - } - readCallbacks[task] = [memoArgs]; - - if (!this.fd) { - //console.log(' ... opening', this.filePath); - this.fd = fs.openSync(this.filePath, 'r'); - } - - // ref count so we know when to close the main index file - ++this.refcount; - - wrappedCallback = _.partial(piper.wrapper, this, task); - - // call method -- replace original callback (last arg) with wrapped one - method.apply(null, [].concat( args, wrappedCallback )); -} - -// result is the *same* for same task -piper.wrapper = function(self, task /*, result...*/){ - var readCallbacks = self.callbackQueue, - result = _.rest(arguments, 2), - callback, args; - - // live access callbacks cache in case nested cb's - // add to the array. - while (args = readCallbacks[task].shift()) { - callback = args.pop(); // last arg MUST be callback - -// console.log('>>>> pper wrapper', self.fastIndex.name, task, result.toString()) - callback.apply(null, [].concat(_.flatten(args, /*shallow*/true), result)); - } - - // now done - delete cb cache - delete readCallbacks[task]; - - if (--self.refcount === 0) { - //console.log(' ... closing', self.filePath); - fs.closeSync(self.fd); - self.fd = null; - } -}; - - -module.exports = piper; - diff --git a/src/browser/rand.js b/src/browser/rand.js deleted file mode 100644 index 17808c8..0000000 --- a/src/browser/rand.js +++ /dev/null @@ -1,267 +0,0 @@ -/*! - * rand.js - * - * define rand() and randX() functions on wordpos - * - * Copyright (c) 2012-2016 mooster@42at.com - * https://github.com/moos/wordpos - * - * Released under MIT license - */ - -var _ = require('underscore')._, - util = require('util'), - Trie = require('../lib/natural/trie/trie'), - IndexFile = require('./indexFile'), - KEY_LENGTH = 3; - - -/** - * factory function for randX() - * - * @param pos {string} - a,r,n,v - * @returns {Function} - rand function bound to an index file - */ -function makeRandX(pos){ - return function(opts, callback, _noprofile) { - // disable profiling when isX() used internally - var profile = this.options.profile && !_noprofile, - start = profile && new Date(), - args = [], - index = this.getFilesFor(pos).index, - startsWith = opts && opts.startsWith || '', - count = opts && opts.count || 1; - - if (typeof opts === 'function') { - callback = opts; - } - - return index.rand(startsWith, count, function (record) { - args.push(record, startsWith); - profile && args.push(new Date() - start); - callback && callback.apply(null, args); - }); - }; -} - -/** - * rand function (bound to index) - * - * @param startsWith {string} - get random word(s) that start with this, or '' - * @param num {number} - number of words to return - * @param callback {function} - callback function, receives words array and startsWith - * @returns Promise - */ -function rand(startsWith, num, callback){ - var self = this, - nextKey = null, - trie = this.fastIndex.trie, - key, keys; - - return new Promise(function(resolve, reject) { - - //console.log('-- ', startsWith, num, self.fastIndex.indexKeys.length); - if (startsWith) { - key = startsWith.slice(0, KEY_LENGTH); - - /** - * if key is 'a' or 'ab' (<3 chars), search for ALL keys starting with that. - */ - if (key.length < KEY_LENGTH) { - - // calc trie if haven't done so yet - if (!trie) { - trie = new Trie(); - trie.addStrings(self.fastIndex.indexKeys); - self.fastIndex.trie = trie; - //console.log(' +++ Trie calc '); - } - - try { - // trie throws if not found!!!!! - keys = trie.keysWithPrefix(startsWith); - } catch (e) { - keys = []; - } - - // read all keys then select random word. - // May be large disk read! - key = keys[0]; - nextKey = _.last(keys); - } - - if (!key || !(key in self.fastIndex.offsets)) { - callback && callback([], startsWith); - resolve([]); - } - - } else { - // no startWith given - random select among keys - keys = _.sample(self.fastIndex.indexKeys, num); - - // if num > 1, run each key independently and collect results - if (num > 1) { - var results = [], ii = 0; - _(keys).each(function (startsWith) { - self.rand(startsWith, 1, function (result) { - results.push(result[0]); - if (++ii == num) { - callback && callback(results, ''); - resolve(results); - } - }); - }); - return; - } - key = keys; - } - - // prepare the piper - var args = [key, nextKey, self], - task = 'rand:' + key + nextKey, - context = [startsWith, num, callback]; // last arg MUST be callback - - // pay the piper - self.piper(task, IndexFile.readIndexBetweenKeys, args, context, collector); - - function collector(key, nextKey, index, startsWith, num, callback, buffer) { - var lines = buffer.toString().split('\n'), - matches = lines.map(function (line) { - return line.substring(0, line.indexOf(' ')); - }); - //console.log(' got lines for key ', key, lines.length); - - // we got bunch of matches for key - now search within for startsWith - if (startsWith !== key) { - // binary search for startsWith within set of matches - var ind = _.sortedIndex(matches, startsWith); - if (ind >= lines.length || matches[ind].indexOf(startsWith) === -1) { - callback && callback([], startsWith); - resolve([]); - return; - } - - var trie = new Trie(); - trie.addStrings(matches); - //console.log('Trie > ', trie.matchesWithPrefix( startsWith )); - matches = trie.keysWithPrefix(startsWith); - } - - var words = _.sample(matches, num); - callback && callback(words, startsWith); - resolve(words); - } - - }); // Promise -} - -// relative weight of each POS word count (DB 3.1 numbers) -var POS_factor = { - Noun: 26, - Verb: 3, - Adjective: 5, - Adverb: 1, - Total: 37 -}; - -/** - * rand() - for all Index files - * @returns Promise - */ -function randAll(opts, callback) { - - if (typeof opts === 'function') { - callback = opts; - opts = {}; - } else { - opts = _.clone(opts || {}); - } - - var - profile = this.options.profile, - start = profile && new Date(), - results = [], - startsWith = opts && opts.startsWith || '', - count = opts && opts.count || 1, - args = [null, startsWith], - parts = 'Noun Verb Adjective Adverb'.split(' '), - self = this; - - - - return new Promise(function(resolve, reject) { - // select at random a POS to look at - var doParts = _.sample(parts, parts.length); - tryPart(); - - function tryPart() { - var part = doParts.pop(), - rand = 'rand' + part, - factor = POS_factor[part], - weight = factor / POS_factor.Total; - - // pick count according to relative weight - opts.count = Math.ceil(count * weight * 1.1); // guard against dupes - self[rand](opts, partCallback); - } - - function partCallback(result) { - if (result) { - results = _.uniq(results.concat(result)); // make sure it's unique! - } - - if (results.length < count && doParts.length) { - return tryPart(); - } - - // final random and trim excess - results = _.sample(results, count); - done(); - } - - function done() { - profile && (args.push(new Date() - start)); - args[0] = results; - callback && callback.apply(null, args); - resolve(results); - } - - }); // Promise -} - -/** - * bind rand() to index - * - * @param index {object} - the IndexFile instance - * @returns {function} - bound rand function for index - */ -function randomify(index){ - if (!index.fastIndex) throw 'rand requires fastIndex'; - return _.bind(rand, index); -} - - - -module.exports = { - - init: function(wordposProto) { - wordposProto.nounIndex.rand = randomify(wordposProto.nounIndex); - wordposProto.verbIndex.rand = randomify(wordposProto.verbIndex); - wordposProto.adjIndex.rand = randomify(wordposProto.adjIndex); - wordposProto.advIndex.rand = randomify(wordposProto.advIndex); - - /** - * define rand() - */ - wordposProto.rand = randAll; - - /** - * define randX() - */ - wordposProto.randAdjective = makeRandX('a'); - wordposProto.randAdverb = makeRandX('r'); - wordposProto.randNoun = makeRandX('n'); - wordposProto.randVerb = makeRandX('v'); - } -}; - diff --git a/src/common.js b/src/common.js index a405af2..057df38 100644 --- a/src/common.js +++ b/src/common.js @@ -1,6 +1,15 @@ -import { normalize, nextTick } from './util'; - +/** +* common.js +* +* Copyright (c) 2012-2019 mooster@42at.com +* https://github.com/moos/wordpos +* +* Portions: Copyright (c) 2011, Chris Umbel +* +* Released under MIT license +*/ +var { normalize, nextTick } = require('./util'); /** * factory for main lookup function @@ -57,7 +66,6 @@ function lookup(pos) { */ function indexLookup(word, callback) { var self = this; - return new Promise(function(resolve, reject){ self.find(word, function (record) { var indexRecord = null, @@ -91,8 +99,6 @@ function indexLookup(word, callback) { }); } - - /** * getX() factory function * @@ -129,7 +135,6 @@ function get(isFn) { }; } - /** * isX() factory function * @@ -158,7 +163,6 @@ function is(pos){ }; } - /** * parse a single data file line, returning data object * @@ -218,6 +222,32 @@ function lineDataToJSON(line, location) { }; } + +/** + * seek - get record at offset for pos + * + * @param offset {number} - synset offset + * @param pos {string} - POS a/r/n/v + * @param callback {function} - optional callback + * @returns Promise + * @this WordPOS + */ +function seek(offset, pos, callback){ + var offsetTmp = Number(offset); + if (isNaN(offsetTmp) || offsetTmp <= 0) return error('Offset must be valid positive number: ' + offset); + + var data = this.getFilesFor(pos).data; + if (!data) return error('Incorrect POS - 2nd argument must be a, r, n or v.'); + + return data.lookup(offset, callback); + + function error(msg) { + var err = new Error(msg); + callback && callback(err, {}); + return Promise.reject(err); + } +} + const LEX_NAMES = [ 'adj.all', 'adj.pert', @@ -266,10 +296,12 @@ const LEX_NAMES = [ 'adj.ppl' ]; -export { +// console.log(333, typeof export) +module.exports= { indexLookup, is, get, + seek, lineDataToJSON, LEX_NAMES, diff --git a/src/util.js b/src/util.js index 0b2d7ba..28e6718 100644 --- a/src/util.js +++ b/src/util.js @@ -1,7 +1,15 @@ +/** +* util.js +* +* Copyright (c) 2012-2019 mooster@42at.com +* https://github.com/moos/wordpos +* +* Released under MIT license +*/ + let stopwords = require('../lib/natural/util/stopwords').words; let stopwordsStr = makeStopwordString(stopwords); - function makeStopwordString(stopwords) { return ' ' + stopwords.join(' ') + ' '; } @@ -18,8 +26,8 @@ function normalize(word) { return word.toLowerCase().replace(/\s+/g, '_'); } -function isStopword(stopwords, word) { - return stopwords.indexOf(' '+word+' ') >= 0; +function isStopword(stopwordsStr, word) { + return stopwordsStr.indexOf(' '+word+' ') >= 0; } function tokenizer(str) { @@ -47,7 +55,8 @@ function prepText(text) { )); } -export { +module.exports = { + stopwords, nextTick, normalize, tokenizer,