browser complete

This commit is contained in:
Moos 2018-10-14 22:20:56 -07:00
parent 364b2648f7
commit e56463f94d
11 changed files with 144 additions and 431 deletions

View File

@ -1,51 +1,42 @@
<!doctype html> <!doctype html>
<html> <html>
<head> <head>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/github.min.css" />
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/highlight.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/languages/javascript.min.js"></script>
<script src="./main.js"></script> <script src="./main.js"></script>
<script type="ignore-me"> <style>
pre {
// import IndexFile from "../../src/browser/indexFile.js"; padding: 2em;
console.log(333, IndexFile) display: block;
let posExt = ['adj', 'adv', 'noun', 'verb'];
let dictRoot = '../../dict/';
let files = {};
function loadPos(pos) {
return import(dictRoot + 'index.' + pos + '.js');
console.time('load-' + pos);
let get = (name) => {
let path = dictRoot + name + '.' + pos + '.json';
return fetch(path).then(res => res.json()).then(obj => {
// console.log(`got ${path}: `, text);
files[pos] = files[pos] || {};
files[pos][name] = obj;
console.timeEnd('load-' + pos);
});
};
// get('data');
return get('index');
} }
</style>
let pos = 'adv';
loadPos(pos).then(result => {
console.log('got', pos ,result);
window.res = result.default;
});
</script>
</head> </head>
<body> <body>
<h1>Self-hosted WordPOS sample</h1> <h1>Self-hosted WordPOS sample</h1>
Open console to see results.
<pre><code>
var a = "foo"
</code></pre>
<script> <script>
// Show the sample's source (main.txt) inside the page's <code> element and
// syntax-highlight it; fall back to a plain link when that is not possible.
var el = document.querySelector('code');

// Fallback: fetch is unavailable, or the request for main.txt failed.
var showSourceLink = function () {
  el.innerHTML = 'Open <a href=main.txt>main.js</a>.';
};

// `typeof` guard: a bare `fetch` reference throws ReferenceError where fetch
// is not defined, which made the fallback branch unreachable.
if (typeof fetch === 'function') {
  fetch('./main.txt')
    .then(res => {
      // a 404 page is not the source file - treat HTTP errors as failures
      if (!res.ok) throw new Error('Failed to load main.txt: ' + res.status);
      return res.text();
    })
    .then(txt => {
      el.innerText = txt;
      // Highlight right away: initHighlightingOnLoad() only registers
      // DOMContentLoaded/load listeners, which may already have fired by the
      // time this asynchronous callback runs.
      hljs.highlightBlock(el);
    }, showSourceLink);
} else {
  showSourceLink();
}
</script> </script>
</body> </body>
</html> </html>

View File

@ -1,4 +1,4 @@
import WordPOS from '../../src/browser'; import WordPOS from '../../src/wordpos';
console.log(__dirname, WordPOS.defaults) console.log(__dirname, WordPOS.defaults)
@ -9,11 +9,35 @@ let wordpos = window.wordpos = new WordPOS({
// stopwords: false // stopwords: false
}); });
wordpos.isAdverb('likely').then(res => console.log('likely is adverb:', res)); let assertLikely = (r) => {
// wordpos.isAdverb('likely', (res, ...profile) => console.log('likely callback', res, profile)); console.assert(r.def === 'with considerable certainty');
wordpos.getAdverbs('this is is likely a likely tricky business this is').then( console.assert(r.pos === 'r');
res => console.log('getAdverb', res) console.assert(r.synsetOffset === '00139421');
); };
wordpos.lookupAdverb('likely').then(res => console.log('lookup ===', res)) console.group('Likely');
wordpos.isAdverb('likely').then(res => console.assert(res));
wordpos.isAdverb('likely', (res, ...profile) => console.log('callback with profile', res, profile));
wordpos.getAdverbs('this is is lately a likely tricky business this is')
.then(res => {
console.log('getAdverbs:', res);
console.assert(res[0] === 'lately');
console.assert(res[1] === 'likely');
});
wordpos.lookupAdverb('likely')
.then(res => {
console.log('lookupAdverb:', res);
assertLikely(res[0]);
});
// wordpos.lookup('likely').then(res, console.log('lookup ===', res)) // wordpos.lookup('likely').then(res, console.log('lookup ===', res))
wordpos.seek('00139421', 'r')
.then(res => {
console.log('seek:', res);
assertLikely(res);
});
// console.groupEnd('Likely');

View File

@ -0,0 +1 @@
main.js

View File

@ -1,16 +1,23 @@
/**
* browser/baseFile.js
*
* Copyright (c) 2012-2019 mooster@42at.com
* https://github.com/moos/wordpos
*
* Released under MIT license
*/
class BaseFile { class BaseFile {
/** /**
* file contents * file contents - in browser it's just a string & not a file!
* @type {Object} * @type {Object}
*/ */
file = {}; file = {};
constructor(type, dictPath, posName) { constructor(type, dictPath, posName) {
this.filePath = `${dictPath}/${type}.${posName}.js`;
this.type = type; this.type = type;
this.filePath = `${dictPath}/${type}.${posName}.js`;
} }
load() { load() {

View File

@ -1,5 +1,5 @@
/*! /**
* dataFile.js * browser/dataFile.js
* *
* Copyright (c) 2012-2019 mooster@42at.com * Copyright (c) 2012-2019 mooster@42at.com
* https://github.com/moos/wordpos * https://github.com/moos/wordpos
@ -61,7 +61,7 @@ function lookup(offsets, callback) {
* DataFile class * DataFile class
* *
* @param dictPath {string} - path to dict folder * @param dictPath {string} - path to dict folder
* @param name {string} - POS name * @param posName {string} - POS name
* @constructor * @constructor
*/ */
class DataFile extends BaseFile { class DataFile extends BaseFile {
@ -73,14 +73,8 @@ class DataFile extends BaseFile {
lookup() { lookup() {
return this.ready(lookup, arguments); return this.ready(lookup, arguments);
} }
seek() {
// return this.ready(find, arguments);
}
} }
/** /**
* map of lexFilenum to lex names * map of lexFilenum to lex names
* *

View File

@ -1,5 +1,14 @@
/**
* browser/index.js
*
* Copyright (c) 2012-2019 mooster@42at.com
* https://github.com/moos/wordpos
*
* Released under MIT license
*/
import { stopwords, prepText, makeStopwordString } from '../util'; import { stopwords, prepText, makeStopwordString } from '../util';
import { is, get, lookup } from '../common'; import { is, get, lookup, seek } from '../common';
import IndexFile from './indexFile'; import IndexFile from './indexFile';
import DataFile from './dataFile'; import DataFile from './dataFile';
@ -10,7 +19,6 @@ const POS = {
r: 'adv' r: 'adv'
}; };
class WordPOS { class WordPOS {
options = {}; options = {};
@ -18,7 +26,6 @@ class WordPOS {
constructor(config) { constructor(config) {
this.options = Object.assign({}, WordPOS.defaults, config); this.options = Object.assign({}, WordPOS.defaults, config);
console.log('wpos ctor -- ', this.options)
this.initFiles(); this.initFiles();
if (Array.isArray(this.options.stopwords)) { if (Array.isArray(this.options.stopwords)) {
@ -81,6 +88,8 @@ class WordPOS {
parse = prepText; parse = prepText;
seek = seek;
/** /**
* isX() - Test if word is given POS * isX() - Test if word is given POS
* @see is * @see is
@ -144,7 +153,6 @@ WordPOS.defaults = {
* include data files in preload * include data files in preload
* @type {boolean} * @type {boolean}
*/ */
includeData: false includeData: false
}; };
@ -154,7 +162,7 @@ WordPOS.defaults = {
* access to WordNet DB * access to WordNet DB
* @type {object} * @type {object}
*/ */
// WordPOS.WNdb = WNdb; // WordPOS.WNdb = WNdb; // FIXME
/** /**
* access to stopwords * access to stopwords

View File

@ -1,13 +1,9 @@
/*! /**
* indexFile.js * browser/indexFile.js
*
* implements fast index lookup of WordNet's index files
* *
* Copyright (c) 2012-2019 mooster@42at.com * Copyright (c) 2012-2019 mooster@42at.com
* https://github.com/moos/wordpos * https://github.com/moos/wordpos
* *
* Portions: Copyright (c) 2011, Chris Umbel
*
* Released under MIT license * Released under MIT license
*/ */
@ -50,7 +46,7 @@ function find(search, callback) {
* IndexFile class * IndexFile class
* *
* @param dictPath {string} - WordNet db dict path * @param dictPath {string} - WordNet db dict path
* @param name {string} - name of index: noun, verb, adj, adv * @param posName {string} - name of index: noun, verb, adj, adv
* @constructor * @constructor
*/ */
class IndexFile extends BaseFile { class IndexFile extends BaseFile {

View File

@ -1,82 +0,0 @@
/*!
* piper.js
*
* executes multiple async i/o tasks and pools similar callbacks,
* calling i/o open/close when all incoming tasks are done.
*
* Copyright (c) 2012-2016 mooster@42at.com
* https://github.com/moos/wordpos
*
* Released under MIT license
*/
var _ = require('underscore')._,
util = require('util'),
fs = require('fs');
/**
* run single 'task' method sharing callbacks. Method MUST take callback as LAST arg.
* piper is bound to an IndexFile.
*
* @param task {string} - task name unique to method!
* @param method {function} - method to execute, gets (args, ... , callback)
* @param args {Array} - args to pass to method
* @param context {object} - other params to remember and sent to callback
* @param callback {function} - result callback
*/
function piper(task, method, args, context, callback){
  // Everything after (task, method) is remembered so the wrapper can hand it
  // back to the final callback together with the i/o result.
  var queue = this.callbackQueue,
      pending = _.rest(arguments, 2);

  if (!(task in queue)) {
    // first request for this task: start the queue and kick off the i/o
    queue[task] = [pending];

    if (!this.fd) {
      this.fd = fs.openSync(this.filePath, 'r');
    }

    // one reference per in-flight task; the wrapper releases it
    this.refcount++;

    // the method receives its own args followed by the pooled callback
    var pooledCallback = _.partial(piper.wrapper, this, task);
    method.apply(null, [].concat(args, pooledCallback));
    return;
  }

  // same task already in flight - just wait for its result
  queue[task].push(pending);
}
// result is the *same* for same task
/**
 * Pooled completion handler: delivers one i/o result to every callback that
 * was queued for `task`, then releases the task's reference on the file.
 *
 * @param self {object} - object piper was bound to (owns callbackQueue, refcount, fd)
 * @param task {string} - task name whose queued callbacks are flushed
 * @param result... - remaining arguments are the i/o result, appended to each callback's args
 */
piper.wrapper = function(self, task /*, result...*/){
  var readCallbacks = self.callbackQueue,
      result = _.rest(arguments, 2),
      callback, args;

  try {
    // live access callbacks cache in case nested cb's
    // add to the array.
    while (args = readCallbacks[task].shift()) {
      callback = args.pop(); // last arg MUST be callback
      callback.apply(null, [].concat(_.flatten(args, /*shallow*/true), result));
    }
  } finally {
    // Clean up even when a callback throws: otherwise the stale queue entry
    // makes every later piper() call for this task wait forever, and the
    // refcount never reaches zero so the descriptor is never closed.
    delete readCallbacks[task];

    if (--self.refcount === 0) {
      fs.closeSync(self.fd);
      self.fd = null;
    }
  }
};
module.exports = piper;

View File

@ -1,267 +0,0 @@
/*!
* rand.js
*
* define rand() and randX() functions on wordpos
*
* Copyright (c) 2012-2016 mooster@42at.com
* https://github.com/moos/wordpos
*
* Released under MIT license
*/
var _ = require('underscore')._,
util = require('util'),
Trie = require('../lib/natural/trie/trie'),
IndexFile = require('./indexFile'),
KEY_LENGTH = 3;
/**
* factory function for randX()
*
* @param pos {string} - a,r,n,v
* @returns {Function} - rand function bound to an index file
*/
function makeRandX(pos){
  return function(opts, callback, _noprofile) {
    // profiling is suppressed when the caller passes _noprofile
    var wantProfile = this.options.profile && !_noprofile;
    var startedAt = wantProfile && new Date();
    var cbArgs = [];
    var indexFile = this.getFilesFor(pos).index;
    var prefix = (opts && opts.startsWith) || '';
    var howMany = (opts && opts.count) || 1;

    // randX(callback) form: the first argument is the callback
    var done = (typeof opts === 'function') ? opts : callback;

    return indexFile.rand(prefix, howMany, function (record) {
      cbArgs.push(record, prefix);
      if (wantProfile) {
        cbArgs.push(new Date() - startedAt);
      }
      if (done) {
        done.apply(null, cbArgs);
      }
    });
  };
}
/**
* rand function (bound to index)
*
* @param startsWith {string} - get random word(s) that start with this, or ''
* @param num {number} - number of words to return
* @param callback {function} - callback function, receives words array and startsWith
* @returns Promise
*/
function rand(startsWith, num, callback){
  var self = this,
    nextKey = null,
    trie = this.fastIndex.trie,
    key, keys;

  return new Promise(function(resolve, reject) {
    if (startsWith) {
      key = startsWith.slice(0, KEY_LENGTH);

      /**
       * if key is 'a' or 'ab' (<3 chars), search for ALL keys starting with that.
       */
      if (key.length < KEY_LENGTH) {

        // calc trie if haven't done so yet
        if (!trie) {
          trie = new Trie();
          trie.addStrings(self.fastIndex.indexKeys);
          self.fastIndex.trie = trie;
        }

        try {
          // trie throws if not found
          keys = trie.keysWithPrefix(startsWith);
        } catch (e) {
          keys = [];
        }

        // read all keys then select random word.
        // May be large disk read!
        key = keys[0];
        nextKey = _.last(keys);
      }

      if (!key || !(key in self.fastIndex.offsets)) {
        callback && callback([], startsWith);
        resolve([]);
        // stop here: falling through would read the index with an invalid
        // key and could invoke the callback a second time
        return;
      }
    } else {
      // no startWith given - random select among keys
      keys = _.sample(self.fastIndex.indexKeys, num);

      // if num > 1, run each key independently and collect results
      if (num > 1) {
        var results = [],
          ii = 0,
          // fewer keys than `num` may be available; wait for exactly the
          // lookups that were started, otherwise the promise never settles
          expected = keys.length;

        if (expected === 0) {
          callback && callback(results, '');
          resolve(results);
          return;
        }

        _(keys).each(function (startsWith) {
          self.rand(startsWith, 1, function (result) {
            results.push(result[0]);
            if (++ii === expected) {
              callback && callback(results, '');
              resolve(results);
            }
          });
        });
        return;
      }
      key = keys;
    }

    // prepare the piper
    var args = [key, nextKey, self],
      task = 'rand:' + key + nextKey,
      context = [startsWith, num, callback]; // last arg MUST be callback

    // pay the piper
    self.piper(task, IndexFile.readIndexBetweenKeys, args, context, collector);

    // receives piper's args + context, followed by the buffer that was read
    function collector(key, nextKey, index, startsWith, num, callback, buffer) {
      var lines = buffer.toString().split('\n'),
        matches = lines.map(function (line) {
          return line.substring(0, line.indexOf(' '));
        });

      // we got bunch of matches for key - now search within for startsWith
      if (startsWith !== key) {
        // binary search for startsWith within set of matches
        var ind = _.sortedIndex(matches, startsWith);

        // must be a true prefix match: a mere substring hit would let
        // keysWithPrefix() below throw and leave the promise pending
        if (ind >= lines.length || matches[ind].indexOf(startsWith) !== 0) {
          callback && callback([], startsWith);
          resolve([]);
          return;
        }

        var trie = new Trie();
        trie.addStrings(matches);
        matches = trie.keysWithPrefix(startsWith);
      }

      var words = _.sample(matches, num);
      callback && callback(words, startsWith);
      resolve(words);
    }
  }); // Promise
}
// relative weight of each POS word count (DB 3.1 numbers)
var POS_factor = {
Noun: 26,
Verb: 3,
Adjective: 5,
Adverb: 1,
Total: 37
};
/**
* rand() - for all Index files
* @returns Promise
*/
function randAll(opts, callback) {
  // rand(callback) form: no options given
  var calledWithCallbackOnly = typeof opts === 'function';
  if (calledWithCallbackOnly) {
    callback = opts;
  }
  // work on a private copy - opts.count is rewritten for every POS below
  opts = calledWithCallbackOnly ? {} : _.clone(opts || {});

  var self = this;
  var profile = this.options.profile;
  var start = profile && new Date();
  var collected = [];
  var startsWith = (opts && opts.startsWith) || '';
  var count = (opts && opts.count) || 1;
  var args = [null, startsWith];
  var parts = ['Noun', 'Verb', 'Adjective', 'Adverb'];

  return new Promise(function(resolve, reject) {
    // visit the POS files in random order
    var remaining = _.sample(parts, parts.length);

    var onPartResult = function (result) {
      if (result) {
        // merge and de-duplicate
        collected = _.uniq(collected.concat(result));
      }

      // not enough words yet and more POS to try
      if (collected.length < count && remaining.length) {
        return queryNextPart();
      }

      // final random pick, trimming any excess
      collected = _.sample(collected, count);

      if (profile) {
        args.push(new Date() - start);
      }
      args[0] = collected;
      if (callback) {
        callback.apply(null, args);
      }
      resolve(collected);
    };

    var queryNextPart = function () {
      var part = remaining.pop();
      var share = POS_factor[part] / POS_factor.Total;

      // ask each POS for its weighted share, padded to guard against dupes
      opts.count = Math.ceil(count * share * 1.1);
      self['rand' + part](opts, onPartResult);
    };

    queryNextPart();
  }); // Promise
}
/**
* bind rand() to index
*
* @param index {object} - the IndexFile instance
* @returns {function} - bound rand function for index
*/
function randomify(index){
  // rand() reads index.fastIndex, so refuse to bind without it
  var hasFastIndex = Boolean(index.fastIndex);
  if (hasFastIndex) {
    return _.bind(rand, index);
  }
  throw 'rand requires fastIndex';
}
module.exports = {
init: function(wordposProto) {
wordposProto.nounIndex.rand = randomify(wordposProto.nounIndex);
wordposProto.verbIndex.rand = randomify(wordposProto.verbIndex);
wordposProto.adjIndex.rand = randomify(wordposProto.adjIndex);
wordposProto.advIndex.rand = randomify(wordposProto.advIndex);
/**
* define rand()
*/
wordposProto.rand = randAll;
/**
* define randX()
*/
wordposProto.randAdjective = makeRandX('a');
wordposProto.randAdverb = makeRandX('r');
wordposProto.randNoun = makeRandX('n');
wordposProto.randVerb = makeRandX('v');
}
};

View File

@ -1,6 +1,15 @@
import { normalize, nextTick } from './util'; /**
* common.js
*
* Copyright (c) 2012-2019 mooster@42at.com
* https://github.com/moos/wordpos
*
* Portions: Copyright (c) 2011, Chris Umbel
*
* Released under MIT license
*/
var { normalize, nextTick } = require('./util');
/** /**
* factory for main lookup function * factory for main lookup function
@ -57,7 +66,6 @@ function lookup(pos) {
*/ */
function indexLookup(word, callback) { function indexLookup(word, callback) {
var self = this; var self = this;
return new Promise(function(resolve, reject){ return new Promise(function(resolve, reject){
self.find(word, function (record) { self.find(word, function (record) {
var indexRecord = null, var indexRecord = null,
@ -91,8 +99,6 @@ function indexLookup(word, callback) {
}); });
} }
/** /**
* getX() factory function * getX() factory function
* *
@ -129,7 +135,6 @@ function get(isFn) {
}; };
} }
/** /**
* isX() factory function * isX() factory function
* *
@ -158,7 +163,6 @@ function is(pos){
}; };
} }
/** /**
* parse a single data file line, returning data object * parse a single data file line, returning data object
* *
@ -218,6 +222,32 @@ function lineDataToJSON(line, location) {
}; };
} }
/**
* seek - get record at offset for pos
*
* @param offset {number} - synset offset
* @param pos {string} - POS a/r/n/v
* @param callback {function} - optional callback
* @returns Promise
* @this WordPOS
*/
function seek(offset, pos, callback){
  // Report a failure both ways: error-first callback (if given) and rejected promise.
  function error(msg) {
    var err = new Error(msg);
    callback && callback(err, {});
    return Promise.reject(err);
  }

  var offsetTmp = Number(offset);
  if (isNaN(offsetTmp) || offsetTmp <= 0) return error('Offset must be valid positive number: ' + offset);

  // Guard the lookup: if no file set comes back for an unknown POS, report the
  // POS error below instead of throwing a synchronous TypeError on `.data`.
  var files = this.getFilesFor(pos);
  var data = files && files.data;
  if (!data) return error('Incorrect POS - 2nd argument must be a, r, n or v.');

  // pass the caller's original offset (not the numeric copy) to the data file
  return data.lookup(offset, callback);
}
const LEX_NAMES = [ const LEX_NAMES = [
'adj.all', 'adj.all',
'adj.pert', 'adj.pert',
@ -266,10 +296,12 @@ const LEX_NAMES = [
'adj.ppl' 'adj.ppl'
]; ];
export { // console.log(333, typeof export)
module.exports= {
indexLookup, indexLookup,
is, is,
get, get,
seek,
lineDataToJSON, lineDataToJSON,
LEX_NAMES, LEX_NAMES,

View File

@ -1,7 +1,15 @@
/**
* util.js
*
* Copyright (c) 2012-2019 mooster@42at.com
* https://github.com/moos/wordpos
*
* Released under MIT license
*/
let stopwords = require('../lib/natural/util/stopwords').words; let stopwords = require('../lib/natural/util/stopwords').words;
let stopwordsStr = makeStopwordString(stopwords); let stopwordsStr = makeStopwordString(stopwords);
function makeStopwordString(stopwords) { function makeStopwordString(stopwords) {
return ' ' + stopwords.join(' ') + ' '; return ' ' + stopwords.join(' ') + ' ';
} }
@ -18,8 +26,8 @@ function normalize(word) {
return word.toLowerCase().replace(/\s+/g, '_'); return word.toLowerCase().replace(/\s+/g, '_');
} }
function isStopword(stopwords, word) { function isStopword(stopwordsStr, word) {
return stopwords.indexOf(' '+word+' ') >= 0; return stopwordsStr.indexOf(' '+word+' ') >= 0;
} }
function tokenizer(str) { function tokenizer(str) {
@ -47,7 +55,8 @@ function prepText(text) {
)); ));
} }
export { module.exports = {
stopwords,
nextTick, nextTick,
normalize, normalize,
tokenizer, tokenizer,