Compare commits

..

No commits in common. "master" and "browser" have entirely different histories.

24 changed files with 1265 additions and 7400 deletions

View File

@ -8,7 +8,9 @@
"plugins": [
"@babel/plugin-proposal-class-properties",
"babel-plugin-dynamic-import-node",
"@babel/plugin-syntax-dynamic-import",
"@babel/plugin-syntax-import-meta",
"@babel/plugin-proposal-json-strings",
[
"@babel/plugin-proposal-decorators",
{

1
.gitignore vendored
View File

@ -5,4 +5,3 @@ node_modules
build
dict
dist
*.tgz

View File

@ -2,4 +2,3 @@ node_modules
.project
.idea
*.iml
*.tgz

View File

@ -1,6 +1,5 @@
language: node_js
node_js:
- '13'
- '12'
- '11'
- '10'

View File

@ -1,62 +0,0 @@
2.1.0
- Fix CLI script when used without specific POS (#41)
- :boom: Stopwords are now case-insensitive, i.e., "The", "And", "Him", etc. are all filtered out.
2.0.0
- Support for running wordpos in the **browser** (no breaking change for node environment)
- Dropped support for node 4.x.
1.2.0 (tagged v1.2.0)
- Fix `new Buffer()` deprecation warning.
- Fix npm audit vulnerabilities
1.1.6
- Fix #25 rand().then with no args
1.1.5
- rollback 1.1.4 changes. Fix is made in [wordnet-db](https://github.com/moos/wordnet-db).
1.1.4
- temporary fix for #19 issue with npm@5
1.1.2
- Fix DeprecationWarning for node 7.x (1.1.1)
- Fix occasional error for large offsets during seek
1.1.0
- added seek() method
- added lexName property
1.0.1
- Removed npm dependency on Natural. Certain modules are included in /lib.
- Add support for ES6 Promises.
- Improved data file reads for up to **5x** performance increase compared to previous version.
- Tests are now [mocha](https://mochajs.org/)-based with [chai](http://chaijs.com/) assert interface.
0.1.16
- Changed dependency to wordnet-db (renamed from WNdb)
0.1.15
- Added `syn` (synonym) and `exp` (example) CLI commands.
- Fixed `rand` CLI command when no start word given.
- Removed -N, --num CLI option. Use `wordpos rand [N]` to get N random words.
- Changed CLI option -s to -w (include stopwords).
0.1.13
- Fix crlf issue for command-line script
0.1.12
- fix stopwords not getting excluded when running with CLI
- added 'stopwords' CLI *command* to show list of stopwords
- CLI *option* --stopword now renamed to --withStopwords
0.1.10
- rand functionality added
0.1.6
- added command line tool
0.1.4
- added fast index

110
README.md
View File

@ -10,7 +10,7 @@ Version 1.x is a major update with no direct dependence on [natural's](https://g
> ~~**CAUTION** The WordNet database [wordnet-db](https://github.com/moos/wordnet-db) comprises [155,287 words](https://wordnet.princeton.edu/documentation/wnstats7wn) (3.0 numbers) which uncompress to over **30 MB** of data in several *un*[browserify](https://github.com/substack/node-browserify)-able files. It is *not* meant for the browser environment.~~
🔥 Version 2.x is totally refactored and **works in browsers** also -- see [wordpos-web](https://github.com/moos/wordpos-web).
:zap: v2.x can work in browsers -- to try it out `npm i wordpos@beta` or [see it in action](https://moos.github.io/wordpos). See below for usage.
## Installation
@ -86,7 +86,7 @@ WordPOS.defaults = {
* include data files in preload
* @type {boolean}
*/
includeData: false,
includeData: false, // WIP
/**
* set to true to enable debug logging
@ -206,7 +206,7 @@ wordpos.lookupAdjective('awesome', console.log);
```
In this case only one lookup was found, but there could be several.
Version 1.1 adds the `lexName` parameter, which maps the lexFilenum to one of [45 lexicographer domains](https://wordnet.princeton.edu/documentation/lexnames5wn).
Version 1.1 adds the `lexName` parameter, which maps the lexFilenum to one of [45 lexicographer domains](https://wordnet.princeton.edu/wordnet/man/lexnames.5WN.html).
#### seek(offset, pos, callback)
@ -294,9 +294,41 @@ wordpos.isVerb('fish', console.log)
```
Note that callback receives full arguments (including profile, if enabled), while the Promise receives only the result of the call. Also, beware that exceptions in the _callback_ will result in the Promise being _rejected_ and caught by `catch()`, if provided.
## Running inside the browsers?
## Running inside the browsers
See [wordpos-web](https://github.com/moos/wordpos-web).
v2.0 introduces the capability of running wordpos in the browser. The dictionary files are optimized for fast access (lookup by lemma), but they must be fetched, parsed and loaded into browser memory. The files are loaded on-demand (unless the option `preload: true` is given).
The dict files can be served locally or from CDN (see [samples/cdn](samples/cdn/) for code, or [see it in action](https://moos.github.io/wordpos)). Include the following scripts in your `index.html`:
```html
<script src="wordpos/dist/wordpos.min.js"></script>
<script>
let wordpos = new WordPOS({
// preload: true,
dictPath: '/wordpos/dict',
profile: true
});
wordpos.getAdverbs('this is is lately a likely tricky business this is')
.then(res => {
console.log(res); // ["lately", "likely"]
});
</script>
```
Above assumes wordpos is installed to the directory `./wordpos`. `./wordpos/dict` holds the index and data WordNet files generated for the web in a postinstall script.
See [samples/self-hosted](samples/self-hosted/).
To run the samples locally, install [parcel](https://github.com/parcel-bundler/parcel) if you don't already have it (`npm i -g parcel`), then:
```bash
$ npm run start-self
Server running at http://localhost:1234
...
$ npm run start-cdn
Server running at http://localhost:1234
...
```
and open your browser to that url.
## Fast Index (node)
@ -306,7 +338,7 @@ Fast index improves performance **30x** over Natural's native methods. See blog
As of version 1.0, fast index is always on and cannot be turned off.
## Command-line (CLI) usage
## Command-line: CLI
For CLI usage and examples, see [bin/README](bin).
@ -316,12 +348,72 @@ For CLI usage and examples, see [bin/README](bin).
See [bench/README](bench).
## TODO
- implement `includeData` option for preload
## Changes
See [CHANGELOG](./CHANGELOG.md).
**2.0.0**
- Support for running wordpos in browser (no breaking change for node environment)
- Dropped support for node 4.x.
1.2.0
- Fix `new Buffer()` deprecation warning.
- Fix npm audit vulnerabilities
1.1.6
- Fix #25 rand().then with no args
1.1.5
- rollback 1.1.4 changes. Fix is made in [wordnet-db](https://github.com/moos/wordnet-db).
1.1.4
- temporary fix for #19 issue with npm@5
1.1.2
- Fix DeprecationWarning for node 7.x (1.1.1)
- Fix occasional error for large offsets during seek
1.1.0
- added seek() method
- added lexName property
1.0.1
- Removed npm dependency on Natural. Certain modules are included in /lib.
- Add support for ES6 Promises.
- Improved data file reads for up to **5x** performance increase compared to previous version.
- Tests are now [mocha](https://mochajs.org/)-based with [chai](http://chaijs.com/) assert interface.
0.1.16
- Changed dependency to wordnet-db (renamed from WNdb)
0.1.15
- Added `syn` (synonym) and `exp` (example) CLI commands.
- Fixed `rand` CLI command when no start word given.
- Removed -N, --num CLI option. Use `wordpos rand [N]` to get N random words.
- Changed CLI option -s to -w (include stopwords).
0.1.13
- Fix crlf issue for command-line script
0.1.12
- fix stopwords not getting excluded when running with CLI
- added 'stopwords' CLI *command* to show list of stopwords
- CLI *option* --stopword now renamed to --withStopwords
0.1.10
- rand functionality added
0.1.6
- added command line tool
0.1.4
- added fast index
License
-------
https://github.com/moos/wordpos
Copyright (c) 2012-2020 mooster@42at.com
(The MIT License)
Copyright (c) 2012-2019 mooster@42at.com

View File

@ -269,3 +269,4 @@ function sprint(results) {
},'');
}
}

53
docs/cdn/index.html Normal file
View File

@ -0,0 +1,53 @@
<!doctype html>
<html>
<head>
<meta http-equiv="Content-Security-Policy" content="script-src https: http: 'unsafe-inline' 'unsafe-eval'">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/github.min.css" />
<script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/highlight.min.js"></script>
<script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/languages/javascript.min.js"></script>
<!-- cdn source -->
<script src="https://unpkg.com/wordpos@2.0.0-beta/dist/wordpos.min.js"></script>
<script>
let wordpos = window.wordpos = new WordPOS({
// preload: true,
dictPath: 'https://unpkg.com/wordpos@2.0.0-beta/dict',
profile: true,
// stopwords: false
});
</script>
<script src="../main.js" name="main"></script>
<style>
pre {
padding: 2em;
display: block;
}
</style>
</head>
<body>
<h1>CDN WordPOS sample</h1>
Open console to see results.
<pre><code> </code></pre>
<script>
// Shows the source of ../main.js inside the <code> element so visitors can
// read the sample that is being run; falls back to a plain link otherwise.
var el = document.querySelector('code');

// Fallback: link to the file instead of displaying it inline.
var showLink = function () {
  el.innerHTML = 'Open <a href="../main.js">main.js</a>.';
};

// Highlights the displayed source. Guarded because highlight.js comes from a
// CDN and may not be available.
var highlight = function () {
  window.hljs && hljs.highlightBlock(el);
};

// Inserts the fetched text and highlights it.
var showSource = function (txt) {
  // textContent inserts the file as plain text, keeping line breaks as characters.
  el.textContent = txt;
  if (window.hljs) {
    highlight();
  } else {
    // The highlight.js scripts are deferred, so they may not have run yet;
    // deferred scripts execute before DOMContentLoaded fires.
    document.addEventListener('DOMContentLoaded', highlight);
  }
};

// Test window.fetch rather than a bare `fetch`: referencing an undeclared
// identifier throws a ReferenceError, which would make the fallback unreachable.
// Plain function expressions are used so the script also parses in older browsers.
if (window.fetch) {
  fetch('../main.js')
    .then(function (res) {
      if (!res.ok) throw new Error('HTTP ' + res.status);
      return res.text();
    })
    .then(showSource, showLink);
} else {
  showLink();
}
</script>
</body>
</html>

32
docs/main.js Normal file
View File

@ -0,0 +1,32 @@
/**
 * Sample script: runs a few WordPOS calls and reports the outcome in a
 * console group named "Likely".
 *
 * Expects a global `wordpos` instance to have been created by the hosting page.
 */

// Checks that a lookup/seek result is the expected adverb sense of "likely".
const assertLikely = (r) => {
  console.assert(r.def === 'with considerable certainty');
  console.assert(r.pos === 'r');
  console.assert(r.synsetOffset === '00139421');
};

console.group('Likely');

// Every call is collected so the group can be closed when they have all settled.
const calls = [
  wordpos.isAdverb('likely').then(res => console.assert(res)),

  // Callback form: the callback also receives profiling arguments.
  wordpos.isAdverb('likely', (res, ...profile) => console.log('callback with profile', res, profile)),

  wordpos.getAdverbs('this is is lately a likely tricky business this is')
    .then(res => {
      const expect = {lately: 1, likely: 1};
      console.log('getAdverbs:', res);
      console.assert(res[0] in expect); // NOTE: order is NOT guaranteed!
      console.assert(res[1] in expect);
    }),

  wordpos.lookupAdverb('likely')
    .then(res => {
      console.log('lookupAdverb:', res[0]);
      assertLikely(res[0]);
    }),

  wordpos.seek('00139421', 'r')
    .then(res => {
      console.log('seek:', res);
      assertLikely(res);
    })
];

// Close the group once every call has settled instead of after a fixed delay:
// dictionary files are fetched on demand and may take longer than any timeout.
// Each failure is logged individually so one rejection cannot end the group early.
Promise.all(calls.map(call => Promise.resolve(call).catch(err => console.error(err))))
  .then(() => console.groupEnd());

View File

@ -0,0 +1,52 @@
<!doctype html>
<html>
<head>
<meta http-equiv="Content-Security-Policy" content="script-src https: http: 'unsafe-inline' 'unsafe-eval'">
<title>Wordpos in the browser</title>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/github.min.css" />
<script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/highlight.min.js"></script>
<script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/languages/javascript.min.js"></script>
<script src="/dist/wordpos.min.js"></script>
<script>
let wordpos = window.wordpos = new WordPOS({
// preload: true,
dictPath: '/samples/self-hosted/dict',
profile: true,
// stopwords: false
});
</script>
<script src="../main.js" name="main"></script>
<style>
pre {
padding: 2em;
display: block;
}
</style>
</head>
<body>
<h1>Self-hosted WordPOS sample</h1>
Open console to see results.
<pre><code> </code></pre>
<script>
// Shows the source of ../main.js inside the <code> element so visitors can
// read the sample that is being run; falls back to a plain link otherwise.
var el = document.querySelector('code');

// Fallback: link to the file instead of displaying it inline.
var showLink = function () {
  el.innerHTML = 'Open <a href="../main.js">main.js</a>.';
};

// Highlights the displayed source. Guarded because highlight.js comes from a
// CDN and may not be available.
var highlight = function () {
  window.hljs && hljs.highlightBlock(el);
};

// Inserts the fetched text and highlights it.
var showSource = function (txt) {
  // textContent inserts the file as plain text, keeping line breaks as characters.
  el.textContent = txt;
  if (window.hljs) {
    highlight();
  } else {
    // The highlight.js scripts are deferred, so they may not have run yet;
    // deferred scripts execute before DOMContentLoaded fires.
    document.addEventListener('DOMContentLoaded', highlight);
  }
};

// Test window.fetch rather than a bare `fetch`: referencing an undeclared
// identifier throws a ReferenceError, which would make the fallback unreachable.
// Plain function expressions are used so the script also parses in older browsers.
if (window.fetch) {
  fetch('../main.js')
    .then(function (res) {
      if (!res.ok) throw new Error('HTTP ' + res.status);
      return res.text();
    })
    .then(showSource, showLink);
} else {
  showLink();
}
</script>
</body>
</html>

7629
package-lock.json generated Normal file → Executable file

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
{
"name": "wordpos",
"version": "2.1.0",
"version": "2.0.0-beta.2",
"description": "wordpos is a set of part-of-speech utilities for Node.js & browser using the WordNet database.",
"author": "Moos <mooster@42at.com>",
"keywords": [
@ -17,44 +17,47 @@
"node": ">=6"
},
"files": [
"bench",
"bin",
"dict",
"dist",
"lib",
"src",
"scripts",
"test",
"!test/dict",
"tools"
],
"bin": "./bin/wordpos-cli.js",
"dependencies": {
"commander": "^2.20.3",
"commander": "^2.0.0",
"symlink-dir": "1.1.3",
"underscore": ">=1.3.1",
"wordnet-db": "^3.1.14"
"wordnet-db": "^3.1.11"
},
"devDependencies": {
"@babel/core": "^7.7.7",
"@babel/plugin-proposal-class-properties": "^7.7.4",
"@babel/plugin-proposal-decorators": "^7.7.4",
"@babel/plugin-proposal-do-expressions": "^7.7.4",
"@babel/plugin-proposal-export-default-from": "^7.7.4",
"@babel/plugin-proposal-export-namespace-from": "^7.7.4",
"@babel/plugin-proposal-function-sent": "^7.7.4",
"@babel/plugin-proposal-json-strings": "^7.7.4",
"@babel/plugin-proposal-logical-assignment-operators": "^7.7.4",
"@babel/plugin-proposal-nullish-coalescing-operator": "^7.7.4",
"@babel/plugin-proposal-numeric-separator": "^7.7.4",
"@babel/plugin-proposal-optional-chaining": "^7.7.5",
"@babel/plugin-proposal-pipeline-operator": "^7.7.7",
"@babel/plugin-proposal-throw-expressions": "^7.7.4",
"@babel/plugin-syntax-dynamic-import": "^7.7.4",
"@babel/plugin-syntax-import-meta": "^7.7.4",
"@babel/preset-env": "^7.7.7",
"@babel/register": "^7.7.7",
"babel-plugin-dynamic-import-node": "^2.3.0",
"chai": "^4.2.0",
"@babel/core": "^7.0.0",
"@babel/plugin-proposal-class-properties": "^7.0.0",
"@babel/plugin-proposal-decorators": "^7.0.0",
"@babel/plugin-proposal-do-expressions": "^7.0.0",
"@babel/plugin-proposal-export-default-from": "^7.0.0",
"@babel/plugin-proposal-export-namespace-from": "^7.0.0",
"@babel/plugin-proposal-function-sent": "^7.0.0",
"@babel/plugin-proposal-json-strings": "^7.0.0",
"@babel/plugin-proposal-logical-assignment-operators": "^7.0.0",
"@babel/plugin-proposal-nullish-coalescing-operator": "^7.0.0",
"@babel/plugin-proposal-numeric-separator": "^7.0.0",
"@babel/plugin-proposal-optional-chaining": "^7.0.0",
"@babel/plugin-proposal-pipeline-operator": "^7.0.0",
"@babel/plugin-proposal-throw-expressions": "^7.0.0",
"@babel/plugin-syntax-dynamic-import": "^7.0.0",
"@babel/plugin-syntax-import-meta": "^7.0.0",
"@babel/preset-env": "^7.0.0",
"@babel/register": "^7.0.0",
"babel-plugin-dynamic-import-node": "^2.2.0",
"chai": "^4.0.2",
"mini-bench": "^1.0.0",
"mocha": "^5.2.0",
"parcel": "^1.12.4"
"mocha": "^5.2.0"
},
"repository": {
"type": "git",
@ -63,14 +66,20 @@
"main": "./src/node/index.js",
"browser": "./src/browser/index.js",
"scripts": {
"postinstall": "node tools/stat.js --no-stats index.adv index.adj index.verb index.noun",
"postinstall": "npm run postinstall-web && npm run postinstall-node",
"postinstall-node": "node tools/stat.js --no-stats index.adv index.adj index.verb index.noun",
"postinstall-web": "node scripts/makeJsonDict.js index data",
"build": "parcel build --detailed-report -d dist -o wordpos.min.js --global WordPOS -t browser src/browser/index.js",
"postbuild": "sed -i 's/ES6_IMPORT/import/' dist/wordpos.min.js && node tools/banner.js | cat - dist/wordpos.min.js > _tmp_ && mv _tmp_ dist/wordpos.min.js",
"prepare": "npm run build",
"pretest": "node tools/makeJsonDict.js index data",
"postbuild": "sed -i 's/ES6_IMPORT/import/' dist/wordpos.min.js",
"test": "npm run test-node && npm run test-browser",
"test-node": "mocha test --exclude test/browser_test.js",
"test-browser": "mocha test/browser_test test/wordpos_test --require @babel/register"
"test-node": "mocha test",
"test-browser": "mocha test/wordpos_test --require @babel/register",
"prestart": "symlink-dir dict samples/self-hosted/dict",
"start": "npm run build && http-server",
"prestart-dev": "rm -rf build && mkdir build && symlink-dir dict build/dict && cp samples/main.js build/main.txt",
"start-dev": "npm run start-self -- -d build",
"start-self": "parcel samples/self-hosted/index.html",
"start-cdn": "parcel samples/cdn/index.html"
},
"license": "MIT"
}

53
samples/cdn/index.html Normal file
View File

@ -0,0 +1,53 @@
<!doctype html>
<html>
<head>
<meta http-equiv="Content-Security-Policy" content="script-src https: http: 'unsafe-inline' 'unsafe-eval'">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/github.min.css" />
<script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/highlight.min.js"></script>
<script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/languages/javascript.min.js"></script>
<!-- cdn source -->
<script src="https://unpkg.com/wordpos@2.0.0-beta/dist/wordpos.min.js"></script>
<script>
let wordpos = window.wordpos = new WordPOS({
// preload: true,
dictPath: 'https://unpkg.com/wordpos@2.0.0-beta/dict',
profile: true,
// stopwords: false
});
</script>
<script src="../main.js" name="main"></script>
<style>
pre {
padding: 2em;
display: block;
}
</style>
</head>
<body>
<h1>CDN WordPOS sample</h1>
Open console to see results.
<pre><code> </code></pre>
<script>
// Shows the source of ../main.js inside the <code> element so visitors can
// read the sample that is being run; falls back to a plain link otherwise.
var el = document.querySelector('code');

// Fallback: link to the file instead of displaying it inline.
var showLink = function () {
  el.innerHTML = 'Open <a href="../main.js">main.js</a>.';
};

// Highlights the displayed source. Guarded because highlight.js comes from a
// CDN and may not be available.
var highlight = function () {
  window.hljs && hljs.highlightBlock(el);
};

// Inserts the fetched text and highlights it.
var showSource = function (txt) {
  // textContent inserts the file as plain text, keeping line breaks as characters.
  el.textContent = txt;
  if (window.hljs) {
    highlight();
  } else {
    // The highlight.js scripts are deferred, so they may not have run yet;
    // deferred scripts execute before DOMContentLoaded fires.
    document.addEventListener('DOMContentLoaded', highlight);
  }
};

// Test window.fetch rather than a bare `fetch`: referencing an undeclared
// identifier throws a ReferenceError, which would make the fallback unreachable.
// Plain function expressions are used so the script also parses in older browsers.
if (window.fetch) {
  fetch('../main.js')
    .then(function (res) {
      if (!res.ok) throw new Error('HTTP ' + res.status);
      return res.text();
    })
    .then(showSource, showLink);
} else {
  showLink();
}
</script>
</body>
</html>

32
samples/main.js Normal file
View File

@ -0,0 +1,32 @@
/**
 * Sample script: runs a few WordPOS calls and reports the outcome in a
 * console group named "Likely".
 *
 * Expects a global `wordpos` instance to have been created by the hosting page.
 */

// Checks that a lookup/seek result is the expected adverb sense of "likely".
const assertLikely = (r) => {
  console.assert(r.def === 'with considerable certainty');
  console.assert(r.pos === 'r');
  console.assert(r.synsetOffset === '00139421');
};

console.group('Likely');

// Every call is collected so the group can be closed when they have all settled.
const calls = [
  wordpos.isAdverb('likely').then(res => console.assert(res)),

  // Callback form: the callback also receives profiling arguments.
  wordpos.isAdverb('likely', (res, ...profile) => console.log('callback with profile', res, profile)),

  wordpos.getAdverbs('this is is lately a likely tricky business this is')
    .then(res => {
      const expect = {lately: 1, likely: 1};
      console.log('getAdverbs:', res);
      console.assert(res[0] in expect); // NOTE: order is NOT guaranteed!
      console.assert(res[1] in expect);
    }),

  wordpos.lookupAdverb('likely')
    .then(res => {
      console.log('lookupAdverb:', res[0]);
      assertLikely(res[0]);
    }),

  wordpos.seek('00139421', 'r')
    .then(res => {
      console.log('seek:', res);
      assertLikely(res);
    })
];

// Close the group once every call has settled instead of after a fixed delay:
// dictionary files are fetched on demand and may take longer than any timeout.
// Each failure is logged individually so one rejection cannot end the group early.
Promise.all(calls.map(call => Promise.resolve(call).catch(err => console.error(err))))
  .then(() => console.groupEnd());

View File

@ -0,0 +1,52 @@
<!doctype html>
<html>
<head>
<meta http-equiv="Content-Security-Policy" content="script-src https: http: 'unsafe-inline' 'unsafe-eval'">
<title>Wordpos in the browser</title>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/github.min.css" />
<script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/highlight.min.js"></script>
<script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/languages/javascript.min.js"></script>
<script src="/dist/wordpos.min.js"></script>
<script>
let wordpos = window.wordpos = new WordPOS({
// preload: true,
dictPath: '/samples/self-hosted/dict',
profile: true,
// stopwords: false
});
</script>
<script src="../main.js" name="main"></script>
<style>
pre {
padding: 2em;
display: block;
}
</style>
</head>
<body>
<h1>Self-hosted WordPOS sample</h1>
Open console to see results.
<pre><code> </code></pre>
<script>
// Shows the source of ../main.js inside the <code> element so visitors can
// read the sample that is being run; falls back to a plain link otherwise.
var el = document.querySelector('code');

// Fallback: link to the file instead of displaying it inline.
var showLink = function () {
  el.innerHTML = 'Open <a href="../main.js">main.js</a>.';
};

// Highlights the displayed source. Guarded because highlight.js comes from a
// CDN and may not be available.
var highlight = function () {
  window.hljs && hljs.highlightBlock(el);
};

// Inserts the fetched text and highlights it.
var showSource = function (txt) {
  // textContent inserts the file as plain text, keeping line breaks as characters.
  el.textContent = txt;
  if (window.hljs) {
    highlight();
  } else {
    // The highlight.js scripts are deferred, so they may not have run yet;
    // deferred scripts execute before DOMContentLoaded fires.
    document.addEventListener('DOMContentLoaded', highlight);
  }
};

// Test window.fetch rather than a bare `fetch`: referencing an undeclared
// identifier throws a ReferenceError, which would make the fallback unreachable.
// Plain function expressions are used so the script also parses in older browsers.
if (window.fetch) {
  fetch('../main.js')
    .then(function (res) {
      if (!res.ok) throw new Error('HTTP ' + res.status);
      return res.text();
    })
    .then(showSource, showLink);
} else {
  showLink();
}
</script>
</body>
</html>

View File

@ -5,32 +5,27 @@
* exported JSON format with lemma as the key.
*/
const fs = require('fs');
const path = require('path');
const pkg = require('../package.json');
const wndb = require('wordnet-db');
let fs = require('fs');
let path = require('path');
let outPath = './dict'; // browser-use files
let testPath = './test/dict'; // mocha files in CJS format
let posExt = ['adj', 'adv', 'noun', 'verb'];
let dictRoot = require('wordnet-db').path; // source files
const outPath = './dict'; // browser-use files
const testPath = './test/dict'; // mocha files in CJS format
const testOpt = '--no-test'; // don't do test format
const posExt = ['adj', 'adv', 'noun', 'verb'];
const dictRoot = wndb.path; // source files
const copyright = require('./banner').copyright;
const fileTypes = {
data: true,
index: true
};
const [,, ...args] = process.argv;
if (!args.length || args.filter(p => p !== testOpt && !(p in fileTypes)).length) {
if (!args.length || args.filter(p => !(p in fileTypes)).length) {
console.log('Converts wordnet-db index & data files to JSON format for use in the browser.');
console.log(`\nUsage: makeJsonDict.js index|data [${testOpt}]`);
console.log('\nUsage: makeJsonDict.js index|data');
process.exit(1);
}
// Test-format (CJS) output is produced unless the opt-out flag was passed.
const doTest = !args.includes(testOpt);
// Strip the flag itself so only file types remain in `args`. The delete count
// matters: splice(index) without one removes everything from the flag to the
// end, silently dropping any file types listed after it.
for (let at = args.indexOf(testOpt); at !== -1; at = args.indexOf(testOpt)) {
  args.splice(at, 1);
}
/**
 * Returns a new array with duplicate values removed, keeping the first
 * occurrence of each value in its original order.
 *
 * A Set gives linear work in the array length, where filtering with
 * indexOf rescans the array for every element.
 * Note: Set membership treats NaN as equal to itself, unlike indexOf.
 *
 * @param {Array} arr values to de-duplicate (not modified)
 * @return {Array} new array of unique values
 */
function uniq(arr) {
  return Array.from(new Set(arr));
}
@ -47,7 +42,7 @@ const ensurePath = (path) => {
};
ensurePath(outPath);
if (doTest) ensurePath(testPath);
ensurePath(testPath);
function processFile(name) {
@ -80,11 +75,11 @@ function processFile(name) {
console.time(' write');
let text = JSON.stringify(obj);
fs.writeFileSync(path.resolve(outPath, name + '.' + pos + '.js'),
copyright + 'export default ' + text);
'export default ' + text);
// also write for mocha tests
if (doTest) fs.writeFileSync(path.resolve(testPath, name + '.' + pos + '.js'),
copyright + 'module.exports.default = ' + text);
fs.writeFileSync(path.resolve(testPath, name + '.' + pos + '.js'),
'module.exports.default = ' + text);
console.timeEnd(' write');
}

View File

@ -35,8 +35,6 @@ class BaseFile {
load() {
if (this.loadError) return Promise.reject(this.loadError);
if (this.loaded) return this.loaded;
this.options.debug && console.time('index load ' + this.posName);
let promise = isTest
@ -44,10 +42,9 @@ class BaseFile {
: ES6_IMPORT(`${this.filePath}`); // prevent parcel from clobbering dynamic import
this.options.debug && console.timeEnd('index load ' + this.posName)
return this.loaded = promise
return promise
.then(exports => {
this.file = exports.default;
return this;
this.file = exports.default
})
.catch(err => {
console.error(`Error loading "${this.type}" file ${this.filePath}.`, err);
@ -57,7 +54,7 @@ class BaseFile {
}
ready(fn, args) {
return this.load().then(res => fn && fn.apply(this, args) || res);
return this.load().then(() => fn.apply(this, args));
}
}

View File

@ -7,7 +7,7 @@
* Released under MIT license
*/
const { stopwords, prepText, makeStopwordString, flat } = require('../util');
const { stopwords, prepText, makeStopwordString } = require('../util');
const { is, get, getPOS, lookup, seek, lookupPOS } = require('../common');
const { randX, rand } = require('../rand');
const IndexFile = require('./indexFile');
@ -34,10 +34,6 @@ class WordPOS {
}
}
ready() {
return this.loaded || Promise.resolve();
}
initFiles() {
const keys = Object.keys(POS);
const loadOne = (Comp, pos) => new Comp(this.options.dictPath, POS[pos], this.options);
@ -48,7 +44,7 @@ class WordPOS {
this.dataFiles = reducer(loader(DataFile));
if (this.options.preload) {
this.loaded = this.preloadFiles(this.options.preload);
this.loaded = this.preloadIndexes(this.options.preload);
}
}
@ -65,28 +61,23 @@ class WordPOS {
* @param {string|Array} [pos] POS to load (default: all)
* @return {Promise.<index data>}
*/
preloadFiles(pos) {
let promise = this._preload(this.indexFiles, pos);
if (this.options.includeData) {
promise = Promise.all([].concat(promise, this._preload(this.dataFiles, pos)))
.then(res => flat(res));
}
return promise;
}
_preload(files, pos) {
let load = p => files[p].load();
preloadIndexes(pos) {
let file = this.indexFile[pos];
let load = p => file.load();
let promise;
if (!pos || pos === true) { // preload all
promise = Promise.all(Object.keys(POS).map(load));
}
else if (typeof pos === 'string' && files[pos]) {
else if (typeof pos === 'string' && file) {
promise = load(pos);
}
else if (pos instanceof Array) {
promise = Promise.all(pos.map(load));
promise = pos.forEach(pos => file && load(pos));
}
// TODO includeData
return promise || Promise.reject(new RangeError(`Unknown POS "${pos}" for preload.`));
}
@ -192,8 +183,6 @@ WordPOS.defaults = {
*/
WordPOS.stopwords = stopwords;
WordPOS.POS = POS;
// Export as CJS handled by Parcel, otherwise will get WordPOS.default
// if use: export default WordPOS;
module.exports = WordPOS;

View File

@ -160,12 +160,12 @@ function get(isFn) {
words = this.parse(text),
results = [],
self = this,
first = words[0];
first = words.shift();
// test one first & check for error, otherwise
// map is innocuous to errors!
return exec(first)
.then(() => Promise.all(words.slice(1).map(exec)))
.then(() => Promise.all(words.map(exec)))
.then(done)
.catch(err => {
// done(); // callback signature is same! // FIXME
@ -224,13 +224,13 @@ function getPOS(text, callback) {
var args = [data];
var matches = uniq(flat(Object.values(data)));
data.rest = diff(words, matches);
profile && args.push(new Date() - start);
nextTick(callback, args);
return data;
}
function error(err) {
console.log('Error >>> ', err);
nextTick(callback, []);
throw err;
}

View File

@ -33,7 +33,7 @@ function normalize(word) {
}
function isStopword(stopwordsStr, word) {
return stopwordsStr.indexOf(' '+ word.toLowerCase() +' ') >= 0;
return stopwordsStr.indexOf(' '+word+' ') >= 0;
}
function tokenizer(str) {

View File

@ -1,237 +0,0 @@
/**
* browser_test.js
*
* test file for browser-specific functionality
*
* Usage:
* npm install mocha -g
* mocha browser_test.js --require @babel/register
*
* or
*
* npm test
*
* Copyright (c) 2012-2020 mooster@42at.com
* https://github.com/moos/wordpos
*
* Released under MIT license
*/
// used in src code to signal test mode
global.window = global.window || {};
global.window.__mocha = true;
var
chai = require('chai'),
_ = require('underscore'),
assert = chai.assert,
browser = process.browser = process.argv.includes('@babel/register'),
WordPOS = require('../src/wordpos'),
wordpos,
path = require('path'),
dictPath = browser ? path.resolve('./test/dict') : undefined;
// Returns the keys of require.cache whose path contains "dict" as a whole
// word (case-insensitive); the tests use it to see which dictionary modules
// have been loaded.
const dictRequired = () => {
  const dictWord = /\bdict\b/i;
  const cachedPaths = Object.keys(require.cache);
  return cachedPaths.filter((modulePath) => dictWord.test(modulePath));
};
if (!browser) {
throw new Error('Not in browser mode!');
}
chai.config.showDiff = true;
// Verifies which index files get loaded for each form of the `preload` option.
describe('options: preload', () => {
  // clear require.cache before each test so dictRequired() only reflects
  // what the current test loaded
  beforeEach(() => {
    dictRequired().forEach((m) => delete require.cache[m]);
  });

  it('preload: false', () => {
    wordpos = new WordPOS({
      preload: false,
      dictPath: dictPath
    });
    return wordpos.ready().then(res => {
      assert.equal(res, undefined);
      assert.equal(dictRequired().length, 0);
    });
  });

  it('preload: true', () => {
    assert.equal(dictRequired().length, 0);
    wordpos = new WordPOS({
      preload: true,
      dictPath: dictPath
    });
    return wordpos.ready().then(res => {
      assert.equal(res.length, 4);
      res.forEach(index => assert.equal(index.type, 'index'));
      res.forEach(index => assert.equal(/\bdict.index\./.test(index.filePath), true));

      let reqs = dictRequired();
      assert.equal(reqs.length, 4);
      reqs.forEach(req => assert.equal(/\bdict.index\./.test(req), true));
      Object.values(WordPOS.POS).forEach(pos => assert.notEqual(reqs.join().indexOf(`index.${pos}.js`), -1));
    });
  });

  it('preload: "r"', () => {
    wordpos = new WordPOS({
      preload: 'r',
      dictPath: dictPath
    });
    return wordpos.ready().then(res => {
      assert.equal(res.type, 'index');
      assert.equal(res.posName, 'adv');

      let reqs = dictRequired();
      assert.equal(reqs.length, 1);
      assert.equal(/index\.adv\.js/.test(reqs[0]), true);
    });
  });

  it('preload: ["r","a"]', () => {
    wordpos = new WordPOS({
      preload: ['r','a'],
      dictPath: dictPath
    });
    return wordpos.ready().then(res => {
      assert.equal(res.length, 2);
      // TODO -- order may NOT be always the same!!!
      assert.equal(res[0].type, 'index');
      assert.equal(res[0].posName, 'adv');
      assert.equal(res[1].type, 'index');
      assert.equal(res[1].posName, 'adj');

      let reqs = dictRequired();
      assert.equal(reqs.length, 2);
      assert.equal(/index\.adv\.js/.test(reqs[0]), true);
      assert.equal(/index\.adj\.js/.test(reqs[1]), true);
    });
  });

  it('preload: "foo"', () => {
    wordpos = new WordPOS({
      preload: 'foo',
      dictPath: dictPath
    });
    // Two-argument then(): an unexpected resolve fails with its own message
    // instead of being routed into the rejection check below.
    return wordpos.ready().then(
      () => assert(false, 'expected preload of unknown POS "foo" to reject'),
      err => assert.equal(err, 'RangeError: Unknown POS "foo" for preload.')
    );
  });
});
// Verifies that `includeData: true` makes preload load the data files in
// addition to the index files for each form of the `preload` option.
describe('options: preload with includeData', () => {
  // clear require.cache before each test so dictRequired() only reflects
  // what the current test loaded
  beforeEach(() => {
    dictRequired().forEach((m) => delete require.cache[m]);
  });

  it('preload: false', () => {
    wordpos = new WordPOS({
      preload: false,
      includeData: true,
      dictPath: dictPath
    });
    return wordpos.ready().then(res => {
      assert.equal(res, undefined);
      assert.equal(dictRequired().length, 0);
    });
  });

  it('preload: true', () => {
    assert.equal(dictRequired().length, 0);
    wordpos = new WordPOS({
      preload: true,
      includeData: true,
      dictPath: dictPath
    });
    return wordpos.ready().then(res => {
      assert.equal(res.length, 8);
      assert.equal(res.filter(m => m.type === 'index').length, 4);
      assert.equal(res.filter(m => m.type === 'data').length, 4);
      assert.equal(res.filter(m => /\bdict.index\./.test(m.filePath)).length, 4);
      assert.equal(res.filter(m => /\bdict.data\./.test(m.filePath)).length, 4);

      let reqs = dictRequired();
      assert.equal(reqs.length, 8);
      assert.equal(reqs.filter(m => /\bdict.index\./.test(m)).length, 4);
      assert.equal(reqs.filter(m => /\bdict.data\./.test(m)).length, 4);

      // exactly one index and one data module per part of speech
      Object.values(WordPOS.POS).forEach(pos => {
        assert.equal(reqs.filter(m => m.indexOf(`index.${pos}.js`) !== -1).length, 1);
        assert.equal(reqs.filter(m => m.indexOf(`data.${pos}.js`) !== -1).length, 1);
      });
    });
  });

  it('preload: "r"', () => {
    wordpos = new WordPOS({
      preload: 'r',
      includeData: true,
      dictPath: dictPath
    });
    return wordpos.ready().then(res => {
      assert.equal(res.length, 2);
      assert.equal(res[0].type, 'index');
      assert.equal(res[0].posName, 'adv');
      assert.equal(res[1].type, 'data');
      assert.equal(res[1].posName, 'adv');

      let reqs = dictRequired();
      assert.equal(reqs.length, 2);
      assert.equal(/index\.adv\.js/.test(reqs[0]), true);
      assert.equal(/data\.adv\.js/.test(reqs[1]), true);
    });
  });

  it('preload: ["r","a"]', () => {
    wordpos = new WordPOS({
      preload: ['r','a'],
      includeData: true,
      dictPath: dictPath
    });
    return wordpos.ready().then(res => {
      assert.equal(res.length, 4);
      // TODO -- order may NOT be always the same!!!
      assert.equal(res[0].type, 'index');
      assert.equal(res[0].posName, 'adv');
      assert.equal(res[1].type, 'index');
      assert.equal(res[1].posName, 'adj');
      assert.equal(res[2].type, 'data');
      assert.equal(res[2].posName, 'adv');
      assert.equal(res[3].type, 'data');
      assert.equal(res[3].posName, 'adj');

      let reqs = dictRequired();
      assert.equal(reqs.length, 4);
      assert.equal(/index\.adv\.js/.test(reqs[0]), true);
      assert.equal(/index\.adj\.js/.test(reqs[1]), true);
      assert.equal(/data\.adv\.js/.test(reqs[2]), true);
      assert.equal(/data\.adj\.js/.test(reqs[3]), true);
    });
  });

  it('preload: "foo"', () => {
    wordpos = new WordPOS({
      preload: 'foo',
      includeData: true,
      dictPath: dictPath
    });
    // Two-argument then(): an unexpected resolve fails with its own message
    // instead of being routed into the rejection check below.
    return wordpos.ready().then(
      () => assert(false, 'expected preload of unknown POS "foo" to reject'),
      err => assert.equal(err, 'RangeError: Unknown POS "foo" for preload.')
    );
  });
});

View File

@ -1,171 +0,0 @@
/**
* cli_test.js
*
* Test CLI script
*
* Copyright (c) 2012-2020 mooster@42at.com
* https://github.com/moos/wordpos
*
* Released under MIT license
*/
var
chai = require('chai'),
assert = chai.assert,
exec = require('child_process').exec,
testStr = 'The angry bear chased the frightened little squirrel',
cmd = 'node ' + __dirname + '/../bin/wordpos-cli ',
gDone;
// Asserts that two space-separated word lists hold the same words, ignoring
// word order. Only the actual output is trimmed before splitting.
function cmp(act, exp) {
  const canonical = (words) => words.split(' ').sort().join();
  const actual = canonical(act.trim());
  const expected = canonical(exp);
  assert.equal(actual, expected);
}
// End-to-end tests that spawn the CLI script and check what it prints.
describe('CLI tests', function() {
  this.slow(300);

  // Runs the CLI with `args`, asserts that it exited without error, passes
  // stdout to `check`, then signals mocha that the test is complete.
  const run = (args, done, check) => {
    exec(cmd + args, (error, stdout) => {
      assert.isNull(error);
      check(stdout);
      done();
    });
  };

  describe('Test CLI get', function() {
    it('should get nouns', done => {
      run('-n -b get ' + testStr, done, stdout => {
        cmp(stdout, 'bear chased squirrel little');
      });
    });

    it('should get adjectives', done => {
      run('-a -b get ' + testStr, done, stdout => {
        cmp(stdout, 'angry frightened little');
      });
    });

    it('should get verbs', done => {
      run('-v -b get ' + testStr, done, stdout => {
        cmp(stdout, 'bear');
      });
    });

    it('should get adverbs', done => {
      run('-r -b get ' + testStr, done, stdout => {
        cmp(stdout, 'little');
      });
    });

    it('should get POS', done => {
      run('-b get ' + testStr, done, stdout => {
        cmp(stdout, 'bear chased squirrel little \n' +
          'angry frightened little \n' +
          'bear \n' +
          'little');
      });
    });

    it('should get POS (single word)', done => {
      run('-b get angry', done, stdout => {
        assert.equal(stdout.trim(), 'angry');
      });
    });

    it('should get counts', done => {
      run('-b -c get ' + testStr, done, stdout => {
        assert.equal(stdout.trim(), '4 3 1 1 6');
      });
    });
  });

  describe('Test CLI def', function() {
    it('should define word', done => {
      run('def angry', done, stdout => {
        assert(stdout.trim().startsWith('angry (def)\n a: feeling or showing anger;'));
      });
    });
  });

  describe('Test CLI syn', function() {
    it('should get synonyms', done => {
      run('syn angry', done, stdout => {
        assert(stdout.trim().startsWith('angry (syn)\n a: angry'));
      });
    });
  });

  describe('Test CLI exp', function() {
    it('should get example', done => {
      run('exp angry', done, stdout => {
        assert(stdout.trim().startsWith('angry (exp)\n a: "angry at the weather"'));
      });
    });
  });

  describe('Test CLI seek', function() {
    it('should seek by offset', done => {
      run('-a seek 00114629', done, stdout => {
        assert(/lemma/.test(stdout), 'found lemma');
        assert(/angry/.test(stdout), 'found angry');
      });
    });
  });

  describe('Test CLI rand', function() {
    it('should get a random word', done => {
      run('rand', done, stdout => {
        assert(stdout.length > 1, 'got answer');
      });
    });

    it('should get a random word starting with...', done => {
      run('-b rand angr', done, stdout => {
        assert.equal(stdout.substr(0,4), 'angr');
      });
    });
  });

  describe('Test CLI parse', function() {
    it('should parse input', done => {
      run('-b parse ' + testStr, done, stdout => {
        assert.equal(stdout.trim(), 'angry bear chased frightened little squirrel');
      });
    });
  });

  describe('Test CLI stopwords', function() {
    let WordPOS = require('../src/wordpos');

    it('should list stopwords', done => {
      run('-j stopwords ' + testStr, done, stdout => {
        assert.equal(stdout.trim(), JSON.stringify(WordPOS.stopwords));
      });
    });
  });
});

View File

@ -56,7 +56,7 @@ var str = "The angry bear chased the frightened little squirrel",
verbs: [ 'bear' ],
adjectives: [ 'little', 'angry', 'frightened' ],
adverbs: [ 'little' ],
rest: []
rest: [ 'The' ]
},
garble = 'garblegarble', // expect not to find word
offset = 1285602;

View File

@ -1,22 +0,0 @@
const pkg = require('../package.json');
const wndb = require('wordnet-db');
const copyright = `/*!
Copyright (c) 2012-2020 mooster@42at.com
https://github.com/moos/wordpos (The MIT License)
Princeton University "About WordNet." WordNet (https://wordnet.princeton.edu/). Princeton University. 2010.
${pkg.name} v${pkg.version}
wordnet-db v${wndb.libVersion}
WordNet DB version ${wndb.version}
*/
`;
module.exports = {
copyright
};
if (require.main === module) {
process.stdout.write(copyright);
}