fix stopwords with CLI
This commit is contained in:
parent
946147d05f
commit
cb884d2dfa
124
README.md
124
README.md
|
@ -3,7 +3,7 @@ wordpos
|
|||
|
||||
wordpos is a set of part-of-speech (POS) utilities for Node.js using [natural's](http://github.com/NaturalNode/natural) WordNet module.
|
||||
|
||||
*Update*: New version 0.1.10 - get random word(s).
|
||||
*Update*: get random word(s).
|
||||
|
||||
## Quick usage
|
||||
Command-line:
|
||||
|
@ -50,6 +50,7 @@ Note: `wordpos-bench.js` requires a [forked uubench](https://github.com/moos/uub
|
|||
To run spec:
|
||||
|
||||
npm install jasmine-node -g
|
||||
cd spec
|
||||
jasmine-node wordpos_spec.js --verbose
|
||||
jasmine-node validate_spec.js --verbose
|
||||
|
||||
|
@ -116,7 +117,7 @@ If you're only interested in a certain POS (say, adjectives), using the particul
|
|||
than getPOS() which looks up the word in all index files. [stopwords](https://github.com/NaturalNode/natural/blob/master/lib/natural/util/stopwords.js)
|
||||
are stripped out from text before lookup.
|
||||
|
||||
If text is an array, all words are looked-up -- no deduplication, stopword filter or tokenization is applied.
|
||||
If text is an *array*, all words are looked-up -- no deduplication, stopword filter or tokenization is applied.
|
||||
|
||||
getX() functions return the number of parsed words that will be looked up (less duplicates and stopwords).
|
||||
|
||||
|
@ -298,105 +299,11 @@ See blog article [Optimizing WordPos](http://blog.42at.com/optimizing-wordpos).
|
|||
|
||||
## Command-line: CLI
|
||||
|
||||
Version 0.1.6 introduces the command-line interface (./bin/wordpos-cli.js), available as 'wordpos' if installed globally
|
||||
"npm install wordpos -g", otherwise as 'node_modules/.bin/wordpos' if installed without the -g.
|
||||
|
||||
```bash
|
||||
$ wordpos get The angry bear chased the frightened little squirrel
|
||||
# Noun 4:
|
||||
bear
|
||||
chased
|
||||
little
|
||||
squirrel
|
||||
|
||||
# Adjective 3:
|
||||
angry
|
||||
frightened
|
||||
little
|
||||
|
||||
# Verb 1:
|
||||
bear
|
||||
|
||||
# Adverb 1:
|
||||
little
|
||||
```
|
||||
Just the nouns, brief output:
|
||||
```bash
|
||||
$ wordpos get --noun -b The angry bear chased the frightened little squirrel
|
||||
bear chased little squirrel
|
||||
```
|
||||
Just the counts: (nouns, adjectives, verbs, adverbs, total parsed words)
|
||||
```bash
|
||||
$ wordpos get -c The angry bear chased the frightened little squirrel
|
||||
4 3 1 1 7
|
||||
```
|
||||
Just the adjective count: (0, adjectives, 0, 0, total parsed words)
|
||||
```bash
|
||||
$ wordpos get --adj -c The angry bear chased the frightened little squirrel
|
||||
0 3 0 0 7
|
||||
```
|
||||
|
||||
Get definitions:
|
||||
```bash
|
||||
$ wordpos def git
|
||||
git
|
||||
n: a person who is deemed to be despicable or contemptible; "only a rotter would do that"; "kill the rat"; "throw the bum out"; "you cowardly little pukes!"; "the British call a contemptible person a `git'"
|
||||
```
|
||||
Get full result object:
|
||||
```bash
|
||||
$ wordpos def git -f
|
||||
{ git:
|
||||
[ { synsetOffset: 10539715,
|
||||
lexFilenum: 18,
|
||||
pos: 'n',
|
||||
wCnt: 0,
|
||||
lemma: 'rotter',
|
||||
synonyms: [],
|
||||
lexId: '0',
|
||||
ptrs: [],
|
||||
gloss: 'a person who is deemed to be despicable or contemptible; "only a rotter would do that
|
||||
"; "kill the rat"; "throw the bum out"; "you cowardly little pukes!"; "the British call a contemptib
|
||||
le person a `git\'" ' } ] }
|
||||
```
|
||||
As JSON:
|
||||
```bash
|
||||
$ wordpos def git -j
|
||||
{"git":[{"synsetOffset":10539715,"lexFilenum":18,"pos":"n","wCnt":0,"lemma":"rotter","synonyms":[],"
|
||||
lexId":"0","ptrs":[],"gloss":"a person who is deemed to be despicable or contemptible; \"only a rotter
|
||||
would do that\"; \"kill the rat\"; \"throw the bum out\"; \"you cowardly little pukes!\"; \"the British
|
||||
call a contemptible person a `git'\" "}]}
|
||||
```
|
||||
|
||||
Get random words:
|
||||
```bash
|
||||
$ wordpos rand
|
||||
# 1:
|
||||
hopelessly
|
||||
|
||||
$ wordpos rand -N 2 foot
|
||||
# foot 2:
|
||||
footprint
|
||||
footlights
|
||||
|
||||
$ wordpos rand -N 2 foot hand
|
||||
# foot 2:
|
||||
footlocker
|
||||
footmark
|
||||
|
||||
# hand 2:
|
||||
hand-hewn
|
||||
handstitched
|
||||
|
||||
$ wordpos rand --adj foot
|
||||
# foot 1:
|
||||
foot-shaped
|
||||
```
|
||||
|
||||
Usage:
|
||||
```bash
|
||||
$ wordpos
|
||||
|
||||
Usage: wordpos-cli.js [options] <command> [word ... | -i <file> | <stdin>]
|
||||
Usage: wordpos [options] <command> [word ... | -i <file> | <stdin>]
|
||||
|
||||
Commands:
|
||||
|
||||
|
@ -404,9 +311,11 @@ $ wordpos
|
|||
|
||||
def lookup definitions
|
||||
|
||||
rand get random words (optionally starting with 'word' ...)
|
||||
|
||||
parse show parsed words, deduped and less stopwords
|
||||
|
||||
rand get random words (optionally starting with 'word' ...)
|
||||
stopwords show list of stopwords (valid options are -b and -j)
|
||||
|
||||
Options:
|
||||
|
||||
|
@ -421,10 +330,12 @@ $ wordpos
|
|||
-f, --full full results object
|
||||
-j, --json full results object as JSON
|
||||
-i, --file <file> input file
|
||||
-s, --stopwords include stopwords
|
||||
-s, --withStopwords include stopwords (default: stopwords are excluded)
|
||||
-N, --num <num> number of random words to get
|
||||
```
|
||||
|
||||
For CLI examples, see [bin/README](bin/README.md).
|
||||
|
||||
## Benchmark
|
||||
|
||||
node wordpos-bench.js
|
||||
|
@ -452,6 +363,21 @@ done in 1375 msecs
|
|||
|
||||
220 words are looked-up (less stopwords and duplicates) on a win7/64-bit/dual-core/3GHz. getPOS() is slowest as it searches through all four index files.
|
||||
|
||||
## Changes
|
||||
|
||||
v0.1.11
|
||||
- fix stopwords not getting excluded when running with CLI
|
||||
- added 'stopwords' CLI *command* to show list of stopwords
|
||||
- CLI *option* --stopwords is now renamed to --withStopwords
|
||||
|
||||
v0.1.10
|
||||
- rand functionality added
|
||||
|
||||
v0.1.6
|
||||
- added command line tool
|
||||
|
||||
v0.1.4
|
||||
- added fast index
|
||||
|
||||
License
|
||||
-------
|
||||
|
|
|
@ -0,0 +1,136 @@
|
|||
wordpos
|
||||
=======
|
||||
|
||||
## Command-line: CLI
|
||||
|
||||
Version 0.1.6 introduces the command-line interface (./bin/wordpos-cli.js), available as 'wordpos' if installed globally via
|
||||
"npm install wordpos -g", otherwise as 'node_modules/.bin/wordpos' if installed without the -g.
|
||||
|
||||
```bash
|
||||
$ wordpos get The angry bear chased the frightened little squirrel
|
||||
# Noun 4:
|
||||
bear
|
||||
chased
|
||||
little
|
||||
squirrel
|
||||
|
||||
# Adjective 3:
|
||||
angry
|
||||
frightened
|
||||
little
|
||||
|
||||
# Verb 1:
|
||||
bear
|
||||
|
||||
# Adverb 1:
|
||||
little
|
||||
```
|
||||
Just the nouns, brief output:
|
||||
```bash
|
||||
$ wordpos get --noun -b The angry bear chased the frightened little squirrel
|
||||
bear chased little squirrel
|
||||
```
|
||||
Just the counts: (nouns, adjectives, verbs, adverbs, total parsed words)
|
||||
```bash
|
||||
$ wordpos get -c The angry bear chased the frightened little squirrel
|
||||
4 3 1 1 7
|
||||
```
|
||||
Just the adjective count: (0, adjectives, 0, 0, total parsed words)
|
||||
```bash
|
||||
$ wordpos get --adj -c The angry bear chased the frightened little squirrel
|
||||
0 3 0 0 7
|
||||
```
|
||||
|
||||
Get definitions:
|
||||
```bash
|
||||
$ wordpos def git
|
||||
git
|
||||
n: a person who is deemed to be despicable or contemptible; "only a rotter would do that"; "kill the rat"; "throw the bum out"; "you cowardly little pukes!"; "the British call a contemptible person a `git'"
|
||||
```
|
||||
Get full result object:
|
||||
```bash
|
||||
$ wordpos def git -f
|
||||
{ git:
|
||||
[ { synsetOffset: 10539715,
|
||||
lexFilenum: 18,
|
||||
pos: 'n',
|
||||
wCnt: 0,
|
||||
lemma: 'rotter',
|
||||
synonyms: [],
|
||||
lexId: '0',
|
||||
ptrs: [],
|
||||
gloss: 'a person who is deemed to be despicable or contemptible; "only a rotter would do that
|
||||
"; "kill the rat"; "throw the bum out"; "you cowardly little pukes!"; "the British call a contemptib
|
||||
le person a `git\'" ' } ] }
|
||||
```
|
||||
As JSON:
|
||||
```bash
|
||||
$ wordpos def git -j
|
||||
{"git":[{"synsetOffset":10539715,"lexFilenum":18,"pos":"n","wCnt":0,"lemma":"rotter","synonyms":[],"
|
||||
lexId":"0","ptrs":[],"gloss":"a person who is deemed to be despicable or contemptible; \"only a rotter
|
||||
would do that\"; \"kill the rat\"; \"throw the bum out\"; \"you cowardly little pukes!\"; \"the British
|
||||
call a contemptible person a `git'\" "}]}
|
||||
```
|
||||
|
||||
Get random words:
|
||||
```bash
|
||||
$ wordpos rand
|
||||
# 1:
|
||||
hopelessly
|
||||
|
||||
$ wordpos rand -N 2 foot
|
||||
# foot 2:
|
||||
footprint
|
||||
footlights
|
||||
|
||||
$ wordpos rand -N 2 foot hand
|
||||
# foot 2:
|
||||
footlocker
|
||||
footmark
|
||||
|
||||
# hand 2:
|
||||
hand-hewn
|
||||
handstitched
|
||||
|
||||
$ wordpos rand --adj foot
|
||||
# foot 1:
|
||||
foot-shaped
|
||||
|
||||
$ wordpos stopwords -b
|
||||
about after all also am an and another any are as at be because ...
|
||||
```
|
||||
|
||||
## Usage:
|
||||
```bash
|
||||
$ wordpos
|
||||
|
||||
Usage: wordpos-cli.js [options] <command> [word ... | -i <file> | <stdin>]
|
||||
|
||||
Commands:
|
||||
|
||||
get get list of words for particular POS
|
||||
|
||||
def lookup definitions
|
||||
|
||||
rand get random words (optionally starting with 'word' ...)
|
||||
|
||||
parse show parsed words, deduped and less stopwords
|
||||
|
||||
stopwords show list of stopwords (valid options are -b and -j)
|
||||
|
||||
Options:
|
||||
|
||||
-h, --help output usage information
|
||||
-V, --version output the version number
|
||||
-n, --noun Get nouns
|
||||
-a, --adj Get adjectives
|
||||
-v, --verb Get verbs
|
||||
-r, --adv Get adverbs
|
||||
-c, --count get counts only (noun, adj, verb, adv, total parsed words)
|
||||
-b, --brief brief output (all on one line, no headers)
|
||||
-f, --full full results object
|
||||
-j, --json full results object as JSON
|
||||
-i, --file <file> input file
|
||||
-s, --withStopwords include stopwords (default: stopwords are excluded)
|
||||
-N, --num <num> number of random words to get
|
||||
```
|
|
@ -34,13 +34,13 @@ program
|
|||
.option('-f, --full', 'full results object')
|
||||
.option('-j, --json', 'full results object as JSON')
|
||||
.option('-i, --file <file>', 'input file')
|
||||
.option('-s, --stopwords', 'include stopwords')
|
||||
.option('-s, --withStopwords', 'include stopwords (default: stopwords are excluded)')
|
||||
.option('-N, --num <num>', 'number of random words to return')
|
||||
;
|
||||
|
||||
program.command('get')
|
||||
.description('get list of words for particular POS')
|
||||
.action(exec);
|
||||
.description('get list of words for particular POS')
|
||||
.action(exec);
|
||||
|
||||
program.command('def')
|
||||
.description('lookup definitions')
|
||||
|
@ -49,14 +49,26 @@ program.command('def')
|
|||
exec.apply(this, arguments);
|
||||
});
|
||||
|
||||
program.command('parse')
|
||||
.description('show parsed words, deduped and less stopwords')
|
||||
.action(exec);
|
||||
|
||||
program.command('rand')
|
||||
.description('get random words (starting with word, optionally)')
|
||||
.description('get random words (starting with <word>, optionally)')
|
||||
.action(exec);
|
||||
|
||||
program.command('parse')
|
||||
.description('show parsed words, deduped and less stopwords')
|
||||
.action(exec);
|
||||
|
||||
program.command('stopwords')
|
||||
.description('show list of stopwords (valid options are -b and -j)')
|
||||
.action(function(){
|
||||
cmd = _.last(arguments)._name;
|
||||
var stopwords = WordPos.natural.stopwords;
|
||||
|
||||
if (program.json)
|
||||
output(stopwords);
|
||||
else
|
||||
console.log(stopwords.join(program.brief ? ' ' : '\n'))
|
||||
});
|
||||
|
||||
var
|
||||
WordPos = require('../src/wordpos'),
|
||||
util = require('util'),
|
||||
|
@ -109,9 +121,9 @@ function optToFn() {
|
|||
|
||||
function run(data) {
|
||||
var
|
||||
opts = {stopwords: !program.stopwords},
|
||||
opts = {stopwords: !program.withStopwords},
|
||||
wordpos = new WordPos(opts),
|
||||
words = wordpos.parse(data.split(' ')), // make array
|
||||
words = wordpos.parse(data),
|
||||
fns = optToFn(),
|
||||
plural = (cmd=='get' ? 's':''),
|
||||
results = {},
|
||||
|
@ -143,7 +155,7 @@ function run(data) {
|
|||
});
|
||||
} else {
|
||||
words.forEach(function(word){
|
||||
wordpos[method](word, cb);
|
||||
wordpos [method](word, cb);
|
||||
});
|
||||
}
|
||||
});
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
"author": "Moos <mooster@42at.com>",
|
||||
"keywords": ["natural", "language", "wordnet", "adjectives", "nouns", "adverbs", "verbs"],
|
||||
"description": "wordpos is a set of part-of-speech utilities for Node.js using natural's WordNet module.",
|
||||
"version": "0.1.10",
|
||||
"version": "0.1.11",
|
||||
"homepage": "https://github.com/moos/wordpos",
|
||||
"engines": {
|
||||
"node": ">=0.6"
|
||||
|
|
Loading…
Reference in New Issue