diff --git a/README.md b/README.md index f304a18..6c6e6d1 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,6 @@ wordpos wordpos is a set of part-of-speech (POS) utilities for Node.js using [natural's](http://github.com/NaturalNode/natural) WordNet module. -*Update*: get [random](#randx) word(s). ## Installation @@ -237,11 +236,11 @@ Access to the [WNdb](https://github.com/moos/WNdb) object containing the diction Access to underlying [natural](http://github.com/NaturalNode/natural) module. For example, WordPOS.natural.stopwords is the list of stopwords. -### Fast Index +## Fast Index Version 0.1.4 introduces `fastIndex` option. This uses a secondary index on the index files and is much faster. It is on by default. Secondary index files are generated at install time and placed in the same directory as WNdb.path. Details can be found in tools/stat.js. -See blog article [Optimizing WordPos](http://blog.42at.com/optimizing-wordpos). +Fast index improves performance **30x** over Natural's native methods. See blog article [Optimizing WordPos](http://blog.42at.com/optimizing-wordpos). ## Command-line: CLI @@ -256,7 +255,7 @@ Note: `wordpos-bench.js` requires a [forked uubench](https://github.com/moos/uub node wordpos-bench.js -512-word corpus (< v0.1.4) : +512-word corpus (< v0.1.4, comparable to Natural) : ``` getPOS : 0 ops/s { iterations: 1, elapsed: 9039 } getNouns : 0 ops/s { iterations: 1, elapsed: 2347 } @@ -280,6 +279,12 @@ done in 1375 msecs ## Changes +0.1.14 +- Added `syn` (synonym) and `exp` (example) CLI commands. +- Fixed `rand` CLI command when no start word given. +- Removed -N, --num CLI option. Use `wordpos rand [N]` to get N random words. +- Changed CLI option -s to -w (include stopwords). 
+ 0.1.13 - Fix crlf issue for command-line script diff --git a/bin/README.md b/bin/README.md index 53cedb2..456d2de 100644 --- a/bin/README.md +++ b/bin/README.md @@ -1,6 +1,11 @@ -wordpos +wordpos CLI ======= +## Command-line + +Version 0.1.6 introduces the command-line interface (./bin/wordpos-cli.js), available as 'wordpos' if installed globally +`npm install -g wordpos`, otherwise as `node_modules/.bin/wordpos` if installed without the -g. + ## Usage: ```bash $ wordpos @@ -9,40 +14,40 @@ $ wordpos Commands: - get get list of words for particular POS + get get list of words for particular POS - def lookup definitions + def lookup definitions (use -b for brief definition, less examples) + + syn lookup synonyms + + exp lookup examples - rand get random words (optionally starting with 'word' ...) + rand get random words (starting with [word]). If first arg is a number, returns + that many random words. Valid options are -b, -f, -j, -w, -i. - parse show parsed words, deduped and less stopwords + parse show parsed words, deduped and less stopwords stopwords show list of stopwords (valid options are -b and -j) Options: - -h, --help output usage information - -V, --version output the version number - -n, --noun Get nouns - -a, --adj Get adjectives - -v, --verb Get verbs - -r, --adv Get adverbs - -c, --count get counts only (noun, adj, verb, adv, total parsed words) - -b, --brief brief output (all on one line, no headers) - -f, --full full result object - -j, --json full result object as JSON - -i, --file input file - -s, --withStopwords include stopwords (default: stopwords are excluded) - -N, --num number of random words to get + -h, --help output usage information + -V, --version output the version number + -n, --noun get nouns only + -a, --adj get adjectives only + -v, --verb get verbs only + -r, --adv get adverbs only + -c, --count get counts only, used with get + -b, --brief brief output (all on one line, no headers) + -f, --full full result object + -j, --json 
full result object as JSON string + -i, --file input file + -w, --withStopwords include stopwords (default: stopwords are excluded) ``` -## Command-line: CLI - -Version 0.1.6 introduces the command-line interface (./bin/wordpos-cli.js), available as 'wordpos' if installed globally -`npm install -g wordpos`, otherwise as `node_modules/.bin/wordpos` if installed without the -g. ### Examples: - +Get part-of-speech: ```bash $ wordpos get The angry bear chased the frightened little squirrel # Noun 4: @@ -62,29 +67,47 @@ bear # Adverb 1: little ``` -Just the nouns, brief output: +#### Just the nouns, brief output: ```bash $ wordpos get --noun -b The angry bear chased the frightened little squirrel bear chased little squirrel ``` -Just the counts: (nouns, adjectives, verbs, adverbs, total parsed words) +#### Just the counts: ```bash $ wordpos get -c The angry bear chased the frightened little squirrel +# Noun Adjective Verb Adverb Parsed 4 3 1 1 7 ``` -Just the adjective count: (0, adjectives, 0, 0, total parsed words) +#### Just the adjective count: ```bash $ wordpos get --adj -c The angry bear chased the frightened little squirrel +# Noun Adjective Verb Adverb Parsed 0 3 0 0 7 ``` -Get definitions: +#### Get definitions: ```bash $ wordpos def git -git +git (def) n: a person who is deemed to be despicable or contemptible; "only a rotter would do that"; "kill the rat"; "throw the bum out"; "you cowardly little pukes!"; "the British call a contemptible persona `git'" ``` -Get full result object: +#### Brief definition: (excludes examples) +```bash +$ wordpos def -b git +git (def) + n: a person who is deemed to be despicable or contemptible +``` +#### Multiple definitions: +```bash +$ wordpos def git gat +git (def) + n: a person who is deemed to be despicable or contemptible + +gat (def) + n: a gangster's pistol +``` + +#### Get full result object: ```bash $ wordpos def git -f { git: @@ -100,7 +123,8 @@ $ wordpos def git -f "; "kill the rat"; "throw the bum out"; "you 
cowardly little pukes!"; "the British call a contemptib le person a `git\'" ' } ] } ``` -As JSON: + +#### As JSON: ```bash $ wordpos def git -j {"git":[{"synsetOffset":10539715,"lexFilenum":18,"pos":"n","wCnt":0,"lemma":"rotter","synonyms":[]," @@ -109,30 +133,82 @@ would do that\"; \"kill the rat\"; \"throw the bum out\"; \"you cowardly little call a contemptible person a `git'\" "}]} ``` -Get random words: +#### Get synonyms: +``` +$ wordpos syn git gat +git (syn) + n: rotter, dirty_dog, rat, skunk, stinker, stinkpot, bum, puke, crumb, lowlife, scum_bag, so-and-so, git + +gat (syn) + n: gat, rod +``` + +#### Get examples: +``` +$ wordpos exp git +git (exp) + n: "only a rotter would do that", "kill the rat", "throw the bum out", "you cowardly little pukes!", "the British call a contemptible person a `git'" +``` + +#### Get random words: ```bash $ wordpos rand # 1: hopelessly - -$ wordpos rand -N 2 foot -# foot 2: +``` +Get 5 random words: +```sh +$ wordpos rand 5 +# 5: +bemire +swan +dignify +jaunt +daydream +``` +Get a word starting with "foot": +```sh +$ wordpos rand foot +# foot 1: footprint -footlights - -$ wordpos rand -N 2 foot hand -# foot 2: +``` +Get 3 random words starting with "foot" and "hand" each: +```sh +$ wordpos rand 3 foot hand +# foot 3: footlocker footmark +footwall -# hand 2: +# hand 3: hand-hewn handstitched - +handicap +``` +Get a random adjective: +```sh +$ wordpos rand --adj +# Adjective 1: +soaked +``` +Get a random adjective starting with "foot": +```sh $ wordpos rand --adj foot # foot 1: foot-shaped +``` +#### Stopwords +List stopwords: +```bash $ wordpos stopwords -b about after all also am an and another any are as at be because ... 
``` + +Get definition of a stopword: +```bash +$ wordpos def both -w +both (def) + s: (used with count nouns) two considered together; the two; "both girls are pretty" + +``` diff --git a/bin/wordpos-cli.js b/bin/wordpos-cli.js index 7a3d140..7cca429 100644 --- a/bin/wordpos-cli.js +++ b/bin/wordpos-cli.js @@ -5,7 +5,7 @@ * command-line interface to wordpos * * Usage: - * wordpos [options] + * wordpos [options] * * Copyright (c) 2012 mooster@42at.com * https://github.com/moos/wordpos @@ -18,24 +18,26 @@ var program = require('commander'), fs = require('fs'), POS = {noun:'Noun', adj:'Adjective', verb:'Verb', adv:'Adverb'}, version = JSON.parse(fs.readFileSync(__dirname + '/../package.json', 'utf8')).version, + rawCmd = '', + RAND_PLACEHOLDER = '__', nWords; program .version(version) .usage(' [options] [word ... | -i | ]') - .option('-n, --noun', 'Get nouns') - .option('-a, --adj', 'Get adjectives') - .option('-v, --verb', 'Get verbs') - .option('-r, --adv', 'Get adverbs') + .option('-n, --noun', 'get nouns only') + .option('-a, --adj', 'get adjectives only') + .option('-v, --verb', 'get verbs only') + .option('-r, --adv', 'get adverbs only') - .option('-c, --count', 'count only (noun, adj, verb, adv, total parsed words)') + .option('-c, --count', 'get counts only, used with get') .option('-b, --brief', 'brief output (all on one line, no headers)') .option('-f, --full', 'full results object') - .option('-j, --json', 'full results object as JSON') + .option('-j, --json', 'full results object as JSON string') .option('-i, --file ', 'input file') - .option('-s, --withStopwords', 'include stopwords (default: stopwords are excluded)') - .option('-N, --num ', 'number of random words to return') + .option('-w, --withStopwords', 'include stopwords (default: stopwords are excluded)') +// .option('-N, --num ', 'number of random words to return') ; program.command('get') @@ -43,15 +45,50 @@ program.command('get') .action(exec); program.command('def') - .description('lookup 
definitions') + .description('lookup definitions (use -b for brief definition, less examples)') .action(function(){ + rawCmd = 'def'; + _.last(arguments)._name = 'lookup'; + exec.apply(this, arguments); + }); + +program.command('syn') + .description('lookup synonyms') + .action(function(){ + rawCmd = 'syn'; + _.last(arguments)._name = 'lookup'; + exec.apply(this, arguments); + }); + +program.command('exp') + .description('lookup examples') + .action(function(){ + rawCmd = 'exp'; _.last(arguments)._name = 'lookup'; exec.apply(this, arguments); }); program.command('rand') - .description('get random words (starting with , optionally)') - .action(exec); + .description('get random words (starting with [word]). If first arg is a number, returns ' + + 'that many random words. Valid options are -b, -f, -j, -s, -i.') + .action(function(/* arg, ..., program.command */){ + var args = _.toArray(arguments), + num = args.length > 1 && Number(args[0]); + delete program.count; + + // first arg is count? 
+ if (num) { + args.shift(); + program.num = num; + } + + // no startsWith given, add a placeholder + if (args.length === 1){ + args.unshift(RAND_PLACEHOLDER); + } + + exec.apply(this, args); + }); program.command('parse') .description('show parsed words, deduped and less stopwords') @@ -61,6 +98,7 @@ program.command('stopwords') .description('show list of stopwords (valid options are -b and -j)') .action(function(){ cmd = _.last(arguments)._name; + rawCmd = rawCmd || cmd; var stopwords = WordPos.natural.stopwords; if (program.json) @@ -83,6 +121,7 @@ if (!cmd) console.log(program.helpInformation()); function exec(/* args, ..., program.command */){ var args = _.initial(arguments); cmd = _.last(arguments)._name; + rawCmd = rawCmd || cmd; if (program.file) { fs.readFile(program.file, 'utf8', function(err, data){ @@ -150,6 +189,7 @@ function run(data) { if (cmd == 'get') { wordpos[method](words, cb); } else if (cmd == 'rand') { + if (words[0] === RAND_PLACEHOLDER) words[0] = ''; words.forEach(function(word){ wordpos[method]({startsWith: word, count: program.num || 1}, cb); }); @@ -164,9 +204,10 @@ function run(data) { function output(results) { var str; if (program.count && cmd != 'lookup') { - str = (cmd == 'get' && _.reduce(POS, function(memo, v){ + var label = program.brief ? 
'' : _.flatten(['#', _.values(POS), 'Parsed\n']).join(' '); + str = (cmd == 'get' && (label + _.reduce(POS, function(memo, v){ return memo + ((results[v] && results[v].length) || 0) +" "; - },'')) + nWords; + },''))) + nWords; } else { str = sprint(results); } @@ -184,7 +225,7 @@ function sprint(results) { switch (cmd) { case 'lookup': return _.reduce(results, function(memo, v, k){ - return memo + (v.length && (k +"\n"+ print_def(v) +"\n") || ''); + return memo + (v.length && util.format('%s (%s)\n%s\n', k, rawCmd, print_def(v)) || ''); }, ''); default: return _.reduce(results, function(memo, v, k){ @@ -194,8 +235,18 @@ function sprint(results) { } function print_def(defs) { + var proc = { + def: _.property(program.brief ? 'def' : 'gloss'), + syn: function(res){ + return res.synonyms.join(', '); + }, + exp: function(res) { + return '"' + res.exp.join('", "') + '"'; + } + }[ rawCmd ]; + return _.reduce(defs, function(memo, v, k){ - return memo + util.format(' %s: %s\n', v.pos, v.gloss); + return memo + util.format(' %s: %s\n', v.pos, proc(v)); },''); } } diff --git a/package.json b/package.json index f629c78..ec184bb 100644 --- a/package.json +++ b/package.json @@ -3,7 +3,7 @@ "author": "Moos ", "keywords": ["natural", "language", "wordnet", "adjectives", "nouns", "adverbs", "verbs"], "description": "wordpos is a set of part-of-speech utilities for Node.js using natural's WordNet module.", - "version": "0.1.13", + "version": "0.1.14", "homepage": "https://github.com/moos/wordpos", "engines": { "node": ">=0.6"