diff --git a/README.md b/README.md index 7491a67..1d38edc 100644 --- a/README.md +++ b/README.md @@ -211,12 +211,114 @@ To override, pass an options hash to the constructor. With the `profile` option, // true 'fast' 29 ``` -## Fast Index +### Fast Index Version 0.1.4 introduces `fastIndex` option. This uses a secondary index on the index files and is much faster. It is on by default. Secondary index files are generated at install time and placed in the same directory as WNdb.path. Details can be found in tools/stat.js. See blog article [Optimizing WordPos](http://blog.42at.com/optimizing-wordpos). +## CLI + +Version 0.1.6 introduces the command-line interface (./bin/wordpos-cli.js), available as 'wordpos' when using npm install. + +```bash +$ wordpos get The angry bear chased the frightened little squirrel +# Noun 4: +bear +chased +little +squirrel + +# Adjective 3: +angry +frightened +little + +# Verb 1: +bear + +# Adverb 1: +little +``` +Just the nouns, brief output: +```bash +$ wordpos get --noun -b The angry bear chased the frightened little squirrel +bear chased little squirrel +``` +Just the counts: (nouns, adjectives, verbs, adverbs, total parsed words) +```bash +$ wordpos get -c The angry bear chased the frightened little squirrel +4 3 1 1 7 +``` +Just the adjective count: (nouns, adjectives, verbs, adverbs, total parsed words) +```bash +$ wordpos get --adj -c The angry bear chased the frightened little squirrel +0 3 0 0 7 +``` + +Get definitions: +```bash +$ wordpos def git +git + n: a person who is deemed to be despicable or contemptible; "only a rotter would do that"; "kill the rat"; "throw the bum out"; "you cowardly little pukes!"; "the British call a contemptible person a `git'" +``` +Get full result object: +```bash +$ wordpos def git -f +{ git: + [ { synsetOffset: 10539715, + lexFilenum: 18, + pos: 'n', + wCnt: 0, + lemma: 'rotter', + synonyms: [], + lexId: '0', + ptrs: [], + gloss: 'a person who is deemed to be despicable or contemptible; "only a 
rotter would do that +"; "kill the rat"; "throw the bum out"; "you cowardly little pukes!"; "the British call a contemptib +le person a `git\'" ' } ] } +``` +As JSON: +```bash +$ wordpos def git -j +{"git":[{"synsetOffset":10539715,"lexFilenum":18,"pos":"n","wCnt":0,"lemma":"rotter","synonyms":[]," +lexId":"0","ptrs":[],"gloss":"a person who is deemed to be despicable or contemptible; \"only a rotter +would do that\"; \"kill the rat\"; \"throw the bum out\"; \"you cowardly little pukes!\"; \"the British +call a contemptible person a `git'\" "}]} +``` +Usage: +```bash +$ wordpos + + Usage: wordpos-cli.js [options] [word ... | -i <file> | <stdin>] + + Commands: + + get + get list of words for particular POS + + def + lookup definitions + + parse + show parsed words, deduped and less stopwords + + Options: + + -h, --help output usage information + -V, --version output the version number + -n, --noun Get nouns + -a, --adj Get adjectives + -v, --verb Get verbs + -r, --adv Get adverbs + -c, --count count only (noun, adj, verb, adv, total parsed words) + -b, --brief brief output (all on one line, no headers) + -f, --full full results object + -j, --json full results object as JSON + -i, --file <file> input file + -s, --stopwords include stopwords +``` + ## Benchmark node wordpos-bench.js diff --git a/bin/wordpos-cli.js b/bin/wordpos-cli.js new file mode 100644 index 0000000..471e3f5 --- /dev/null +++ b/bin/wordpos-cli.js @@ -0,0 +1,178 @@ +#!/usr/bin/env node +/** + * wordpos-cli.js + * + * command-line interface to wordpos + * + * Usage: + * wordpos [options] + * + * Copyright (c) 2012 mooster@42at.com + * https://github.com/moos/wordpos + * + * Released under MIT license + */ + +var program = require('commander'), + _ = require('underscore')._, + POS = {noun:'Noun', adj:'Adjective', verb:'Verb', adv:'Adverb'}, + nWords; + +program + .version('0.1.0') + .usage('[options] [word ... 
| -i <file> | <stdin>]') + + .option('-n, --noun', 'Get nouns') + .option('-a, --adj', 'Get adjectives') + .option('-v, --verb', 'Get verbs') + .option('-r, --adv', 'Get adverbs') + + .option('-c, --count', 'count only (noun, adj, verb, adv, total parsed words)') + .option('-b, --brief', 'brief output (all on one line, no headers)') + .option('-f, --full', 'full results object') + .option('-j, --json', 'full results object as JSON') + .option('-i, --file <file>', 'input file') + .option('-s, --stopwords', 'include stopwords') + ; + +program.command('get') + .description('get list of words for particular POS') + .action(exec); + +program.command('def') + .description('lookup definitions') + .action(function(){ + _.last(arguments).name = 'lookup'; + exec.apply(this, arguments); + }); + +program.command('parse') + .description('show parsed words, deduped and less stopwords') + .action(exec); + +var + WordPos = require('../src/wordpos'), + fs = require('fs'), + util = require('util'), + results = {}, + cmd = null; + + +program.parse(process.argv); +if (!cmd) console.log(program.helpInformation()); + + +function exec(/* args, ..., program.command */){ /* records the command name in cmd, then picks the input: --file contents, else the words given on the command line, else stdin */ + var args = _.initial(arguments); + cmd = _.last(arguments).name; + + if (program.file) { + fs.readFile(program.file, 'utf8', function(err, data){ + if (err) return console.error(err); + run(data); + }); + } else if (args.length){ + run(args.join(' ')); + } else { + read_stdin(run); + } +} + +function read_stdin(callback) { /* collects stdin as utf8 text and passes it to callback on 'end'; a chunk starting with ^D or ^Z also ends the input */ + var data = ''; + process.stdin.resume(); + process.stdin.setEncoding('utf8'); + process.stdin.on('data', function (chunk) { + var c = chunk.charCodeAt(0); + if (c == 4 || c == 26) // ^D or ^Z followed by \n + return process.stdin.emit('end') && process.stdin.pause(); + data += chunk; + }); + process.stdin.on('end', function () { + callback(data); + }); +} + +function optToFn() { /* returns the POS names (values of POS) whose option flag was given */ + var fns = _.reject(POS, function(fn, opt) { return !program[opt] }); + if (!fns.length) fns = _.values(POS); //default to all if no POS given + 
return fns; +} + + +function run(data) { /* parses data into words, queries WordPos for each selected POS and calls output once every callback has reported; for 'parse' the word list is output directly */ + var + opts = {stopwords: !program.stopwords}, + wordpos = new WordPos(opts), + words = wordpos.parse(data), + fns = optToFn(), + plural = (cmd=='get' ? 's':''), + results = {}, + finale = _.after( + plural ? fns.length : words.length * fns.length, + _.bind(output, null, results)), + collect = function(what, result, word){ + if (word) { // lookup + results[word] = [].concat(results[word] || [], result); + } else { // get + results[what] = result; + } + finale(); + }; + + nWords = words.length; + if (cmd == 'parse') return output({words: words}); + + // loop over desired POS + _(fns).each(function(fn){ + var method = cmd + fn + plural, + cb = _.bind(collect, null, fn); + if (cmd == 'get') { + wordpos[method](words, cb); + } else { + words.forEach(function(word){ + wordpos[method](word, cb); + }); + } + }); +} + +function output(results) { /* with --count (and not a lookup) prints the per-POS counts, listed only for 'get', followed by nWords; otherwise prints sprint(results) */ + var str; + if (program.count && cmd != 'lookup') { + str = (cmd == 'get' && _.reduce(POS, function(memo, v){ + return memo + ((results[v] && results[v].length) || 0) +" "; + },'')) + nWords; + } else { + str = sprint(results); + } + console.log(str); +} + +function sprint(results) { /* formats results as JSON (--json), as a util.inspect dump (--full), or as plain text; --brief joins entries with spaces instead of newlines and drops the headers */ + if (program.json) { + return util.format('%j',results); + } else if (program.full) { + return util.inspect(results,false,10, true); + } + var sep = program.brief ? ' ' : '\n'; + + switch (cmd) { + case 'lookup': + return _.reduce(results, function(memo, v, k){ + return memo + (v.length && (k +"\n"+ print_def(v) +"\n") || ''); + }, ''); + default: + return _.reduce(results, function(memo, v, k){ + var pre = program.brief ? 
'' : util.format('# %s %d:%s', k, v.length, sep); + return memo + (v.length && util.format('%s%s%s\n', pre, v.join(sep), sep) || ''); + }, ''); + } + + function print_def(defs) { /* one " pos: gloss" line per definition */ + return _.reduce(defs, function(memo, v, k){ + return memo + util.format(' %s: %s\n', v.pos, v.gloss); + },''); + } +} + diff --git a/package.json b/package.json index 2009050..59af5f7 100644 --- a/package.json +++ b/package.json @@ -3,11 +3,12 @@ "author": "Moos ", "keywords": ["natural", "language", "wordnet", "pos"], "description": "wordpos is a set of part-of-speech utilities for Node.js using natural's WordNet module.", - "version": "0.1.5", + "version": "0.1.6", "homepage": "https://github.com/moos/wordpos", "engines": { - "node": ">=0.4.10" + "node": ">=0.6" }, + "bin": "./bin/wordpos-cli.js", "dependencies": { "natural": "latest", "underscore": ">=1.3.1",