wordpos/bin/wordpos-cli.js

273 lines
7.4 KiB
JavaScript
Raw Normal View History

2012-05-30 23:12:22 +00:00
#!/usr/bin/env node
/**
 * wordpos.js
 *
 * command-line interface to wordpos
 *
 * Usage:
 *    wordpos [options] <get|parse|def|rand|syn|exp|seek|stopwords> <stdin|words*>
 *
 * Copyright (c) 2012, 2016 mooster@42at.com
 * https://github.com/moos/wordpos
 *
 * Released under MIT license
 */
// Modules used by the CLI.
var program = require('commander');
var _ = require('underscore')._;
var fs = require('fs');

// POS option name -> label that is spliced into wordpos method names
// (see optToFn and run), e.g. cmd 'get' + 'Noun' + 's'.
var POS = {noun:'Noun', adj:'Adjective', verb:'Verb', adv:'Adverb'};

// POS option name -> one-letter code, used instead of POS for the seek command.
var POS_abbr = {noun:'n', adj:'a', verb:'v', adv:'r'};

// Version string reported by the CLI, read from the package manifest.
var version = JSON.parse(fs.readFileSync(__dirname + '/../package.json', 'utf8')).version;

// Command name as typed by the user; def/syn/exp set it before they are
// renamed to 'lookup'.
var rawCmd = '';

// Stand-in word passed to exec() when rand is called without a word.
var RAND_PLACEHOLDER = '__';

// Number of words in the current input; set by run(), printed by output().
var nWords;
2012-05-30 23:12:22 +00:00
// Program-level settings: version banner and usage line.
program
  .version(version)
  .usage('<command> [options] [word ... | -i <file> | <stdin>]');

// Global options, registered in the order they should appear in the help text.
[
  ['-n, --noun', 'get nouns only'],
  ['-a, --adj', 'get adjectives only'],
  ['-v, --verb', 'get verbs only'],
  ['-r, --adv', 'get adverbs only'],
  ['-c, --count', 'get counts only, used with get'],
  ['-b, --brief', 'brief output (all on one line, no headers)'],
  ['-f, --full', 'full results object'],
  ['-j, --json', 'full results object as JSON string'],
  ['-i, --file <file>', 'input file'],
  ['-w, --withStopwords', 'include stopwords (default: stopwords are excluded)']
].forEach(function(spec){
  program.option(spec[0], spec[1]);
});
// get: handled directly by exec() under its own name.
program.command('get')
  .description('get list of words for particular POS')
  .action(exec);

// def, syn and exp share one handler: remember the name the user typed in
// rawCmd, rename the command object to 'lookup', then hand over to exec().
_.each([
  ['def', 'lookup definitions (use -b for brief definition, less examples)'],
  ['syn', 'lookup synonyms'],
  ['exp', 'lookup examples']
], function(spec){
  var typedName = spec[0];
  program.command(typedName)
    .description(spec[1])
    .action(function(){
      rawCmd = typedName;
      _.last(arguments)._name = 'lookup';
      exec.apply(this, arguments);
    });
});
// seek: requires exactly one POS flag and always prints the full result object.
program.command('seek')
  .description('get record at synset offset. Must include one of POS -n, -a, -v, -r')
  .action(function(){
    var posFlags = 'noun adj adv verb'.split(' '),
      tally = _.chain(program).pick(posFlags).countBy().value(),
      selected = tally.true;   // how many POS flags are switched on (undefined if none)

    if (selected !== 1) {
      console.error('Must include one and only one of -n, -a, -v, -r');
      process.exit(-1);
    }

    // force full output mode
    program.full = 1;
    exec.apply(this, arguments);
  });
2014-04-28 06:41:44 +00:00
// rand: optional leading number = how many words to return; the remaining
// arguments are the prefixes the random words should start with.
program.command('rand')
  .description('get random words (starting with [word]). If first arg is a number, returns ' +
    'that many random words. Valid options are -b, -f, -j, -s, -i.')
  .action(function(/* arg, ..., program.command */){
    var params = Array.prototype.slice.call(arguments),
      howMany = params.length > 1 ? Number(params[0]) : false;

    // the count option is dropped for rand
    delete program.count;

    // a numeric first argument is the word count, not a prefix
    if (howMany) {
      program.num = howMany;
      params = params.slice(1);
    }

    // only the command object is left: no prefix was given, add a placeholder
    if (params.length === 1) {
      params.unshift(RAND_PLACEHOLDER);
    }
    exec.apply(this, params);
  });
2014-09-25 11:37:33 +00:00
// parse: handled directly by exec() under its own name.
program.command('parse')
  .description('show parsed words, deduped and less stopwords')
  .action(exec);

// stopwords: prints WordPos.stopwords without going through exec()/run().
program.command('stopwords')
  .description('show list of stopwords (valid options are -b and -j)')
  .action(function(){
    cmd = _.last(arguments)._name;
    if (!rawCmd) {
      rawCmd = cmd;
    }

    var list = WordPos.stopwords;
    if (program.json) {
      output(list);
      return;
    }
    // brief: all on one line; otherwise one stopword per line
    console.log(list.join(program.brief ? ' ' : '\n'));
  });
2012-05-30 23:12:22 +00:00
// Remaining modules and state, set up before the command line is parsed.
var WordPos = require('../src/wordpos');
var util = require('util');
var results = {};
var cmd = null;   // name of the command being run; set by the command handlers

// Dispatch to the matching command handler.
program.parse(process.argv);

// No handler set cmd: print the help text.
if (!cmd) {
  console.log(program.helpInformation());
}
/**
 * Common action handler: records which command is running and feeds the
 * input text to run().
 *
 * Called with the positional words followed by the command object as the last
 * argument. Sets the file-level `cmd` from the command object's `_name` and
 * fills `rawCmd` if no handler has set it yet.
 *
 * Input source, in order of precedence:
 *   1. the file named by the -i/--file option,
 *   2. the words given on the command line (rand always takes this path),
 *   3. stdin.
 *
 * A file that cannot be read is reported on stderr and the process exit code
 * is set to 1; run() is not called in that case.
 */
function exec(/* args, ..., program.command */){
  var args = _.initial(arguments);
  cmd = _.last(arguments)._name;
  rawCmd = rawCmd || cmd;

  if (program.file) {
    fs.readFile(program.file, 'utf8', function(err, data){
      if (err) {
        // errors belong on stderr, and the shell should see the failure
        console.error(err);
        process.exitCode = 1;
        return;
      }
      run(data);
    });
  } else if (args.length || cmd === 'rand'){
    run(args.join(' '));
  } else {
    read_stdin(run);
  }
}
/**
 * Collect everything written to stdin and pass it to `callback` as one
 * utf8 string once the stream ends.
 *
 * A chunk whose first character is code 4 (^D) or 26 (^Z) is treated as an
 * end-of-input marker: 'end' is emitted by hand and the stream is paused.
 * That chunk itself is not added to the collected text.
 */
function read_stdin(callback) {
  var input = process.stdin,
    collected = '';

  input.resume();
  input.setEncoding('utf8');

  input.on('data', function (chunk) {
    var first = chunk.charCodeAt(0);
    if (first === 4 || first === 26) {  // ^D or ^Z typed on a line of its own
      if (input.emit('end')) {
        input.pause();
      }
      return;
    }
    collected += chunk;
  });

  input.on('end', function () {
    callback(collected);
  });
}
/**
 * Translate the POS options given on the command line into the list of
 * name fragments that run() iterates over.
 *
 * Uses the one-letter codes (POS_abbr) for seek and the full labels (POS)
 * for every other command.
 *
 * @returns {string[]} labels of the selected POS options; when none is
 *   selected, [''] for rand and all labels for any other command.
 */
function optToFn() {
  var labels = (cmd === 'seek') ? POS_abbr : POS,
    chosen = _.filter(labels, function(label, opt) {
      return Boolean(program[opt]);
    });

  if (chosen.length) {
    return chosen;
  }
  // no POS given: plain rand() for the rand command, otherwise default to all
  return (cmd === 'rand') ? [''] : _.values(labels);
}
/**
 * Run the current command (`cmd`) over the input text and print the result
 * through output() once every pending callback has reported back.
 *
 * - seek: `data` is a whitespace-separated list of synset offsets; each is
 *   looked up with wordpos.seek() for every selected POS code.
 * - parse: prints the parsed word list and returns immediately.
 * - get: calls wordpos.get<POS>s(words, cb) once per selected POS.
 * - rand: calls wordpos.rand<POS>({startsWith, count}, cb) per word.
 * - anything else (lookup): calls wordpos.<cmd><POS>(word, cb) per word.
 *
 * Also stores the number of words in the file-level `nWords`.
 *
 * @param {string} data - raw input text: command-line words joined by
 *   spaces, or the contents of the input file / stdin.
 */
function run(data) {
  var
    opts = {stopwords: !program.withStopwords},
    wordpos = new WordPos(opts),
    seek = cmd === 'seek',
    // Offsets coming from a file or stdin are separated by newlines as well
    // as spaces, so split on any whitespace and drop empty tokens.
    words = seek ? _.compact(data.split(/\s+/)) : wordpos.parse(data),
    fns = optToFn(),
    plural = (cmd === 'get' ? 's' : ''),
    results = {},
    // get passes the whole word list at once (one callback per POS); every
    // other command calls back once per word per POS.
    finale = _.after(plural ? fns.length : words.length * fns.length,
      _.bind(output, null, results)),
    collect = function(what, result, word){
      if (word) {   // lookup
        results[word] = [].concat(results[word] || [], result);
      } else {      // get
        results[what] = result;
      }
      finale();
    };

  nWords = words.length;

  if (cmd === 'parse') return output({words: words});

  // loop over desired POS
  _(fns).each(function(fn){
    var method = cmd + fn + plural,
      cb = _.bind(collect, null, fn);

    if (cmd === 'get') {
      wordpos[method](words, cb);
    } else if (cmd === 'rand') {
      // the placeholder stands for "no prefix"
      if (words[0] === RAND_PLACEHOLDER) words[0] = '';
      words.forEach(function(word){
        wordpos[method]({startsWith: word, count: program.num || 1}, cb);
      });
    } else if (seek) {
      words.forEach(function(offset){
        wordpos.seek(offset, fn, function(err, result){
          results[offset] = result;
          finale();
        });
      });
    } else {
      words.forEach(function(word){
        wordpos[method](word, cb);
      });
    }
  });
}
/**
 * Print the results of a command to stdout.
 *
 * With the count option (and a command other than lookup) only counts are
 * printed: for get, one count per POS label followed by the number of words,
 * preceded by a header line unless brief output is requested; for other
 * commands just the number of words. Otherwise the results are formatted by
 * sprint().
 *
 * @param {Object} results - results to print
 */
function output(results) {
  var text;

  if (!program.count || cmd == 'lookup') {
    text = sprint(results);
  } else if (cmd == 'get') {
    var header = program.brief ? '' : ['#'].concat(_.values(POS), 'Parsed\n').join(' '),
      tally = _.map(POS, function(label){
        return ((results[label] && results[label].length) || 0) + ' ';
      }).join('');
    text = header + tally + nWords;
  } else {
    // counts for commands other than get: just the number of words
    text = Number(nWords);
  }

  console.log(text);
}
/**
 * Format results as a string for printing.
 *
 * - json option: the results as a JSON string.
 * - full option: util.inspect() of the results (depth 10, colored).
 * - lookup: one section per word, "<word> (<rawCmd>)" followed by one line
 *   per entry as rendered by print_def().
 * - any other command: one section per key; a "# <key> <count>:" header
 *   (omitted with brief) followed by the values, joined by a space (brief)
 *   or a newline.
 *
 * Keys whose value has no length are skipped.
 *
 * @param {Object} results - results keyed by word or by POS label
 * @returns {string} formatted text
 */
function sprint(results) {
  if (program.json) {
    return util.format('%j', results);
  }
  if (program.full) {
    return util.inspect(results, false, 10, true);
  }

  var sep = program.brief ? ' ' : '\n';

  if (cmd === 'lookup') {
    return _.reduce(results, function(text, entries, word){
      if (!entries.length) {
        return text;
      }
      return text + util.format('%s (%s)\n%s\n', word, rawCmd, print_def(entries));
    }, '');
  }

  return _.reduce(results, function(text, list, label){
    if (!list.length) {
      return text;
    }
    var header = program.brief ? '' : util.format('# %s %d:%s', label, list.length, sep);
    return text + util.format('%s%s%s\n', header, list.join(sep), sep);
  }, '');

  /**
   * Render lookup entries, one " <pos>: <text>" line each. The text depends
   * on the command the user typed (rawCmd): the 'def' property (brief) or
   * the 'gloss' property for def, the joined synonyms for syn, the quoted
   * examples for exp.
   */
  function print_def(defs) {
    var renderers = {
      def: _.property(program.brief ? 'def' : 'gloss'),
      syn: function(entry){
        return entry.synonyms.join(', ');
      },
      exp: function(entry){
        return '"' + entry.exp.join('", "') + '"';
      }
    };
    var render = renderers[rawCmd];

    return _.map(defs, function(entry){
      return util.format(' %s: %s\n', entry.pos, render(entry));
    }).join('');
  }
}