2012-05-30 23:12:22 +00:00
|
|
|
#!/usr/bin/env node
|
|
|
|
/**
|
|
|
|
* wordpos.js
|
|
|
|
*
|
|
|
|
* command-line interface to wordpos
|
|
|
|
*
|
|
|
|
* Usage:
|
2014-10-16 06:58:06 +00:00
|
|
|
* wordpos [options] <get|parse|def|rand|syn|exp|stopwords> <stdin|words*>
|
2012-05-30 23:12:22 +00:00
|
|
|
*
|
|
|
|
* Copyright (c) 2012 mooster@42at.com
|
|
|
|
* https://github.com/moos/wordpos
|
|
|
|
*
|
|
|
|
* Released under MIT license
|
|
|
|
*/
|
|
|
|
|
|
|
|
var program = require('commander'),
|
|
|
|
_ = require('underscore')._,
|
2012-05-30 23:39:31 +00:00
|
|
|
fs = require('fs'),
|
2012-05-30 23:12:22 +00:00
|
|
|
POS = {noun:'Noun', adj:'Adjective', verb:'Verb', adv:'Adverb'},
|
2012-05-31 00:10:44 +00:00
|
|
|
version = JSON.parse(fs.readFileSync(__dirname + '/../package.json', 'utf8')).version,
|
2014-10-16 06:58:06 +00:00
|
|
|
rawCmd = '',
|
|
|
|
RAND_PLACEHOLDER = '__',
|
2012-05-31 00:10:44 +00:00
|
|
|
nWords;
|
2012-05-30 23:12:22 +00:00
|
|
|
|
|
|
|
// Program-wide metadata shown in the generated help text.
program
  .version(version)
  .usage('<command> [options] [word ... | -i <file> | <stdin>]');

// Global flags as [flags, help text] pairs, registered in the listed order:
// part-of-speech filters first, then output/input modifiers.
[
  ['-n, --noun', 'get nouns only'],
  ['-a, --adj', 'get adjectives only'],
  ['-v, --verb', 'get verbs only'],
  ['-r, --adv', 'get adverbs only'],

  ['-c, --count', 'get counts only, used with get'],
  ['-b, --brief', 'brief output (all on one line, no headers)'],
  ['-f, --full', 'full results object'],
  ['-j, --json', 'full results object as JSON string'],
  ['-i, --file <file>', 'input file'],
  ['-w, --withStopwords', 'include stopwords (default: stopwords are excluded)']
].forEach(function(spec){
  program.option(spec[0], spec[1]);
});
|
|
|
|
|
|
|
|
// 'get' needs no pre-processing, so exec is wired in as the action directly.
program.command('get')
  .description('get list of words for particular POS')
  .action(exec);

// def, syn and exp share one handler shape: remember which of the three was
// typed (rawCmd), rename the command object to 'lookup' so exec sees a single
// lookup command, then delegate to exec with the untouched argument list.
[
  ['def', 'lookup definitions (use -b for brief definition, less examples)'],
  ['syn', 'lookup synonyms'],
  ['exp', 'lookup examples']
].forEach(function(entry){
  var name = entry[0];

  program.command(name)
    .description(entry[1])
    .action(function(){
      var command = arguments[arguments.length - 1];
      rawCmd = name;
      command._name = 'lookup';
      exec.apply(this, arguments);
    });
});
|
|
|
|
|
2014-04-28 06:41:44 +00:00
|
|
|
// rand: fetch random words, optionally limited by a leading count and/or by
// one or more prefixes. The help text lists -w (the stopword flag defined in
// the global options) rather than -s, which is not a defined option.
program.command('rand')
  .description('get random words (starting with [word]). If first arg is a number, returns ' +
    'that many random words. Valid options are -b, -f, -j, -w, -i.')
  .action(function(/* arg, ..., program.command */){
    var args = _.toArray(arguments),
      // only treat the first argument as a count when something besides the
      // trailing command object was passed; non-numeric input yields NaN (falsy)
      num = args.length > 1 && Number(args[0]);

    // drop any -c flag so output() never takes its counts-only branch for rand
    delete program.count;

    // first arg is count?
    if (num) {
      args.shift();
      program.num = num;
    }

    // no startsWith given, add a placeholder
    // (only the command object is left, so exec would otherwise see no words)
    if (args.length === 1){
      args.unshift(RAND_PLACEHOLDER);
    }

    exec.apply(this, args);
  });
|
2014-09-25 11:37:33 +00:00
|
|
|
|
|
|
|
// parse: handled entirely by the shared exec/run pipeline.
program.command('parse')
  .description('show parsed words, deduped and less stopwords')
  .action(exec);

// stopwords: print the stopword list exposed on WordPos.natural, either
// through output() when -j is set or as plain text joined by a space (-b)
// or a newline.
program.command('stopwords')
  .description('show list of stopwords (valid options are -b and -j)')
  .action(function(){
    var list;

    // the trailing argument is the command object; its _name is the command typed
    cmd = arguments[arguments.length - 1]._name;
    if (!rawCmd) {
      rawCmd = cmd;
    }

    list = WordPos.natural.stopwords;

    if (program.json) {
      output(list);
      return;
    }
    console.log(list.join(program.brief ? ' ' : '\n'));
  });
|
|
|
|
|
2012-05-30 23:12:22 +00:00
|
|
|
var
|
|
|
|
WordPos = require('../src/wordpos'),
|
|
|
|
util = require('util'),
|
|
|
|
results = {},
|
|
|
|
cmd = null;
|
|
|
|
|
|
|
|
|
|
|
|
// Hand the command line to commander; the matching command's action runs from here.
program.parse(process.argv);

// cmd stays falsy unless an action handler assigned it, so show the help text.
if (!cmd) {
  console.log(program.helpInformation());
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Shared action handler: records which command is running and routes the
 * input text to run().
 *
 * The arguments are the words typed on the command line followed by the
 * command object; the command object's _name becomes `cmd` (and `rawCmd`
 * when no handler has set it yet).
 *
 * Input source, in priority order:
 *   1. the file named by -i/--file,
 *   2. the words given as arguments (rand always takes this path, even
 *      with no words),
 *   3. stdin.
 *
 * A failed file read is reported on stderr and marks the process as failed
 * (exit code 1) instead of being logged to stdout with a success status.
 */
function exec(/* args, ..., program.command */){
  var args = _.initial(arguments);
  cmd = _.last(arguments)._name;
  rawCmd = rawCmd || cmd;

  if (program.file) {
    fs.readFile(program.file, 'utf8', function(err, data){
      if (err) {
        // keep stdout clean for results and let callers detect the failure
        console.error(err);
        process.exitCode = 1;
        return;
      }
      run(data);
    });
  } else if (args.length || cmd === 'rand'){
    run(args.join(' '));
  } else {
    read_stdin(run);
  }
}
|
|
|
|
|
|
|
|
/**
 * Collect everything that arrives on stdin and pass the accumulated text to
 * callback when the stream ends.
 *
 * @param {Function} callback - called with the collected text as its only argument
 */
function read_stdin(callback) {
  var stdin = process.stdin,
    buffered = '';

  stdin.resume();
  stdin.setEncoding('utf8');

  stdin.on('data', function (chunk) {
    var first = chunk.charCodeAt(0);

    // A chunk starting with char code 4 (Ctrl-D) or 26 (Ctrl-Z) is treated
    // as end of input: signal 'end' by hand and stop reading. The chunk
    // itself is discarded.
    if (first === 4 || first === 26) {
      if (stdin.emit('end')) {
        stdin.pause();
      }
      return;
    }

    buffered += chunk;
  });

  stdin.on('end', function () {
    callback(buffered);
  });
}
|
|
|
|
|
|
|
|
/**
 * Translate the part-of-speech flags set on `program` into the method-name
 * fragments ('Noun', 'Adjective', ...) the current command should run for.
 *
 * With no POS flag set, rand gets a single empty fragment and every other
 * command gets all of the POS names.
 *
 * @returns {string[]} method-name fragments
 */
function optToFn() {
  var selected = [];

  Object.keys(POS).forEach(function(opt){
    if (program[opt]) {
      selected.push(POS[opt]);
    }
  });

  if (selected.length) {
    return selected;
  }
  // nothing requested explicitly
  return cmd === 'rand' ? [''] : _.values(POS);
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Parse the input text and run the current command (`cmd`) once per selected
 * part of speech, printing the combined results after every callback has
 * reported back.
 *
 * @param {string} data - raw text (words from argv, a file, or stdin)
 */
function run(data) {
  var wordpos = new WordPos({stopwords: !program.withStopwords}),
    words = wordpos.parse(data),
    fns = optToFn(),
    isGet = (cmd === 'get'),
    plural = isGet ? 's' : '',
    results = {},
    // get makes one call per POS; every other command makes one call per word per POS
    pending = isGet ? fns.length : words.length * fns.length,
    finale = _.after(pending, function(){
      output(results);
    });

  // Store one callback's result, then tick the completion counter.
  // A truthy `word` keys the result by word and appends (lookup-style
  // callbacks); otherwise the result is stored under the POS fragment.
  function collect(what, result, word) {
    if (word) {
      results[word] = [].concat(results[word] || [], result);
    } else {
      results[what] = result;
    }
    finale();
  }

  nWords = words.length;

  // parse only reports the parsed words; no per-POS calls are made
  if (cmd === 'parse') {
    return output({words: words});
  }

  fns.forEach(function(fn){
    var method = cmd + fn + plural,
      cb = function(result, word){
        collect(fn, result, word);
      };

    switch (cmd) {
    case 'get':
      wordpos[method](words, cb);
      break;

    case 'rand':
      // the placeholder stands for "no prefix given"
      if (words[0] === RAND_PLACEHOLDER) {
        words[0] = '';
      }
      words.forEach(function(word){
        wordpos[method]({startsWith: word, count: program.num || 1}, cb);
      });
      break;

    default:
      words.forEach(function(word){
        wordpos[method](word, cb);
      });
    }
  });
}
|
|
|
|
|
|
|
|
/**
 * Print results to stdout.
 *
 * With -c set (and a command other than lookup) only counts are printed:
 * for get, an optional header line followed by one count per POS and the
 * number of parsed words; for any other command, just the parsed-word count.
 * Otherwise the results are formatted by sprint().
 *
 * @param {Object|Array} results - whatever the command collected
 */
function output(results) {
  var text, header, tallies;

  if (!program.count || cmd === 'lookup') {
    text = sprint(results);
  } else if (cmd === 'get') {
    header = program.brief ? '' : ['#'].concat(_.values(POS), 'Parsed\n').join(' ');
    tallies = _.values(POS).map(function(name){
      var found = results[name];
      return ((found && found.length) || 0) + ' ';
    }).join('');
    text = header + tallies + nWords;
  } else {
    // numeric addition on purpose: the value logged is a number
    // (NaN when nWords was never set)
    text = 0 + nWords;
  }

  console.log(text);
}
|
|
|
|
|
|
|
|
/**
 * Render results as a string according to the output flags.
 *
 *   -j  -> JSON string of the whole object
 *   -f  -> util.inspect dump (depth 10, colors on)
 *   otherwise a per-key listing; entries with an empty value are skipped.
 *
 * @param {Object} results - collected results keyed by word or by POS name
 * @returns {string}
 */
function sprint(results) {
  if (program.json) {
    return util.format('%j', results);
  }
  if (program.full) {
    return util.inspect(results, false, 10, true);
  }

  var sep = program.brief ? ' ' : '\n';

  if (cmd === 'lookup') {
    // one section per word: "<word> (<def|syn|exp>)" followed by its entries
    return _.reduce(results, function(text, entries, word){
      if (!entries.length) {
        return text;
      }
      return text + util.format('%s (%s)\n%s\n', word, rawCmd, print_def(entries));
    }, '');
  }

  // every other command: optional "# <key> <count>:" header, then the items
  return _.reduce(results, function(text, items, key){
    var heading = program.brief ? '' : util.format('# %s %d:%s', key, items.length, sep);
    if (!items.length) {
      return text;
    }
    return text + util.format('%s%s%s\n', heading, items.join(sep), sep);
  }, '');

  // Format the entries of one looked-up word, one "<pos>: <text>" line each.
  // Which field is shown depends on the command the user typed (rawCmd).
  function print_def(defs) {
    var renderers = {
        def: function(res){
          return res[program.brief ? 'def' : 'gloss'];
        },
        syn: function(res){
          return res.synonyms.join(', ');
        },
        exp: function(res){
          return '"' + res.exp.join('", "') + '"';
        }
      },
      render = renderers[rawCmd];

    return _.reduce(defs, function(lines, entry){
      return lines + util.format(' %s: %s\n', entry.pos, render(entry));
    }, '');
  }
}
|
|
|
|
|