Added return value for getX(); exposed parse() and the WNdb object.

This commit is contained in:
moos 2012-05-20 10:19:55 -07:00
parent b859fc3bfe
commit d16a506ff1
3 changed files with 35 additions and 23 deletions

View File

@ -45,7 +45,7 @@ Please note: all API are async since the underlying WordNet library is async.
WordPOS is a subclass of natural's [WordNet class](https://github.com/NaturalNode/natural#wordnet) and inherits all its methods. WordPOS is a subclass of natural's [WordNet class](https://github.com/NaturalNode/natural#wordnet) and inherits all its methods.
### getX() ### getX()...
Get POS from text. Get POS from text.
@ -76,6 +76,8 @@ than getPOS() which looks up the word in all index files.
NB: [stopwords](https://github.com/NaturalNode/natural/blob/master/lib/natural/util/stopwords.js) NB: [stopwords](https://github.com/NaturalNode/natural/blob/master/lib/natural/util/stopwords.js)
are stripped out from str before lookup. are stripped out from str before lookup.
All getX() functions return the number of parsed words that will be looked up (less duplicates and stopwords).
Example: Example:
```js ```js
@ -108,7 +110,7 @@ would be considered nouns. (see http://nltk.googlecode.com/svn/trunk/doc/book/c
squirrel / NN squirrel / NN
### isX() ### isX()...
Determine if a word is a particular POS. Determine if a word is a particular POS.
@ -138,7 +140,7 @@ wordpos.isAdverb('fishly', console.log);
// false // false
``` ```
### lookupX() ### lookupX()...
These calls are similar to natural's [lookup()](https://github.com/NaturalNode/natural#wordnet) call, except they can be faster if you These calls are similar to natural's [lookup()](https://github.com/NaturalNode/natural#wordnet) call, except they can be faster if you
already know the POS of the word. already know the POS of the word.
@ -179,6 +181,15 @@ Or use WordNet's inherited method:
wordpos.lookup('great', console.log); wordpos.lookup('great', console.log);
// ... // ...
``` ```
### Other methods
```
WordPOS.WNdb -- access to the WNdb object
wordpos.parse(str) -- returns a tokenized array of words, minus duplicates and stopwords. This method is called internally by all getX() calls.
```
### Options ### Options
```js ```js

View File

@ -3,7 +3,7 @@
"author": "Moos <mooster@42at.com>", "author": "Moos <mooster@42at.com>",
"keywords": ["natural", "language", "wordnet", "pos"], "keywords": ["natural", "language", "wordnet", "pos"],
"description": "wordpos is a set of part-of-speech utilities for Node.js using natural's WordNet module.", "description": "wordpos is a set of part-of-speech utilities for Node.js using natural's WordNet module.",
"version": "0.1.2", "version": "0.1.3",
"homepage": "https://github.com/moos/wordpos", "homepage": "https://github.com/moos/wordpos",
"engines": { "engines": {
"node": ">=0.4.10" "node": ">=0.4.10"

View File

@ -68,18 +68,19 @@ function get(isFn) {
i = 0, i = 0,
self = this, self = this,
results = [], results = [],
args = [results]; args = [results],
profile && args.push(0); done = function(){
if (!n) return callback.apply(null, args); profile && (args[1] = new Date() - start);
callback.apply(null, args)
};
if (!n) return (process.nextTick(done),0);
words.forEach(function(word,j){ words.forEach(function(word,j){
self[isFn](word, function(yes){ self[isFn](word, function(yes){
yes && results.push(word); yes && results.push(word);
if (++i==n) { (++i==n) && done();
profile && (args[1] = new Date() - start);
callback.apply(null, args);
}
}, /*_noprofile*/ true); }, /*_noprofile*/ true);
}); });
return n;
}; };
} }
@ -146,6 +147,8 @@ wordposProto.getAdverbs = get('isAdverb');
wordposProto.getNouns = get('isNoun'); wordposProto.getNouns = get('isNoun');
wordposProto.getVerbs = get('isVerb'); wordposProto.getVerbs = get('isVerb');
wordposProto.parse = prepText;
if (!wordposProto.getIndexFile) { if (!wordposProto.getIndexFile) {
wordposProto.getIndexFile = function getIndexFile(pos) { wordposProto.getIndexFile = function getIndexFile(pos) {
switch(pos) { switch(pos) {
@ -181,16 +184,18 @@ wordposProto.getPOS = function(text, callback) {
nTests = testFns.length, nTests = testFns.length,
nWords = words.length, nWords = words.length,
self = this, self = this,
c = 0; c = 0,
done = function(){
profile && (args[1] = new Date() - start);
callback.apply(null, args)
};
profile && args.push(0); if (!nWords) return (process.nextTick(done),0);
if (!nWords) return callback.apply(null, args);
words.forEach(lookup); words.forEach(lookup);
function lookup(word){ function lookup(word){
var any = false, var any = false,
t=0; t=0;
word = normalize(word);
testFns.forEach(lookupPOS); testFns.forEach(lookupPOS);
function lookupPOS(isFn,i,list){ function lookupPOS(isFn,i,list){
@ -204,17 +209,13 @@ wordposProto.getPOS = function(text, callback) {
function donePOS() { function donePOS() {
if (++t == nTests) { if (++t == nTests) {
!any && data['rest'].push(word); !any && data['rest'].push(word);
done(); (++c == nWords) && done();
} }
} }
} }
return nWords;
function done(){
if (++c == nWords) {
profile && (args[1] = new Date() - start);
callback.apply(null, args);
}
}
}; };
WordPOS.WNdb = WNdb;
module.exports = WordPOS; module.exports = WordPOS;