added package.json and cleanup

This commit is contained in:
moos 2012-05-04 12:23:28 -07:00
parent f084e31994
commit 518725c189
5 changed files with 332 additions and 293 deletions

View File

@ -30,10 +30,10 @@ Installation
Get the script `wordpos.js` and use it. (npm module may be coming.)
You may also want to manually download WordNet files from [here](http://wordnet.princeton.edu/wordnet/download/current-version/). Unpack into folder (say `dict`). [natural](http://github.com/NaturalNode/natural) will auto-download WordNet files --
but I've found this to be unreliable as some of the files get truncated, leading the core program to hang.
You may also want to manually download the [WordNet files](http://wordnet.princeton.edu/wordnet/download/current-version/) and unpack them into a folder (say `dict`). [natural](http://github.com/NaturalNode/natural) will auto-download WordNet files --
but I've found this to be unreliable as some of the files get truncated, leading the program to hang.
Note: `wordpos-bench` requires a customized [uubench](https://github.com/moos/uubench) module (forthcoming).
Note: `wordpos-bench.js` requires a [forked uubench](https://github.com/moos/uubench) module.
API
@ -48,7 +48,7 @@ WordPOS is a subclass of natural's [WordNet class](https://github.com/NaturalNod
Get POS from text.
```js
```
wordpos.getPOS(str, callback) -- callback receives a result object:
{
nouns:[], Array of str words that are nouns
@ -111,7 +111,7 @@ would be considered nouns. (see http://nltk.googlecode.com/svn/trunk/doc/book/c
Determine if a word is a particular POS.
```js
```
wordpos.isNoun(word, callback) -- callback receives result (true/false) if word is a noun.
wordpos.isVerb(word, callback) -- callback receives result (true/false) if word is a verb.
@ -142,7 +142,7 @@ wordpos.isAdverb('fishly', console.log);
These calls are similar to natural's [lookup()](https://github.com/NaturalNode/natural#wordnet) call, except they can be faster if you
already know the POS of the word.
```js
```
wordpos.lookupNoun(word, callback) -- callback receives array of lookup objects for a noun
wordpos.lookupVerb(word, callback) -- callback receives array of lookup objects for a verb
@ -185,12 +185,22 @@ Benchmark
Lookups are generally slow, as they require loading and searching large WordNet index files.
Single word lookup:
```
getPOS : 30 ops/s { iterations: 10, elapsed: 329 }
getNouns : 106 ops/s { iterations: 10, elapsed: 94 }
getVerbs : 111 ops/s { iterations: 10, elapsed: 90 }
getAdjectives : 132 ops/s { iterations: 10, elapsed: 76 }
getAdverbs : 137 ops/s { iterations: 10, elapsed: 73 }
```
getPOS : 22 ops/s { iterations: 10, elapsed: 451 }
getNouns : 66 ops/s { iterations: 10, elapsed: 152 }
getVerbs : 66 ops/s { iterations: 10, elapsed: 152 }
getAdjectives : 67 ops/s { iterations: 10, elapsed: 150 }
getAdverbs : 83 ops/s { iterations: 10, elapsed: 120 }
128-word lookup:
```
getPOS : 0 ops/s { iterations: 1, elapsed: 2210 }
getNouns : 2 ops/s { iterations: 1, elapsed: 666 }
getVerbs : 2 ops/s { iterations: 1, elapsed: 638 }
getAdjectives : 2 ops/s { iterations: 1, elapsed: 489 }
getAdverbs : 2 ops/s { iterations: 1, elapsed: 407 }
```
Measured on a Win7/64-bit/dual-core/3GHz machine. getPOS() is the slowest, as it searches through all four index files.

23
package.json Normal file
View File

@ -0,0 +1,23 @@
{
"name": "wordpos",
"description": "wordpos is a set of part-of-speech utilities for Node.js using natural's WordNet module.",
"version": "0.1.0",
"homepage": "https://github.com/moos/wordpos",
"engines": {
"node": ">=0.4.10"
},
"dependencies": {
"natural": "latest",
"underscore": ">=1.3.1"
},
"devDependencies": {
"uubench": "git://github.com/moos/uubench.git"
},
"repository" : {
"type" : "git",
"url" : "http://github.com/moos/wordpos.git"
},
"author": "Moos <mooster@42at.com>",
"keywords": ["natural", "language", "wordnet", "pos"],
"main": "./wordpos.js"
}

View File

@ -1,5 +1,5 @@
var uubench = require('uubench'),
var uubench = require('uubench'), // from: https://github.com/moos/uubench
fs = require('fs'),
_ = require('underscore')._,
WordPOS = require('./wordpos'),
@ -8,8 +8,7 @@ var uubench = require('uubench'),
suite = new uubench.Suite({
type: 'fixed',
iterations: 10,
//delay: 750,
sync: true,
sync: true, // important!
start: function(tests){
console.log('starting %d tests', tests.length);
@ -42,8 +41,7 @@ function out(res){
var text1 = 'laksasdf',
text128 = fs.readFileSync('text-128.txt', 'utf8'),
text,
pos,
str = "This is some sample text. This text can contain multiple sentences. It also works with urls like.";
pos;
function getPOS(next){

View File

@ -1,7 +1,7 @@
/*!
/**
* wordpos
*
* part-of-speech utilities using natural's wordnet module.
* Node.js part-of-speech utilities using natural's WordNet module.
*
* Copyright (c) 2012 mooster@42at.com
* Released under MIT license
@ -9,7 +9,7 @@
var _ = require('underscore')._,
util = require('util'),
natural = require('./lib/natural'),
natural = require('natural'),
WordNet = natural.WordNet,
tokenizer = new natural.WordTokenizer(),
stopwords = ' '+ natural.stopwords.join(' ') +' ';
@ -111,7 +111,7 @@ wordposProto.getAdverbs = get('isAdverb');
wordposProto.getNouns = get('isNoun');
wordposProto.getVerbs = get('isVerb');
if (!wordposProto.getIndexFile)
if (!wordposProto.getIndexFile) {
wordposProto.getIndexFile = function getIndexFile(pos) {
switch(pos) {
case 'n':
@ -124,6 +124,7 @@ if (!wordposProto.getIndexFile)
return this.advIndex;
}
};
}
/**
* getPOS()

View File

@ -1,3 +1,9 @@
// npm install jasmine-node -g
// jasmine-node wordpos_spec.js --verbose
/* Note: 'dict' folder should contain WordNet files.
* Download and unpack manually from http://wordnet.princeton.edu/wordnet/download/current-version/
*/
var WordPOS = require('./wordpos'),
wordpos = new WordPOS('dict');
@ -33,6 +39,7 @@ describe('get POS', function() {
expect(result.verbs).toEqualUnordered(expected.verbs);
expect(result.adjectives).toEqualUnordered(expected.adjectives);
expect(result.adverbs).toEqualUnordered(expected.adverbs);
expect(result.rest).toEqualUnordered(expected.rest);
asyncSpecDone();
});
asyncSpecWait();