added package.json and cleanup

This commit is contained in:
moos 2012-05-04 12:23:28 -07:00
parent f084e31994
commit 518725c189
5 changed files with 332 additions and 293 deletions

View File

@ -30,10 +30,10 @@ Installation
Get the script `wordpos.js` and use it. (npm module may be coming.) Get the script `wordpos.js` and use it. (npm module may be coming.)
You may also want to manually download WordNet files from [here](http://wordnet.princeton.edu/wordnet/download/current-version/). Unpack into folder (say `dict`). [natural](http://github.com/NaturalNode/natural) will auto-download WordNet files -- You may also want to manually download [WordNet files](http://wordnet.princeton.edu/wordnet/download/current-version/). Unpack into folder (say `dict`). [natural](http://github.com/NaturalNode/natural) will auto-download WordNet files --
but I've found this to be unreliable as some of the files get truncated, leading the core program to hang. but I've found this to be unreliable as some of the files get truncated, leading the program to hang.
Note: `wordpos-bench` requires a customized [uubench](https://github.com/moos/uubench) module (forthcoming). Note: `wordpos-bench.js` requires a [forked uubench](https://github.com/moos/uubench) module.
API API
@ -48,7 +48,7 @@ WordPOS is a subclass of natural's [WordNet class](https://github.com/NaturalNod
Get POS from text. Get POS from text.
```js ```
wordpos.getPOS(str, callback) -- callback receives a result object: wordpos.getPOS(str, callback) -- callback receives a result object:
{ {
nouns:[], Array of str words that are nouns nouns:[], Array of str words that are nouns
@ -111,7 +111,7 @@ would be considered nouns. (see http://nltk.googlecode.com/svn/trunk/doc/book/c
Determine if a word is a particular POS. Determine if a word is a particular POS.
```js ```
wordpos.isNoun(word, callback) -- callback receives result (true/false) if word is a noun. wordpos.isNoun(word, callback) -- callback receives result (true/false) if word is a noun.
wordpos.isVerb(word, callback) -- callback receives result (true/false) if word is a verb. wordpos.isVerb(word, callback) -- callback receives result (true/false) if word is a verb.
@ -142,7 +142,7 @@ wordpos.isAdverb('fishly', console.log);
These calls are similar to natural's [lookup()](https://github.com/NaturalNode/natural#wordnet) call, except they can be faster if you These calls are similar to natural's [lookup()](https://github.com/NaturalNode/natural#wordnet) call, except they can be faster if you
already know the POS of the word. already know the POS of the word.
```js ```
wordpos.lookupNoun(word, callback) -- callback receives array of lookup objects for a noun wordpos.lookupNoun(word, callback) -- callback receives array of lookup objects for a noun
wordpos.lookupVerb(word, callback) -- callback receives array of lookup objects for a verb wordpos.lookupVerb(word, callback) -- callback receives array of lookup objects for a verb
@ -185,12 +185,22 @@ Benchmark
Lookups are generally slow, as they require loading and searching large WordNet index files. Lookups are generally slow, as they require loading and searching large WordNet index files.
Single word lookup: Single word lookup:
```
getPOS : 30 ops/s { iterations: 10, elapsed: 329 }
getNouns : 106 ops/s { iterations: 10, elapsed: 94 }
getVerbs : 111 ops/s { iterations: 10, elapsed: 90 }
getAdjectives : 132 ops/s { iterations: 10, elapsed: 76 }
getAdverbs : 137 ops/s { iterations: 10, elapsed: 73 }
```
getPOS : 22 ops/s { iterations: 10, elapsed: 451 } 128-word lookup:
getNouns : 66 ops/s { iterations: 10, elapsed: 152 } ```
getVerbs : 66 ops/s { iterations: 10, elapsed: 152 } getPOS : 0 ops/s { iterations: 1, elapsed: 2210 }
getAdjectives : 67 ops/s { iterations: 10, elapsed: 150 } getNouns : 2 ops/s { iterations: 1, elapsed: 666 }
getAdverbs : 83 ops/s { iterations: 10, elapsed: 120 } getVerbs : 2 ops/s { iterations: 1, elapsed: 638 }
getAdjectives : 2 ops/s { iterations: 1, elapsed: 489 }
getAdverbs : 2 ops/s { iterations: 1, elapsed: 407 }
```
Measured on a Win7/64-bit/dual-core/3GHz machine. getPOS() is slowest as it searches through all four index files. Measured on a Win7/64-bit/dual-core/3GHz machine. getPOS() is slowest as it searches through all four index files.

23
package.json Normal file
View File

@ -0,0 +1,23 @@
{
"name": "wordpos",
"description": "wordpos is a set of part-of-speech utilities for Node.js using natural's WordNet module.",
"version": "0.1.0",
"homepage": "https://github.com/moos/wordpos",
"engines": {
"node": ">=0.4.10"
},
"dependencies": {
"natural": "latest",
"underscore": ">=1.3.1"
},
"devDependencies": {
"uubench": "git://github.com/moos/uubench.git"
},
"repository" : {
"type" : "git",
"url" : "http://github.com/moos/wordpos.git"
},
"author": "Moos <mooster@42at.com>",
"keywords": ["natural", "language", "wordnet", "pos"],
"main": "./wordpos.js"
}

View File

@ -1,5 +1,5 @@
var uubench = require('uubench'), var uubench = require('uubench'), // from: https://github.com/moos/uubench
fs = require('fs'), fs = require('fs'),
_ = require('underscore')._, _ = require('underscore')._,
WordPOS = require('./wordpos'), WordPOS = require('./wordpos'),
@ -8,8 +8,7 @@ var uubench = require('uubench'),
suite = new uubench.Suite({ suite = new uubench.Suite({
type: 'fixed', type: 'fixed',
iterations: 10, iterations: 10,
//delay: 750, sync: true, // important!
sync: true,
start: function(tests){ start: function(tests){
console.log('starting %d tests', tests.length); console.log('starting %d tests', tests.length);
@ -42,8 +41,7 @@ function out(res){
var text1 = 'laksasdf', var text1 = 'laksasdf',
text128 = fs.readFileSync('text-128.txt', 'utf8'), text128 = fs.readFileSync('text-128.txt', 'utf8'),
text, text,
pos, pos;
str = "This is some sample text. This text can contain multiple sentences. It also works with urls like.";
function getPOS(next){ function getPOS(next){

View File

@ -1,7 +1,7 @@
/*! /**
* wordpos * wordpos
* *
* part-of-speech utilities using natural's wordnet module. * Node.js part-of-speech utilities using natural's WordNet module.
* *
* Copyright (c) 2012 mooster@42at.com * Copyright (c) 2012 mooster@42at.com
* Released under MIT license * Released under MIT license
@ -9,7 +9,7 @@
var _ = require('underscore')._, var _ = require('underscore')._,
util = require('util'), util = require('util'),
natural = require('./lib/natural'), natural = require('natural'),
WordNet = natural.WordNet, WordNet = natural.WordNet,
tokenizer = new natural.WordTokenizer(), tokenizer = new natural.WordTokenizer(),
stopwords = ' '+ natural.stopwords.join(' ') +' '; stopwords = ' '+ natural.stopwords.join(' ') +' ';
@ -111,7 +111,7 @@ wordposProto.getAdverbs = get('isAdverb');
wordposProto.getNouns = get('isNoun'); wordposProto.getNouns = get('isNoun');
wordposProto.getVerbs = get('isVerb'); wordposProto.getVerbs = get('isVerb');
if (!wordposProto.getIndexFile) if (!wordposProto.getIndexFile) {
wordposProto.getIndexFile = function getIndexFile(pos) { wordposProto.getIndexFile = function getIndexFile(pos) {
switch(pos) { switch(pos) {
case 'n': case 'n':
@ -124,6 +124,7 @@ if (!wordposProto.getIndexFile)
return this.advIndex; return this.advIndex;
} }
}; };
}
/** /**
* getPOS() * getPOS()

View File

@ -1,3 +1,9 @@
// npm install jasmine-node -g
// jasmine-node wordpos_spec.js --verbose
/* Note: 'dict' folder should contain WordNet files.
* Download and unpack manually from http://wordnet.princeton.edu/wordnet/download/current-version/
*/
var WordPOS = require('./wordpos'), var WordPOS = require('./wordpos'),
wordpos = new WordPOS('dict'); wordpos = new WordPOS('dict');
@ -33,6 +39,7 @@ describe('get POS', function() {
expect(result.verbs).toEqualUnordered(expected.verbs); expect(result.verbs).toEqualUnordered(expected.verbs);
expect(result.adjectives).toEqualUnordered(expected.adjectives); expect(result.adjectives).toEqualUnordered(expected.adjectives);
expect(result.adverbs).toEqualUnordered(expected.adverbs); expect(result.adverbs).toEqualUnordered(expected.adverbs);
expect(result.rest).toEqualUnordered(expected.rest);
asyncSpecDone(); asyncSpecDone();
}); });
asyncSpecWait(); asyncSpecWait();