added package.json and cleanup
This commit is contained in:
parent
f084e31994
commit
518725c189
32
README.md
32
README.md
|
@ -30,10 +30,10 @@ Installation
|
|||
|
||||
Get the script `wordpos.js` and use it. (npm module may be coming.)
|
||||
|
||||
You may also want to manually download WordNet files from [here](http://wordnet.princeton.edu/wordnet/download/current-version/). Unpack into folder (say `dict`). [natural](http://github.com/NaturalNode/natural) will auto-download WordNet files --
|
||||
but I've found this to be unreliable as some of the files get truncated, leading the core program to hang.
|
||||
You may also want to manually download [WordNet files](http://wordnet.princeton.edu/wordnet/download/current-version/). Unpack them into a folder (say `dict`). [natural](http://github.com/NaturalNode/natural) will auto-download WordNet files --
|
||||
but I've found this to be unreliable as some of the files get truncated, leading the program to hang.
|
||||
|
||||
Note: `wordpos-bench` requires a customized [uubench](https://github.com/moos/uubench) module (forthcoming).
|
||||
Note: `wordpos-bench.js` requires a [forked uubench](https://github.com/moos/uubench) module.
|
||||
|
||||
|
||||
API
|
||||
|
@ -48,7 +48,7 @@ WordPOS is a subclass of natural's [WordNet class](https://github.com/NaturalNod
|
|||
|
||||
Get POS from text.
|
||||
|
||||
```js
|
||||
```
|
||||
wordpos.getPOS(str, callback) -- callback receives a result object:
|
||||
{
|
||||
nouns:[], Array of str words that are nouns
|
||||
|
@ -111,7 +111,7 @@ would be considered nouns. (see http://nltk.googlecode.com/svn/trunk/doc/book/c
|
|||
|
||||
Determine if a word is a particular POS.
|
||||
|
||||
```js
|
||||
```
|
||||
wordpos.isNoun(word, callback) -- callback receives result (true/false) if word is a noun.
|
||||
|
||||
wordpos.isVerb(word, callback) -- callback receives result (true/false) if word is a verb.
|
||||
|
@ -142,7 +142,7 @@ wordpos.isAdverb('fishly', console.log);
|
|||
These calls are similar to natural's [lookup()](https://github.com/NaturalNode/natural#wordnet) call, except they can be faster if you
|
||||
already know the POS of the word.
|
||||
|
||||
```js
|
||||
```
|
||||
wordpos.lookupNoun(word, callback) -- callback receives array of lookup objects for a noun
|
||||
|
||||
wordpos.lookupVerb(word, callback) -- callback receives array of lookup objects for a verb
|
||||
|
@ -185,12 +185,22 @@ Benchmark
|
|||
Generally slow as it requires loading and searching large WordNet index files.
|
||||
|
||||
Single word lookup:
|
||||
```
|
||||
getPOS : 30 ops/s { iterations: 10, elapsed: 329 }
|
||||
getNouns : 106 ops/s { iterations: 10, elapsed: 94 }
|
||||
getVerbs : 111 ops/s { iterations: 10, elapsed: 90 }
|
||||
getAdjectives : 132 ops/s { iterations: 10, elapsed: 76 }
|
||||
getAdverbs : 137 ops/s { iterations: 10, elapsed: 73 }
|
||||
```
|
||||
|
||||
getPOS : 22 ops/s { iterations: 10, elapsed: 451 }
|
||||
getNouns : 66 ops/s { iterations: 10, elapsed: 152 }
|
||||
getVerbs : 66 ops/s { iterations: 10, elapsed: 152 }
|
||||
getAdjectives : 67 ops/s { iterations: 10, elapsed: 150 }
|
||||
getAdverbs : 83 ops/s { iterations: 10, elapsed: 120 }
|
||||
128-word lookup:
|
||||
```
|
||||
getPOS : 0 ops/s { iterations: 1, elapsed: 2210 }
|
||||
getNouns : 2 ops/s { iterations: 1, elapsed: 666 }
|
||||
getVerbs : 2 ops/s { iterations: 1, elapsed: 638 }
|
||||
getAdjectives : 2 ops/s { iterations: 1, elapsed: 489 }
|
||||
getAdverbs : 2 ops/s { iterations: 1, elapsed: 407 }
|
||||
```
|
||||
|
||||
Measured on a Win7/64-bit/dual-core/3GHz machine. getPOS() is slowest as it searches through all four index files.
|
||||
|
||||
|
|
|
@ -0,0 +1,23 @@
|
|||
{
|
||||
"name": "wordpos",
|
||||
"description": "wordpos is a set of part-of-speech utilities for Node.js using natural's WordNet module.",
|
||||
"version": "0.1.0",
|
||||
"homepage": "https://github.com/moos/wordpos",
|
||||
"engines": {
|
||||
"node": ">=0.4.10"
|
||||
},
|
||||
"dependencies": {
|
||||
"natural": "latest",
|
||||
"underscore": ">=1.3.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"uubench": "git://github.com/moos/uubench.git"
|
||||
},
|
||||
"repository" : {
|
||||
"type" : "git",
|
||||
"url" : "http://github.com/moos/wordpos.git"
|
||||
},
|
||||
"author": "Moos <mooster@42at.com>",
|
||||
"keywords": ["natural", "language", "wordnet", "pos"],
|
||||
"main": "./wordpos.js"
|
||||
}
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
var uubench = require('uubench'),
|
||||
var uubench = require('uubench'), // from: https://github.com/moos/uubench
|
||||
fs = require('fs'),
|
||||
_ = require('underscore')._,
|
||||
WordPOS = require('./wordpos'),
|
||||
|
@ -8,8 +8,7 @@ var uubench = require('uubench'),
|
|||
suite = new uubench.Suite({
|
||||
type: 'fixed',
|
||||
iterations: 10,
|
||||
//delay: 750,
|
||||
sync: true,
|
||||
sync: true, // important!
|
||||
|
||||
start: function(tests){
|
||||
console.log('starting %d tests', tests.length);
|
||||
|
@ -42,8 +41,7 @@ function out(res){
|
|||
var text1 = 'laksasdf',
|
||||
text128 = fs.readFileSync('text-128.txt', 'utf8'),
|
||||
text,
|
||||
pos,
|
||||
str = "This is some sample text. This text can contain multiple sentences. It also works with urls like.";
|
||||
pos;
|
||||
|
||||
|
||||
function getPOS(next){
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*!
|
||||
/**
|
||||
* wordpos
|
||||
*
|
||||
* part-of-speech utilities using natural's wordnet module.
|
||||
* Node.js part-of-speech utilities using natural's WordNet module.
|
||||
*
|
||||
* Copyright (c) 2012 mooster@42at.com
|
||||
* Released under MIT license
|
||||
|
@ -9,7 +9,7 @@
|
|||
|
||||
var _ = require('underscore')._,
|
||||
util = require('util'),
|
||||
natural = require('./lib/natural'),
|
||||
natural = require('natural'),
|
||||
WordNet = natural.WordNet,
|
||||
tokenizer = new natural.WordTokenizer(),
|
||||
stopwords = ' '+ natural.stopwords.join(' ') +' ';
|
||||
|
@ -111,7 +111,7 @@ wordposProto.getAdverbs = get('isAdverb');
|
|||
wordposProto.getNouns = get('isNoun');
|
||||
wordposProto.getVerbs = get('isVerb');
|
||||
|
||||
if (!wordposProto.getIndexFile)
|
||||
if (!wordposProto.getIndexFile) {
|
||||
wordposProto.getIndexFile = function getIndexFile(pos) {
|
||||
switch(pos) {
|
||||
case 'n':
|
||||
|
@ -124,6 +124,7 @@ if (!wordposProto.getIndexFile)
|
|||
return this.advIndex;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* getPOS()
|
||||
|
|
|
@ -1,3 +1,9 @@
|
|||
// npm install jasmine-node -g
|
||||
// jasmine-node wordpos_spec.js --verbose
|
||||
|
||||
/* Note: 'dict' folder should contain WordNet files.
|
||||
* Download and unpack manually from http://wordnet.princeton.edu/wordnet/download/current-version/
|
||||
*/
|
||||
|
||||
var WordPOS = require('./wordpos'),
|
||||
wordpos = new WordPOS('dict');
|
||||
|
@ -33,6 +39,7 @@ describe('get POS', function() {
|
|||
expect(result.verbs).toEqualUnordered(expected.verbs);
|
||||
expect(result.adjectives).toEqualUnordered(expected.adjectives);
|
||||
expect(result.adverbs).toEqualUnordered(expected.adverbs);
|
||||
expect(result.rest).toEqualUnordered(expected.rest);
|
||||
asyncSpecDone();
|
||||
});
|
||||
asyncSpecWait();
|
||||
|
|
Loading…
Reference in New Issue