added package.json and cleanup
This commit is contained in:
parent
f084e31994
commit
518725c189
32
README.md
32
README.md
|
@ -30,10 +30,10 @@ Installation
|
||||||
|
|
||||||
Get the script `wordpos.js` and use it. (npm module may be coming.)
|
Get the script `wordpos.js` and use it. (npm module may be coming.)
|
||||||
|
|
||||||
You may also want to manually download WordNet files from [here](http://wordnet.princeton.edu/wordnet/download/current-version/). Unpack into folder (say `dict`). [natural](http://github.com/NaturalNode/natural) will auto-download WordNet files --
|
You may also want to manually download the [WordNet files](http://wordnet.princeton.edu/wordnet/download/current-version/) and unpack them into a folder (say `dict`). [natural](http://github.com/NaturalNode/natural) will auto-download WordNet files --
|
||||||
but I've found this to be unreliable as some of the files get truncated, leading the core program to hang.
|
but I've found this to be unreliable as some of the files get truncated, leading the program to hang.
|
||||||
|
|
||||||
Note: `wordpos-bench` requires a customized [uubench](https://github.com/moos/uubench) module (forthcoming).
|
Note: `wordpos-bench.js` requires a [forked uubench](https://github.com/moos/uubench) module.
|
||||||
|
|
||||||
|
|
||||||
API
|
API
|
||||||
|
@ -48,7 +48,7 @@ WordPOS is a subclass of natural's [WordNet class](https://github.com/NaturalNod
|
||||||
|
|
||||||
Get POS from text.
|
Get POS from text.
|
||||||
|
|
||||||
```js
|
```
|
||||||
wordpos.getPOS(str, callback) -- callback receives a result object:
|
wordpos.getPOS(str, callback) -- callback receives a result object:
|
||||||
{
|
{
|
||||||
nouns:[], Array of str words that are nouns
|
nouns:[], Array of str words that are nouns
|
||||||
|
@ -111,7 +111,7 @@ would be considered nouns. (see http://nltk.googlecode.com/svn/trunk/doc/book/c
|
||||||
|
|
||||||
Determine if a word is a particular POS.
|
Determine if a word is a particular POS.
|
||||||
|
|
||||||
```js
|
```
|
||||||
wordpos.isNoun(word, callback) -- callback receives result (true/false) if word is a noun.
|
wordpos.isNoun(word, callback) -- callback receives result (true/false) if word is a noun.
|
||||||
|
|
||||||
wordpos.isVerb(word, callback) -- callback receives result (true/false) if word is a verb.
|
wordpos.isVerb(word, callback) -- callback receives result (true/false) if word is a verb.
|
||||||
|
@ -142,7 +142,7 @@ wordpos.isAdverb('fishly', console.log);
|
||||||
These calls are similar to natural's [lookup()](https://github.com/NaturalNode/natural#wordnet) call, except they can be faster if you
|
These calls are similar to natural's [lookup()](https://github.com/NaturalNode/natural#wordnet) call, except they can be faster if you
|
||||||
already know the POS of the word.
|
already know the POS of the word.
|
||||||
|
|
||||||
```js
|
```
|
||||||
wordpos.lookupNoun(word, callback) -- callback receives array of lookup objects for a noun
|
wordpos.lookupNoun(word, callback) -- callback receives array of lookup objects for a noun
|
||||||
|
|
||||||
wordpos.lookupVerb(word, callback) -- callback receives array of lookup objects for a verb
|
wordpos.lookupVerb(word, callback) -- callback receives array of lookup objects for a verb
|
||||||
|
@ -185,12 +185,22 @@ Benchmark
|
||||||
Lookups are generally slow, as they require loading and searching large WordNet index files.
|
Lookups are generally slow, as they require loading and searching large WordNet index files.
|
||||||
|
|
||||||
Single word lookup:
|
Single word lookup:
|
||||||
|
```
|
||||||
|
getPOS : 30 ops/s { iterations: 10, elapsed: 329 }
|
||||||
|
getNouns : 106 ops/s { iterations: 10, elapsed: 94 }
|
||||||
|
getVerbs : 111 ops/s { iterations: 10, elapsed: 90 }
|
||||||
|
getAdjectives : 132 ops/s { iterations: 10, elapsed: 76 }
|
||||||
|
getAdverbs : 137 ops/s { iterations: 10, elapsed: 73 }
|
||||||
|
```
|
||||||
|
|
||||||
getPOS : 22 ops/s { iterations: 10, elapsed: 451 }
|
128-word lookup:
|
||||||
getNouns : 66 ops/s { iterations: 10, elapsed: 152 }
|
```
|
||||||
getVerbs : 66 ops/s { iterations: 10, elapsed: 152 }
|
getPOS : 0 ops/s { iterations: 1, elapsed: 2210 }
|
||||||
getAdjectives : 67 ops/s { iterations: 10, elapsed: 150 }
|
getNouns : 2 ops/s { iterations: 1, elapsed: 666 }
|
||||||
getAdverbs : 83 ops/s { iterations: 10, elapsed: 120 }
|
getVerbs : 2 ops/s { iterations: 1, elapsed: 638 }
|
||||||
|
getAdjectives : 2 ops/s { iterations: 1, elapsed: 489 }
|
||||||
|
getAdverbs : 2 ops/s { iterations: 1, elapsed: 407 }
|
||||||
|
```
|
||||||
|
|
||||||
Measured on a Win7/64-bit/dual-core/3GHz machine. getPOS() is slowest as it searches through all four index files.
|
Measured on a Win7/64-bit/dual-core/3GHz machine. getPOS() is slowest as it searches through all four index files.
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,23 @@
|
||||||
|
{
|
||||||
|
"name": "wordpos",
|
||||||
|
"description": "wordpos is a set of part-of-speech utilities for Node.js using natural's WordNet module.",
|
||||||
|
"version": "0.1.0",
|
||||||
|
"homepage": "https://github.com/moos/wordpos",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=0.4.10"
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"natural": "latest",
|
||||||
|
"underscore": ">=1.3.1"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"uubench": "git://github.com/moos/uubench.git"
|
||||||
|
},
|
||||||
|
"repository" : {
|
||||||
|
"type" : "git",
|
||||||
|
"url" : "http://github.com/moos/wordpos.git"
|
||||||
|
},
|
||||||
|
"author": "Moos <mooster@42at.com>",
|
||||||
|
"keywords": ["natural", "language", "wordnet", "pos"],
|
||||||
|
"main": "./wordpos.js"
|
||||||
|
}
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
var uubench = require('uubench'),
|
var uubench = require('uubench'), // from: https://github.com/moos/uubench
|
||||||
fs = require('fs'),
|
fs = require('fs'),
|
||||||
_ = require('underscore')._,
|
_ = require('underscore')._,
|
||||||
WordPOS = require('./wordpos'),
|
WordPOS = require('./wordpos'),
|
||||||
|
@ -8,8 +8,7 @@ var uubench = require('uubench'),
|
||||||
suite = new uubench.Suite({
|
suite = new uubench.Suite({
|
||||||
type: 'fixed',
|
type: 'fixed',
|
||||||
iterations: 10,
|
iterations: 10,
|
||||||
//delay: 750,
|
sync: true, // important!
|
||||||
sync: true,
|
|
||||||
|
|
||||||
start: function(tests){
|
start: function(tests){
|
||||||
console.log('starting %d tests', tests.length);
|
console.log('starting %d tests', tests.length);
|
||||||
|
@ -42,8 +41,7 @@ function out(res){
|
||||||
var text1 = 'laksasdf',
|
var text1 = 'laksasdf',
|
||||||
text128 = fs.readFileSync('text-128.txt', 'utf8'),
|
text128 = fs.readFileSync('text-128.txt', 'utf8'),
|
||||||
text,
|
text,
|
||||||
pos,
|
pos;
|
||||||
str = "This is some sample text. This text can contain multiple sentences. It also works with urls like.";
|
|
||||||
|
|
||||||
|
|
||||||
function getPOS(next){
|
function getPOS(next){
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
/*!
|
/**
|
||||||
* wordpos
|
* wordpos
|
||||||
*
|
*
|
||||||
* part-of-speech utilities using natural's wordnet module.
|
* Node.js part-of-speech utilities using natural's WordNet module.
|
||||||
*
|
*
|
||||||
* Copyright (c) 2012 mooster@42at.com
|
* Copyright (c) 2012 mooster@42at.com
|
||||||
* Released under MIT license
|
* Released under MIT license
|
||||||
|
@ -9,7 +9,7 @@
|
||||||
|
|
||||||
var _ = require('underscore')._,
|
var _ = require('underscore')._,
|
||||||
util = require('util'),
|
util = require('util'),
|
||||||
natural = require('./lib/natural'),
|
natural = require('natural'),
|
||||||
WordNet = natural.WordNet,
|
WordNet = natural.WordNet,
|
||||||
tokenizer = new natural.WordTokenizer(),
|
tokenizer = new natural.WordTokenizer(),
|
||||||
stopwords = ' '+ natural.stopwords.join(' ') +' ';
|
stopwords = ' '+ natural.stopwords.join(' ') +' ';
|
||||||
|
@ -111,7 +111,7 @@ wordposProto.getAdverbs = get('isAdverb');
|
||||||
wordposProto.getNouns = get('isNoun');
|
wordposProto.getNouns = get('isNoun');
|
||||||
wordposProto.getVerbs = get('isVerb');
|
wordposProto.getVerbs = get('isVerb');
|
||||||
|
|
||||||
if (!wordposProto.getIndexFile)
|
if (!wordposProto.getIndexFile) {
|
||||||
wordposProto.getIndexFile = function getIndexFile(pos) {
|
wordposProto.getIndexFile = function getIndexFile(pos) {
|
||||||
switch(pos) {
|
switch(pos) {
|
||||||
case 'n':
|
case 'n':
|
||||||
|
@ -124,6 +124,7 @@ if (!wordposProto.getIndexFile)
|
||||||
return this.advIndex;
|
return this.advIndex;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* getPOS()
|
* getPOS()
|
||||||
|
|
|
@ -1,3 +1,9 @@
|
||||||
|
// npm install jasmine-node -g
|
||||||
|
// jasmine-node wordpos_spec.js --verbose
|
||||||
|
|
||||||
|
/* Note: 'dict' folder should contain WordNet files.
|
||||||
|
* Download and unpack manually from http://wordnet.princeton.edu/wordnet/download/current-version/
|
||||||
|
*/
|
||||||
|
|
||||||
var WordPOS = require('./wordpos'),
|
var WordPOS = require('./wordpos'),
|
||||||
wordpos = new WordPOS('dict');
|
wordpos = new WordPOS('dict');
|
||||||
|
@ -33,6 +39,7 @@ describe('get POS', function() {
|
||||||
expect(result.verbs).toEqualUnordered(expected.verbs);
|
expect(result.verbs).toEqualUnordered(expected.verbs);
|
||||||
expect(result.adjectives).toEqualUnordered(expected.adjectives);
|
expect(result.adjectives).toEqualUnordered(expected.adjectives);
|
||||||
expect(result.adverbs).toEqualUnordered(expected.adverbs);
|
expect(result.adverbs).toEqualUnordered(expected.adverbs);
|
||||||
|
expect(result.rest).toEqualUnordered(expected.rest);
|
||||||
asyncSpecDone();
|
asyncSpecDone();
|
||||||
});
|
});
|
||||||
asyncSpecWait();
|
asyncSpecWait();
|
||||||
|
|
Loading…
Reference in New Issue