diff --git a/README.md b/README.md index ae6697b..73f7f64 100644 --- a/README.md +++ b/README.md @@ -241,7 +241,7 @@ Single word lookup: getAdverbs : 2 ops/s { iterations: 1, elapsed: 407 } ``` -128-word lookup (fastIndex) : +128-word lookup (fastIndex, as of v0.1.4) : ``` getPOS : 36 ops/s { iterations: 1, elapsed: 28 } getNouns : 125 ops/s { iterations: 1, elapsed: 8 } diff --git a/tools/fastIndex.js b/tools/fastIndex.js index 9e03b7c..1e245e7 100644 --- a/tools/fastIndex.js +++ b/tools/fastIndex.js @@ -3,6 +3,10 @@ * * override natural.WordNet's IndexFile to use fast index data * + * Copyright (c) 2012 mooster@42at.com + * https://github.com/moos/wordpos + * + * Released under MIT license * */ var _ = require('underscore')._, @@ -11,7 +15,11 @@ var _ = require('underscore')._, fs = require('fs'), KEY_LENGTH = 3; -// load fast index bucket data +/** + * load fast index bucket data + * @param dir - dir path of index files + * @param name - name of index file, eg, 'index.verb' + */ function loadFastIndex(dir, name) { var jsonFile = path.join(dir, 'fast-' + name + '.json'), data = null; @@ -24,6 +32,12 @@ function loadFastIndex(dir, name) { return data; } +/** + * read index file using fast index data at key + * @param key - 3-char key into fast index + * @param index - index file object carrying the fastIndex data (eg, for 'index.verb') + * @param callback - function receives buffer of data read + */ function readIndexForKey(key, index, callback) { var data = index.fastIndex, offset = data.offsets[key][0], @@ -39,6 +53,12 @@ function readIndexForKey(key, index, callback) { }); } +/** + * function that overrides WordNet's IndexFile.find() + * @param search - word to search for + * @param callback - callback receives found line and tokens + * @returns none + */ function find(search, callback) { var self = this, data = this.fastIndex, @@ -94,56 +114,16 @@ function find(search, callback) { }); } -function find____(search, callback) { -// console.log(' >> ', search, this.fileName, this.fd); - var self = 
this, - data = this.fastIndex, - miss = {status: 'miss'}; - - var key = search.slice(0, KEY_LENGTH); - if (!(key in data.offsets)) return callback(miss); - - if (!this.fd) { -// console.log(' ... opening', this.filePath); - this.fd = fs.openSync(this.filePath, 'r'); - } - - // ref count so we know when to close the main index file - ++this.refcount; - - var offset = data.offsets[key][0], - nextKey = data.offsets[key][1], - nextOffset = data.offsets[nextKey][0], - len = nextOffset - offset - 1, - buffer = new Buffer(len), - pos = Math.ceil(len / 2) - 0; - - console.log('--', offset, len, offset+len, offset+pos); - - // call base class's _findAt to search only relevant portion - this._findAt(this.fd, // fd - offset+len * 1, // size (more like 'end' of buffer) - offset+pos, // pos - null, // lastPos - pos * 1, // adjustment - search, // key - done); // callback - - function done(result) { - //console.log(self.refcount, search, result && result.line); - if (--self.refcount == 0) { - //console.log(' ... closing', self.filePath); - fs.close(self.fd); - self.fd = null; - } - callback(result); - } -} - // cache of fast index data across instances of WordPOS class var cache = {}; module.exports = { + /** + * loads fast index data and returns the fast index find function + * + * @param index is the IndexFile instance + * @return function - fast index find or original find on error + */ find: function(index){ var key = index.filePath,