clean up fastIndex

This commit is contained in:
moos 2012-05-20 11:39:23 -07:00
parent 6652265ef0
commit 2230300dc3
2 changed files with 28 additions and 48 deletions

View File

@ -241,7 +241,7 @@ Single word lookup:
getAdverbs : 2 ops/s { iterations: 1, elapsed: 407 } getAdverbs : 2 ops/s { iterations: 1, elapsed: 407 }
``` ```
128-word lookup (fastIndex) : 128-word lookup (fastIndex, as of v0.1.4) :
``` ```
getPOS : 36 ops/s { iterations: 1, elapsed: 28 } getPOS : 36 ops/s { iterations: 1, elapsed: 28 }
getNouns : 125 ops/s { iterations: 1, elapsed: 8 } getNouns : 125 ops/s { iterations: 1, elapsed: 8 }

View File

@ -3,6 +3,10 @@
* *
* override natural.WordNet's IndexFile to use fast index data * override natural.WordNet's IndexFile to use fast index data
* *
* Copyright (c) 2012 mooster@42at.com
* https://github.com/moos/wordpos
*
* Released under MIT license *
*/ */
var _ = require('underscore')._, var _ = require('underscore')._,
@ -11,7 +15,11 @@ var _ = require('underscore')._,
fs = require('fs'), fs = require('fs'),
KEY_LENGTH = 3; KEY_LENGTH = 3;
// load fast index bucket data /**
* load fast index bucket data
* @param dir - dir path of index files
* @param name - name of index file, eg, 'index.verb'
*/
function loadFastIndex(dir, name) { function loadFastIndex(dir, name) {
var jsonFile = path.join(dir, 'fast-' + name + '.json'), var jsonFile = path.join(dir, 'fast-' + name + '.json'),
data = null; data = null;
@ -24,6 +32,12 @@ function loadFastIndex(dir, name) {
return data; return data;
} }
/**
* read index file using fast index data at key
* @param key - 3-char key into fast index
* @param index - IndexFile instance whose fastIndex data is used (eg, the one for 'index.verb')
* @param callback - function receives buffer of data read
*/
function readIndexForKey(key, index, callback) { function readIndexForKey(key, index, callback) {
var data = index.fastIndex, var data = index.fastIndex,
offset = data.offsets[key][0], offset = data.offsets[key][0],
@ -39,6 +53,12 @@ function readIndexForKey(key, index, callback) {
}); });
} }
/**
* function that overrides WordNet's IndexFile.find()
* @param search - word to search for
* @param callback - callback receives found line and tokens
* @returns none
*/
function find(search, callback) { function find(search, callback) {
var self = this, var self = this,
data = this.fastIndex, data = this.fastIndex,
@ -94,56 +114,16 @@ function find(search, callback) {
}); });
} }
/**
 * Alternate fast-index lookup that delegates the search to the base class's
 * _findAt(), restricted to the slice of the index file belonging to the
 * bucket that `search` falls into.
 *
 * Must be invoked with `this` bound to an object providing `fastIndex`,
 * `filePath`, `fd`, `refcount` and `_findAt`.
 *
 * @param search - word to search for
 * @param callback - receives the result passed on by _findAt, or
 *                   {status: 'miss'} when no bucket exists for the word
 * @returns none (the value returned by callback on a miss)
 */
function find____(search, callback) {
  var self = this,
      data = this.fastIndex,
      miss = {status: 'miss'};

  // bucket key is the leading KEY_LENGTH characters of the word
  var key = search.slice(0, KEY_LENGTH);
  if (!(key in data.offsets)) return callback(miss);

  // open the main index file lazily; the descriptor is shared by all
  // in-flight lookups on this instance
  if (!this.fd) {
    this.fd = fs.openSync(this.filePath, 'r');
  }

  // ref count so we know when to close the main index file
  ++this.refcount;

  // each offsets entry is [offset of this bucket, key of the next bucket]
  var offset = data.offsets[key][0],
      nextKey = data.offsets[key][1],
      nextOffset = data.offsets[nextKey][0],
      len = nextOffset - offset - 1,
      pos = Math.ceil(len / 2);

  // call base class's _findAt to search only relevant portion
  this._findAt(this.fd,   // fd
      offset + len,       // size (more like 'end' of buffer)
      offset + pos,       // pos
      null,               // lastPos
      pos,                // adjustment
      search,             // key
      done);              // callback

  function done(result) {
    // last outstanding lookup closes the file; the descriptor was opened
    // synchronously, so close it the same way (fs.close() without a
    // callback throws on current Node.js)
    if (--self.refcount === 0) {
      fs.closeSync(self.fd);
      self.fd = null;
    }
    callback(result);
  }
}
// cache of fast index data across instances of WordPOS class // cache of fast index data across instances of WordPOS class
var cache = {}; var cache = {};
module.exports = { module.exports = {
/**
* loads fast index data and return fast index find function
*
* @param index is the IndexFile instance
* @return function - fast index find, or the original find if errors occur
*/
find: function(index){ find: function(index){
var key = index.filePath, var key = index.filePath,