-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
86 changed files
with
2,859 additions
and
1,982 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
# Sphinx build info version 1 | ||
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. | ||
config: 46e60af904d03c0364121ceed7c97fe4 | ||
config: ff0805878e0a3eb17f276fa67c1c8b36 | ||
tags: 645f666f9bcd5a90fca523b33c5a78b7 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,294 @@ | ||
/**@constructor*/ | ||
BaseStemmer = function() { | ||
this.setCurrent = function(value) { | ||
this.current = value; | ||
this.cursor = 0; | ||
this.limit = this.current.length; | ||
this.limit_backward = 0; | ||
this.bra = this.cursor; | ||
this.ket = this.limit; | ||
}; | ||
|
||
this.getCurrent = function() { | ||
return this.current; | ||
}; | ||
|
||
this.copy_from = function(other) { | ||
this.current = other.current; | ||
this.cursor = other.cursor; | ||
this.limit = other.limit; | ||
this.limit_backward = other.limit_backward; | ||
this.bra = other.bra; | ||
this.ket = other.ket; | ||
}; | ||
|
||
this.in_grouping = function(s, min, max) { | ||
if (this.cursor >= this.limit) return false; | ||
var ch = this.current.charCodeAt(this.cursor); | ||
if (ch > max || ch < min) return false; | ||
ch -= min; | ||
if ((s[ch >>> 3] & (0x1 << (ch & 0x7))) == 0) return false; | ||
this.cursor++; | ||
return true; | ||
}; | ||
|
||
this.in_grouping_b = function(s, min, max) { | ||
if (this.cursor <= this.limit_backward) return false; | ||
var ch = this.current.charCodeAt(this.cursor - 1); | ||
if (ch > max || ch < min) return false; | ||
ch -= min; | ||
if ((s[ch >>> 3] & (0x1 << (ch & 0x7))) == 0) return false; | ||
this.cursor--; | ||
return true; | ||
}; | ||
|
||
this.out_grouping = function(s, min, max) { | ||
if (this.cursor >= this.limit) return false; | ||
var ch = this.current.charCodeAt(this.cursor); | ||
if (ch > max || ch < min) { | ||
this.cursor++; | ||
return true; | ||
} | ||
ch -= min; | ||
if ((s[ch >>> 3] & (0X1 << (ch & 0x7))) == 0) { | ||
this.cursor++; | ||
return true; | ||
} | ||
return false; | ||
}; | ||
|
||
this.out_grouping_b = function(s, min, max) { | ||
if (this.cursor <= this.limit_backward) return false; | ||
var ch = this.current.charCodeAt(this.cursor - 1); | ||
if (ch > max || ch < min) { | ||
this.cursor--; | ||
return true; | ||
} | ||
ch -= min; | ||
if ((s[ch >>> 3] & (0x1 << (ch & 0x7))) == 0) { | ||
this.cursor--; | ||
return true; | ||
} | ||
return false; | ||
}; | ||
|
||
this.eq_s = function(s) | ||
{ | ||
if (this.limit - this.cursor < s.length) return false; | ||
if (this.current.slice(this.cursor, this.cursor + s.length) != s) | ||
{ | ||
return false; | ||
} | ||
this.cursor += s.length; | ||
return true; | ||
}; | ||
|
||
this.eq_s_b = function(s) | ||
{ | ||
if (this.cursor - this.limit_backward < s.length) return false; | ||
if (this.current.slice(this.cursor - s.length, this.cursor) != s) | ||
{ | ||
return false; | ||
} | ||
this.cursor -= s.length; | ||
return true; | ||
}; | ||
|
||
/** @return {number} */ this.find_among = function(v) | ||
{ | ||
var i = 0; | ||
var j = v.length; | ||
|
||
var c = this.cursor; | ||
var l = this.limit; | ||
|
||
var common_i = 0; | ||
var common_j = 0; | ||
|
||
var first_key_inspected = false; | ||
|
||
while (true) | ||
{ | ||
var k = i + ((j - i) >>> 1); | ||
var diff = 0; | ||
var common = common_i < common_j ? common_i : common_j; // smaller | ||
// w[0]: string, w[1]: substring_i, w[2]: result, w[3]: function (optional) | ||
var w = v[k]; | ||
var i2; | ||
for (i2 = common; i2 < w[0].length; i2++) | ||
{ | ||
if (c + common == l) | ||
{ | ||
diff = -1; | ||
break; | ||
} | ||
diff = this.current.charCodeAt(c + common) - w[0].charCodeAt(i2); | ||
if (diff != 0) break; | ||
common++; | ||
} | ||
if (diff < 0) | ||
{ | ||
j = k; | ||
common_j = common; | ||
} | ||
else | ||
{ | ||
i = k; | ||
common_i = common; | ||
} | ||
if (j - i <= 1) | ||
{ | ||
if (i > 0) break; // v->s has been inspected | ||
if (j == i) break; // only one item in v | ||
|
||
// - but now we need to go round once more to get | ||
// v->s inspected. This looks messy, but is actually | ||
// the optimal approach. | ||
|
||
if (first_key_inspected) break; | ||
first_key_inspected = true; | ||
} | ||
} | ||
do { | ||
var w = v[i]; | ||
if (common_i >= w[0].length) | ||
{ | ||
this.cursor = c + w[0].length; | ||
if (w.length < 4) return w[2]; | ||
var res = w[3](this); | ||
this.cursor = c + w[0].length; | ||
if (res) return w[2]; | ||
} | ||
i = w[1]; | ||
} while (i >= 0); | ||
return 0; | ||
}; | ||
|
||
// find_among_b is for backwards processing. Same comments apply | ||
this.find_among_b = function(v) | ||
{ | ||
var i = 0; | ||
var j = v.length | ||
|
||
var c = this.cursor; | ||
var lb = this.limit_backward; | ||
|
||
var common_i = 0; | ||
var common_j = 0; | ||
|
||
var first_key_inspected = false; | ||
|
||
while (true) | ||
{ | ||
var k = i + ((j - i) >> 1); | ||
var diff = 0; | ||
var common = common_i < common_j ? common_i : common_j; | ||
var w = v[k]; | ||
var i2; | ||
for (i2 = w[0].length - 1 - common; i2 >= 0; i2--) | ||
{ | ||
if (c - common == lb) | ||
{ | ||
diff = -1; | ||
break; | ||
} | ||
diff = this.current.charCodeAt(c - 1 - common) - w[0].charCodeAt(i2); | ||
if (diff != 0) break; | ||
common++; | ||
} | ||
if (diff < 0) | ||
{ | ||
j = k; | ||
common_j = common; | ||
} | ||
else | ||
{ | ||
i = k; | ||
common_i = common; | ||
} | ||
if (j - i <= 1) | ||
{ | ||
if (i > 0) break; | ||
if (j == i) break; | ||
if (first_key_inspected) break; | ||
first_key_inspected = true; | ||
} | ||
} | ||
do { | ||
var w = v[i]; | ||
if (common_i >= w[0].length) | ||
{ | ||
this.cursor = c - w[0].length; | ||
if (w.length < 4) return w[2]; | ||
var res = w[3](this); | ||
this.cursor = c - w[0].length; | ||
if (res) return w[2]; | ||
} | ||
i = w[1]; | ||
} while (i >= 0); | ||
return 0; | ||
}; | ||
|
||
/* to replace chars between c_bra and c_ket in this.current by the | ||
* chars in s. | ||
*/ | ||
this.replace_s = function(c_bra, c_ket, s) | ||
{ | ||
var adjustment = s.length - (c_ket - c_bra); | ||
this.current = this.current.slice(0, c_bra) + s + this.current.slice(c_ket); | ||
this.limit += adjustment; | ||
if (this.cursor >= c_ket) this.cursor += adjustment; | ||
else if (this.cursor > c_bra) this.cursor = c_bra; | ||
return adjustment; | ||
}; | ||
|
||
this.slice_check = function() | ||
{ | ||
if (this.bra < 0 || | ||
this.bra > this.ket || | ||
this.ket > this.limit || | ||
this.limit > this.current.length) | ||
{ | ||
return false; | ||
} | ||
return true; | ||
}; | ||
|
||
this.slice_from = function(s) | ||
{ | ||
var result = false; | ||
if (this.slice_check()) | ||
{ | ||
this.replace_s(this.bra, this.ket, s); | ||
result = true; | ||
} | ||
return result; | ||
}; | ||
|
||
this.slice_del = function() | ||
{ | ||
return this.slice_from(""); | ||
}; | ||
|
||
this.insert = function(c_bra, c_ket, s) | ||
{ | ||
var adjustment = this.replace_s(c_bra, c_ket, s); | ||
if (c_bra <= this.bra) this.bra += adjustment; | ||
if (c_bra <= this.ket) this.ket += adjustment; | ||
}; | ||
|
||
this.slice_to = function() | ||
{ | ||
var result = ''; | ||
if (this.slice_check()) | ||
{ | ||
result = this.current.slice(this.bra, this.ket); | ||
} | ||
return result; | ||
}; | ||
|
||
this.assign_to = function() | ||
{ | ||
return this.current.slice(0, this.limit); | ||
}; | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.