Skip to content

Commit

Permalink
Closes #23 - use sentence-tokenizer for smart sentence tokenizing.
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrooksuk committed Mar 31, 2015
1 parent 72908d3 commit 91b6772
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 9 deletions.
19 changes: 11 additions & 8 deletions lib/summary.js
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
var _ = require('lodash-node/underscore');
var Tokenizer = require('sentence-tokenizer');

/**
 * Splits a block of text into sentences and hands them to `callback`.
 *
 * The callback is invoked exactly once:
 *  - with `false` when `content` contains no '.' character at all;
 *  - otherwise with the array returned by the sentence tokenizer
 *    (or an empty array if the tokenizer returns a falsy value).
 *
 * @param {string} content - Text to split into sentences.
 * @param {function((Array|boolean))} callback - Receives the sentence array,
 *   or `false` when no period is present in `content`.
 */
function splitContentToSentences(content, callback) {
// Without a single period there is nothing to split on; signal that with `false`.
if(content.indexOf('.') === -1) {
return callback(false);
}

// The legacy regex-based split (and its single-newline replace) is dropped:
// leaving it in place alongside the tokenizer made the callback fire twice.
// NOTE(review): 'node-summary' is passed as the tokenizer's first constructor
// argument; its exact meaning is defined by sentence-tokenizer - confirm there.
var tokenizer = new Tokenizer('node-summary');
tokenizer.setEntry(content);

callback(tokenizer.getSentences() || []);
}

function splitContentToParagraphs(content, callback) {
Expand All @@ -15,7 +18,7 @@ function splitContentToParagraphs(content, callback) {

/**
* Original code from http://stackoverflow.com/a/1885660/394013
*/
*/
function intersect_safe(a, b) {
var ai = 0, bi=0;
var result = [];
Expand Down Expand Up @@ -44,7 +47,7 @@ function sentencesIntersection(sent1, sent2, callback) {

var intersect = intersect_safe(s1, s2);
var spliceHere = ((s1.length + s2.length) / 2);

callback(false, intersect.splice(0, spliceHere).length);
}

Expand Down Expand Up @@ -101,15 +104,15 @@ function getSortedSentences(paragraph, sentences_dict, n, callback) {
n = sentence_scores.length;
}
sentence_scores = sentence_scores.slice(0, n);

sentence_scores = _.sortBy(sentence_scores, function(sentence) {
return sentence.order;
});

sorted_sentences = _.pluck(sentence_scores, 'sentence');

callback(sorted_sentences);
});
});
}

function getSentencesRanks(content, callback) {
Expand Down Expand Up @@ -164,7 +167,7 @@ exports.summarize = function(title, content, callback) {
if(sentence) summary.push(sentence);
});
});

// If we only have a title, then there is an issue.
if(sentence.length === 2) err = true;
callback(err, summary.join("\n"));
Expand All @@ -187,4 +190,4 @@ exports.getSortedSentences = function(content, n, callback) {
}
});
});
};
};
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
"algorithm"
],
"dependencies": {
"lodash-node": "2.4.1"
"lodash-node": "2.4.1",
"sentence-tokenizer": "0.0.7"
},
"devDependencies": {
"mocha": "1.15.1",
Expand Down

0 comments on commit 91b6772

Please sign in to comment.