From: Andrew Dolgov Date: Thu, 18 Jun 2015 05:42:17 +0000 (+0300) Subject: add more english common words to bayes ignore list X-Git-Tag: 16.3~285 X-Git-Url: https://git.wh0rd.org/?a=commitdiff_plain;h=ef7395f170caa46519e11f77148457f7aa1cee43;p=tt-rss.git add more english common words to bayes ignore list --- diff --git a/plugins/af_sort_bayes/lib/class.naivebayesian.php b/plugins/af_sort_bayes/lib/class.naivebayesian.php index da81f2aa..4a4ffa7e 100644 --- a/plugins/af_sort_bayes/lib/class.naivebayesian.php +++ b/plugins/af_sort_bayes/lib/class.naivebayesian.php @@ -226,7 +226,18 @@ @return array ignore list */ function getIgnoreList() { - return array('the', 'that', 'you', 'for', 'and'); + //return array('the', 'that', 'you', 'for', 'and'); + + // https://en.wikipedia.org/wiki/Most_common_words_in_English + return array('the', 'be', 'to', 'of', 'and', 'a', 'in', 'that', 'have', 'I', 'it', 'for', 'not', 'on', 'with', + 'he', 'as', 'you', 'do', 'at', 'this', 'but', 'his', 'by', 'from', 'they', 'we', 'say', 'her', + 'she', 'or', 'an', 'will', 'my', 'one', 'all', 'would', 'there', 'their', 'what', 'so', 'up', + 'out', 'if', 'about', 'who', 'get', 'which', 'go', 'me', 'when', 'make', 'can', 'like', 'time', + 'no', 'just', 'him', 'know', 'take', 'people', 'into', 'year', 'your', 'good', 'some', 'could', + 'them', 'see', 'other', 'than', 'then', 'now', 'look', 'only', 'come', 'its', 'over', 'think', + 'also', 'back', 'after', 'use', 'two', 'how', 'our', 'work', 'first', 'well', 'way', 'even', + 'new', 'want', 'because', 'any', 'these', 'give', 'day', 'most', 'us', 'read', 'more'); + } /** get the tokens from a string