]>
git.wh0rd.org - tt-rss.git/blob - plugins/af_sort_bayes/lib/class.naivebayesianstorage.php
3 ***** BEGIN LICENSE BLOCK *****
4 This file is part of PHP Naive Bayesian Filter.
6 The Initial Developer of the Original Code is
7 Loic d'Anterroches [loic_at_xhtml.net].
8 Portions created by the Initial Developer are Copyright (C) 2003
9 the Initial Developer. All Rights Reserved.
13 PHP Naive Bayesian Filter is free software; you can redistribute it
14 and/or modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of
16 the License, or (at your option) any later version.
18 PHP Naive Bayesian Filter is distributed in the hope that it will
19 be useful, but WITHOUT ANY WARRANTY; without even the implied
20 warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
21 See the GNU General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with Foobar; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27 Alternatively, the contents of this file may be used under the terms of
28 the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 in which case the provisions of the LGPL are applicable instead
32 ***** END LICENSE BLOCK *****
/** Access to the storage of the data for the filter.

To avoid any dependency on a particular database, this class handles all
access to the data storage. You can provide your own class as long as
all the methods are available. The current one goes through the connection
object returned by Db::get().

methods:
- array getCategories()
- bool  wordExists(string $word)
- array getWord(string $word, string $categoryid)

*/
class NaiveBayesianStorage
{
    /** connection object returned by Db::get(); every query goes through it */
    var $con = null;

    /** id of the user owning the categories, words and references */
    var $owner_uid = null;

    /** create a storage bound to one user.

    @param mixed owner uid, stored as given and cast to int when used in SQL
    */
    public function __construct($owner_uid) {
        $this->con = Db::get();
        $this->owner_uid = $owner_uid;
    }

    /** legacy PHP 4 style constructor.
    Kept so that code calling it by name keeps working; it is no longer
    invoked automatically by "new" on PHP 8, hence __construct() above.

    @return bool always true
    @param mixed owner uid
    */
    public function NaiveBayesianStorage($owner_uid) {
        $this->__construct($owner_uid);

        return true;
    }

    /** get the list of categories with basic data.

    @return array key = category ids, values = array(keys = 'probability', 'category', 'word_count')
    */
    public function getCategories() {
        $categories = array();
        $rs = $this->con->query('SELECT * FROM ttrss_plugin_af_sort_bayes_categories WHERE owner_uid = ' .
            (int) $this->owner_uid);

        while ($line = $this->con->fetch_assoc($rs)) {
            $categories[$line['id']] = array(
                'probability' => $line['probability'],
                'category' => $line['category'],
                'word_count' => $line['word_count']
            );
        }

        return $categories;
    }

    /** look up the id of one of the owner's categories by its name.

    @return mixed the category id, or false when no such category exists
    @param string category name
    */
    public function getCategoryByName($category) {
        $rs = $this->con->query("SELECT id FROM ttrss_plugin_af_sort_bayes_categories WHERE category = '" .
            $this->con->escape_string($category) . "' AND owner_uid = " . (int) $this->owner_uid);

        if ($this->con->num_rows($rs) != 0) {
            return $this->con->fetch_result($rs, 0, "id");
        }

        return false;
    }

    /** see if the word is an already learnt word.

    @return bool
    @param string word
    */
    public function wordExists($word) {
        $rs = $this->con->query("SELECT * FROM ttrss_plugin_af_sort_bayes_wordfreqs WHERE word='" .
            $this->con->escape_string($word) . "' AND owner_uid = " . (int) $this->owner_uid);

        return $this->con->num_rows($rs) != 0;
    }

    /** get details of a word in a category.

    @return array ('count' => count), count is 0 when the word is unknown in the category
    @param string word
    @param string category id
    */
    public function getWord($word, $category_id) {
        $details = array();

        $rs = $this->con->query("SELECT * FROM ttrss_plugin_af_sort_bayes_wordfreqs WHERE word='" .
            $this->con->escape_string($word) . "' AND category_id=" . (int) $category_id);

        if ($this->con->num_rows($rs) == 0) {
            $details['count'] = 0;
        } else {
            $details['count'] = $this->con->fetch_result($rs, 0, "count");
        }

        return $details;
    }

    /** update a word in a category.
    If the word is new in this category it is added, else only the count is updated.

    @return mixed result of the underlying query
    @param string word
    @param int count
    @param string category id
    */
    public function updateWord($word, $count, $category_id) {
        $oldword = $this->getWord($word, $category_id);

        if (0 == $oldword['count']) {
            return $this->con->query("INSERT INTO ttrss_plugin_af_sort_bayes_wordfreqs (word, category_id, count, owner_uid)
                VALUES ('" . $this->con->escape_string($word) . "', '" .
                (int) $category_id . "', '" .
                (int) $count . "', '" .
                (int) $this->owner_uid . "')");
        }

        // category id is cast to int exactly as getWord() does, so the row
        // that was looked up and the row that is updated are always the same
        return $this->con->query("UPDATE ttrss_plugin_af_sort_bayes_wordfreqs SET count = count + " .
            (int) $count . " WHERE category_id = '" . (int) $category_id .
            "' AND word = '" . $this->con->escape_string($word) . "'");
    }

    /** remove a word from a category.
    The row is deleted when the count would drop to zero or below, else the
    count is decreased.

    @return mixed result of the underlying query
    @param string word
    @param int count
    @param string category id
    */
    public function removeWord($word, $count, $category_id) {
        $oldword = $this->getWord($word, $category_id);

        if (0 != $oldword['count'] && 0 >= ($oldword['count'] - $count)) {
            return $this->con->query("DELETE FROM ttrss_plugin_af_sort_bayes_wordfreqs WHERE word='" .
                $this->con->escape_string($word) . "' AND category_id='" .
                (int) $category_id . "'");
        }

        return $this->con->query("UPDATE ttrss_plugin_af_sort_bayes_wordfreqs SET count = count - " .
            (int) $count . " WHERE category_id = '" . (int) $category_id . "'
            AND word = '" . $this->con->escape_string($word) . "'");
    }

    /** update the probabilities of the categories and word count.
    This function must be run after a set of training

    @return bool success
    */
    public function updateProbabilities() {
        // first get the total word count over all categories of the owner
        $rs = $this->con->query("SELECT SUM(count) AS total FROM ttrss_plugin_af_sort_bayes_wordfreqs WHERE owner_uid = " .
            (int) $this->owner_uid);

        $total_words = $this->con->fetch_result($rs, 0, "total");

        if ($total_words == 0) {
            // nothing learnt: reset every category and avoid dividing by zero below
            $this->con->query("UPDATE ttrss_plugin_af_sort_bayes_categories SET word_count=0, probability=0 WHERE owner_uid = " .
                (int) $this->owner_uid);

            return true;
        }

        $rs = $this->con->query("SELECT tc.id AS category_id, SUM(count) AS total FROM ttrss_plugin_af_sort_bayes_categories AS tc
            LEFT JOIN ttrss_plugin_af_sort_bayes_wordfreqs AS tw ON (tc.id = tw.category_id) WHERE tc.owner_uid = " .
            (int) $this->owner_uid . " GROUP BY tc.id");

        while ($line = $this->con->fetch_assoc($rs)) {
            // the cast applies to the category total only, then it is divided
            $proba = (int) $line['total'] / $total_words;

            $this->con->query("UPDATE ttrss_plugin_af_sort_bayes_categories SET word_count=" . (int) $line['total'] .
                ", probability=" . $proba . " WHERE id = '" . (int) $line['category_id'] . "'");
        }

        return true;
    }

    /** save a reference in the database.

    @return mixed result of the underlying query
    @param string reference id, must be unique
    @param string category id
    @param string content of the reference (not stored by this implementation)
    */
    public function saveReference($doc_id, $category_id, $content) {
        return $this->con->query("INSERT INTO ttrss_plugin_af_sort_bayes_references (document_id, category_id, owner_uid) VALUES
            ('" . $this->con->escape_string($doc_id) . "', '" .
            (int) $category_id . "', " .
            (int) $this->owner_uid . ")");
    }

    /** get a reference from the database.

    @return array reference (category_id, id, document_id and, when requested
            and found in ttrss_entries, content); empty array if unknown
    @param string reference id
    @param bool whether to also load the lower-cased title and tag-stripped content
    */
    public function getReference($doc_id, $include_content = true)
    {
        $ref = array();

        $rs = $this->con->query("SELECT * FROM ttrss_plugin_af_sort_bayes_references WHERE document_id='" .
            $this->con->escape_string($doc_id) . "' AND owner_uid = " . (int) $this->owner_uid);

        if ($this->con->num_rows($rs) == 0) {
            return $ref;
        }

        $ref['category_id'] = $this->con->fetch_result($rs, 0, 'category_id');
        $ref['id'] = $this->con->fetch_result($rs, 0, 'id');
        $ref['document_id'] = $this->con->fetch_result($rs, 0, 'document_id');

        if ($include_content) {
            $rs = $this->con->query("SELECT content, title FROM ttrss_entries WHERE guid = '" .
                $this->con->escape_string($ref['document_id']) . "'");

            if ($this->con->num_rows($rs) != 0) {
                $ref['content'] = mb_strtolower($this->con->fetch_result($rs, 0, 'title') . ' ' .
                    strip_tags($this->con->fetch_result($rs, 0, 'content')));
            }
        }

        return $ref;
    }

    /** remove a reference from the database

    @return mixed result of the underlying query
    @param string reference id
    */
    public function removeReference($doc_id) {
        return $this->con->query("DELETE FROM ttrss_plugin_af_sort_bayes_references WHERE document_id='" .
            $this->con->escape_string($doc_id) . "' AND owner_uid = " . (int) $this->owner_uid);
    }
}