]>
git.wh0rd.org - tt-rss.git/blob - plugins/af_sort_bayes/lib/class.naivebayesianstorage.php
fccdcaf06eb7c643a1d098e558d5df0240256063
3 ***** BEGIN LICENSE BLOCK *****
4 This file is part of PHP Naive Bayesian Filter.
6 The Initial Developer of the Original Code is
7 Loic d'Anterroches [loic_at_xhtml.net].
8 Portions created by the Initial Developer are Copyright (C) 2003
9 the Initial Developer. All Rights Reserved.
13 PHP Naive Bayesian Filter is free software; you can redistribute it
14 and/or modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of
16 the License, or (at your option) any later version.
18 PHP Naive Bayesian Filter is distributed in the hope that it will
19 be useful, but WITHOUT ANY WARRANTY; without even the implied
20 warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
21 See the GNU General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with Foobar; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27 Alternatively, the contents of this file may be used under the terms of
28 the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 in which case the provisions of the LGPL are applicable instead
32 ***** END LICENSE BLOCK *****
/** Access to the storage of the data for the filter.

To avoid any dependency on a particular database, this class handles all
access to the data storage. You can provide your own class as long as
all the methods are available. The current one relies on a MySQL database.

Available methods include:
- array getCategories()
- bool wordExists(string $word)
- array getWord(string $word, string $categoryid)
*/
47 class NaiveBayesianStorage
{
/** database handle, obtained from Db::get() in the constructor. */
var $con = null;

/** id handed to the constructor; stored as-is. */
var $owner_uid = null;

/** build the storage accessor.

Declared as __construct() because a method that merely shares the class
name (the PHP 4 convention) is no longer invoked by `new` on PHP 8, which
would leave $this->con unset for every other method.

@param mixed owner id, kept in $this->owner_uid
*/
function __construct($owner_uid) {
    $this->con = Db::get();
    $this->owner_uid = $owner_uid;
}
/** get the list of categories with basic data.

Reads every row of ttrss_plugin_af_sort_bayes_categories.

@return array key = category ids, values = array(keys = 'probability', 'word_count')
*/
function getCategories() {
    $categories = array();
    $rs = $this->con->query('SELECT * FROM ttrss_plugin_af_sort_bayes_categories');

    // The row has to be captured from fetch_assoc(): $rs is only the result
    // handle and cannot be indexed by column name.
    while ($row = $this->con->fetch_assoc($rs)) {
        $categories[$row['category_id']] = array(
            'probability' => $row['probability'],
            'word_count' => $row['word_count'],
        );
    }

    return $categories;
}
/** tell whether a word has already been learnt.

Looks the word up in ttrss_plugin_af_sort_bayes_wordfreqs, whatever the category.

@param string word to look for
@return bool true when at least one row matches the word
*/
function wordExists($word) {
    $escaped_word = $this->con->escape_string($word);
    $result = $this->con->query("SELECT * FROM ttrss_plugin_af_sort_bayes_wordfreqs WHERE word='" . $escaped_word . "'");
    $matches = $this->con->num_rows($result);

    return $matches != 0;
}
/** get details of a word in a category.

@param string word
@param string category id
@return array ('count' => count); count is 0 when the word is unknown in that category
*/
function getWord($word, $category_id) {
    $details = array();

    $rs = $this->con->query("SELECT * FROM ttrss_plugin_af_sort_bayes_wordfreqs WHERE word='" . $this->con->escape_string($word) . "' AND category_id='" . $this->con->escape_string($category_id) . "'");

    if ($this->con->num_rows($rs) == 0) {
        $details['count'] = 0;
    } else {
        // Read the column from the fetched row; the result handle itself
        // does not expose column values.
        $row = $this->con->fetch_assoc($rs);
        $details['count'] = $row['count'];
    }

    return $details;
}
/** add occurrences of a word to a category.

A word not yet counted in the category gets a fresh row; otherwise the
stored count is incremented.

@param string word
@param int number of occurrences to add
@param string category id
@return mixed whatever the connection's execute() returns
*/
function updateWord($word, $count, $category_id) {
    $current = $this->getWord($word, $category_id);

    $safe_word = $this->con->escape_string($word);
    $safe_category = $this->con->escape_string($category_id);

    if (0 != $current['count']) {
        // already known in this category: bump the stored count
        $sql = "UPDATE ttrss_plugin_af_sort_bayes_wordfreqs SET count = count + " . (int) $count . " WHERE category_id = '" . $safe_category . "' AND word = '" . $safe_word . "'";

        return $this->con->execute($sql);
    }

    // first sighting in this category: create the row
    $safe_count = $this->con->escape_string((int) $count);
    $sql = "INSERT INTO ttrss_plugin_af_sort_bayes_wordfreqs (word, category_id, count) VALUES ('" . $safe_word . "', '" . $safe_category . "', '" . $safe_count . "')";

    return $this->con->execute($sql);
}
/** take occurrences of a word away from a category.

When the removal would bring a known word down to zero or below, its row
is deleted; in every other case the stored count is decremented.

@param string word
@param int number of occurrences to remove
@param string category id
@return mixed whatever the connection's execute() returns
*/
function removeWord($word, $count, $category_id) {
    $current = $this->getWord($word, $category_id);

    // short-circuit kept on purpose: the subtraction only happens for a known word
    $drop_row = ($current['count'] != 0) && (($current['count'] - $count) <= 0);

    $safe_word = $this->con->escape_string($word);
    $safe_category = $this->con->escape_string($category_id);

    if ($drop_row) {
        $sql = "DELETE FROM ttrss_plugin_af_sort_bayes_wordfreqs WHERE word='" . $safe_word . "' AND category_id='" . $safe_category . "'";
    } else {
        $sql = "UPDATE ttrss_plugin_af_sort_bayes_wordfreqs SET count = count - " . (int) $count . " WHERE category_id = '" . $safe_category . "' AND word = '" . $safe_word . "'";
    }

    return $this->con->execute($sql);
}
/** update the probabilities of the categories and word count.

This function must be run after a set of training.

Each category's word_count becomes the sum of its word frequencies and
its probability becomes that sum divided by the grand total over all
categories. When no words are stored at all, every category is reset to
word_count=0 and probability=0.

@return bool always true
*/
function updateProbabilities() {
    // first update the word count of each category
    $rs = $this->con->query("SELECT category_id, SUM(count) AS total FROM ttrss_plugin_af_sort_bayes_wordfreqs WHERE 1 GROUP BY category_id");

    // Keep the fetched rows: the result set is walked once here, and the
    // per-category pass below must not depend on rewinding it.
    $rows = array();
    $total_words = 0;

    while ($row = $this->con->fetch_assoc($rs)) {
        $rows[] = $row;
        $total_words += $row['total'];
    }

    if ($total_words == 0) {
        $this->con->execute("UPDATE ttrss_plugin_af_sort_bayes_categories SET word_count=0, probability=0 WHERE 1");

        return true;
    }

    foreach ($rows as $row) {
        $proba = $row['total'] / $total_words;

        // category_id is escaped like in every other query of this class
        $this->con->execute("UPDATE ttrss_plugin_af_sort_bayes_categories SET word_count=" . (int) $row['total'] . ", probability=" . $proba . " WHERE category_id = '" . $this->con->escape_string($row['category_id']) . "'");
    }

    return true;
}
/** store a reference document in the database.

@param string reference id, must be unique
@param string category id
@param string content of the reference
@return mixed whatever the connection's execute() returns
*/
function saveReference($doc_id, $category_id, $content) {
    $safe_id = $this->con->escape_string($doc_id);
    $safe_category = $this->con->escape_string($category_id);
    $safe_content = $this->con->escape_string($content);

    $sql = "INSERT INTO ttrss_plugin_af_sort_bayes_references (id, category_id, content) VALUES ('" . $safe_id . "', '" . $safe_category . "', '" . $safe_content . "')";

    return $this->con->execute($sql);
}
/** get a reference from the database.

@param string reference id
@return array reference ('category_id' => ..., 'content' => ..., 'id' => ...),
        or an empty array when no reference has that id
*/
function getReference($doc_id) {
    $ref = array();

    $rs = $this->con->query("SELECT * FROM ttrss_plugin_af_sort_bayes_references WHERE id='" . $this->con->escape_string($doc_id) . "'");

    if ($this->con->num_rows($rs) == 0) {
        return $ref;
    }

    // Column values live in the fetched row, not in the result handle.
    $row = $this->con->fetch_assoc($rs);

    $ref['category_id'] = $row['category_id'];
    $ref['content'] = $row['content'];
    $ref['id'] = $row['id'];

    return $ref;
}
/** delete a reference from the database.

@param string reference id
@return mixed whatever the connection's execute() returns
*/
function removeReference($doc_id) {
    $safe_id = $this->con->escape_string($doc_id);
    $sql = "DELETE FROM ttrss_plugin_af_sort_bayes_references WHERE id='" . $safe_id . "'";

    return $this->con->execute($sql);
}