]> git.wh0rd.org - tt-rss.git/commitdiff
implement experimental article on-import data filters
authorAndrew Dolgov <fox@madoka.volgo-balt.ru>
Mon, 26 Nov 2012 10:33:18 +0000 (14:33 +0400)
committerAndrew Dolgov <fox@madoka.volgo-balt.ru>
Mon, 26 Nov 2012 10:33:18 +0000 (14:33 +0400)
classes/filter.php [new file with mode: 0644]
classes/filter/redditimgur.php [new file with mode: 0644]
include/functions.php
include/rssfuncs.php

diff --git a/classes/filter.php b/classes/filter.php
new file mode 100644 (file)
index 0000000..8d6bf6f
--- /dev/null
@@ -0,0 +1,14 @@
+<?php
+/**
+ * Base class for article on-import filters.
+ *
+ * Stores the value given to the constructor and, by default, hands
+ * articles back unchanged.
+ */
+class Filter {
+       /** Value passed to the constructor, kept as-is. */
+       protected $link;
+
+       /**
+        * @param mixed $link stored on the instance without inspection
+        */
+       public function __construct($link) {
+               $this->link = $link;
+       }
+
+       /**
+        * Pass-through filter.
+        *
+        * @param mixed $article article data
+        * @return mixed the very same $article value
+        */
+       public function filter_article($article) {
+               return $article;
+       }
+}
+?>
diff --git a/classes/filter/redditimgur.php b/classes/filter/redditimgur.php
new file mode 100644 (file)
index 0000000..6b41015
--- /dev/null
@@ -0,0 +1,47 @@
+<?php
+/**
+ * Article filter for reddit entries whose content mentions i.imgur.com.
+ *
+ * Anchors pointing at an i.imgur.com .jpg are swapped for an inline <img>;
+ * images that were already in the content (treated as thumbnails) are
+ * removed.
+ */
+class Filter_RedditImgur {
+
+       /**
+        * Rewrite the "content" of a matching article.
+        *
+        * @param array $article article data; "link" and "content" are read
+        * @return array $article, with "content" replaced by the serialized
+        *               <body> when the markup could be parsed
+        */
+       function filter_article($article) {
+
+               // only reddit entries that mention imgur are touched
+               if (strpos($article["link"], "reddit.com/r/") === FALSE ||
+                               strpos($article["content"], "i.imgur.com") === FALSE) {
+                       return $article;
+               }
+
+               // Declare the charset up front, otherwise the HTML parser does not
+               // read the fragment as UTF-8 and non-ASCII text comes back mangled.
+               // The <head> stub never reaches the output: only <body> is saved.
+               // NOTE(review): assumes the content is UTF-8 - confirm against the feed parser.
+               $charset_hack = '<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/></head>';
+
+               $doc = new DOMDocument();
+
+               // @ silences the warnings the parser raises on sloppy feed markup;
+               // failure is reported through the return value, so test that
+               // rather than the (always truthy) document object.
+               if (!@$doc->loadHTML($charset_hack . $article["content"])) {
+                       return $article;
+               }
+
+               $xpath = new DOMXPath($doc);
+               $entries = $xpath->query('(//a[@href]|//img[@src])');
+
+               foreach ($entries as $entry) {
+                       if ($entry->hasAttribute("href")) {
+                               // dots are escaped so that only a literal "i.imgur.com/....jpg" matches
+                               if (preg_match('/i\.imgur\.com\/.*?\.jpg/', $entry->getAttribute("href"))) {
+
+                                       $img = $doc->createElement('img');
+                                       $img->setAttribute("src", $entry->getAttribute("href"));
+
+                                       $entry->parentNode->replaceChild($img, $entry);
+                               }
+                       }
+
+                       // remove tiny thumbnails
+                       if ($entry->hasAttribute("src")) {
+                               $wrapper = $entry->parentNode;
+
+                               if ($wrapper && $wrapper->parentNode) {
+                                       if (strtolower($wrapper->nodeName) == "body") {
+                                               // dropping <body> itself would make the lookup below
+                                               // fail and discard every change; drop just the image
+                                               $wrapper->removeChild($entry);
+                                       } else {
+                                               $wrapper->parentNode->removeChild($wrapper);
+                                       }
+                               }
+                       }
+               }
+
+               $node = $doc->getElementsByTagName('body')->item(0);
+
+               if ($node) {
+                       $article["content"] = $doc->saveXML($node, LIBXML_NOEMPTYTAG);
+               }
+
+               return $article;
+       }
+}
+?>
index 55333ccd667e265649c6e2738234a39755dc3ac6..1c3f4cbb9d1e071aee7d0462303dca181ce9e535 100644 (file)
@@ -7,11 +7,19 @@
        function __autoload($class) {
                $class_file = str_replace("_", "/", strtolower(basename($class)));
 
+               $file = dirname(__FILE__)."/../plugins/$class_file.php"; // plugins/ is probed first; $class_file is the lower-cased class name with "_" mapped to "/"
+
+               if (file_exists($file)) {
+                       require $file;
+                       return; // loaded from plugins/, so the classes/ lookup below is skipped
+               }
+
                $file = dirname(__FILE__)."/../classes/$class_file.php";
 
                if (file_exists($file)) {
                        require $file;
                }
+
        }
 
        mb_internal_encoding("UTF-8");
index e12044ea516cc2752af2e5d2a8bbb121c78b6b17..be7e19edcc8516652ca625147d68c22951f9b6df 100644 (file)
                                _debug("update_rss_feed: " . count($filters) . " filters loaded.");
                        }
 
+                       $filter_plugins = array();
+
+                       if (defined('_ARTICLE_FILTER_PLUGINS')) {
+                               foreach (explode(",", _ARTICLE_FILTER_PLUGINS) as $p) {
+                                       $pclass = "filter_" . trim($p);
+
+                                       if (class_exists($pclass)) {
+                                               $plugin = new $pclass($link);
+                                               array_push($filter_plugins, $plugin);
+                                       }
+                               }
+                       }
+
+                       if ($debug_enabled) {
+                               _debug("update_rss_feed: " . count($filter_plugins) . " filter plugins loaded.");
+                       }
+
                        if ($use_simplepie) {
                                $iterator = $rss->get_items();
                        } else {
 
                                $entry_content = db_escape_string($entry_content, false);
 
-                               $content_hash = "SHA1:" . sha1(strip_tags($entry_content));
-
                                $entry_title = db_escape_string($entry_title);
                                $entry_link = db_escape_string($entry_link);
                                $entry_comments = mb_substr(db_escape_string($entry_comments), 0, 250);
                                        _debug("update_rss_feed: done collecting data [TITLE:$entry_title]");
                                }
 
+                               // TODO: less memory-hungry implementation
+                               if (count($filter_plugins) > 0) {
+                                       if ($debug_enabled) {
+                                               _debug("update_rss_feed: applying plugin filters...");
+                                       }
+
+                                       $article = array("owner_uid" => $owner_uid,
+                                               "title" => $entry_title,
+                                               "content" => $entry_content,
+                                               "link" => $entry_link,
+                                               "tags" => $entry_tags,
+                                               "author" => $entry_author);
+
+                                       foreach ($filter_plugins as $plugin) {
+                                               $article = $plugin->filter_article($article);
+                                       }
+
+                                       $entry_title = $article["title"];
+                                       $entry_content = $article["content"];
+                                       $entry_tags = $article["tags"];
+                                       $entry_author = $article["author"];
+                               }
+
+                               $content_hash = "SHA1:" . sha1(strip_tags($entry_content));
+
                                db_query($link, "BEGIN");
 
                                if (db_num_rows($result) == 0) {