git.wh0rd.org - tt-rss.git/blobdiff - classes/feeditem/rss.php
parser: trim some feed-extracted data link titles and links
[tt-rss.git] / classes / feeditem / rss.php
index e5e2a8e56544626c3dc98b46ef55d2b5c4cb8d09..bf08a1dfefe747a47c1192613d45cbd22eae0998 100644 (file)
@@ -1,13 +1,5 @@
 <?php
-class FeedItem_RSS {
-       private $elem;
-       private $xpath;
-
-       function __construct($elem, $doc, $xpath) {
-               $this->elem = $elem;
-               $this->xpath = $xpath;
-       }
-
+class FeedItem_RSS extends FeedItem_Common {
        function get_id() {
                $id = $this->elem->getElementsByTagName("guid")->item(0);
 
@@ -24,13 +16,37 @@ class FeedItem_RSS {
                if ($pubDate) {
                        return strtotime($pubDate->nodeValue);
                }
+
+               $date = $this->xpath->query("dc:date", $this->elem)->item(0);
+
+               if ($date) {
+                       return strtotime($date->nodeValue);
+               }
        }
 
        function get_link() {
+               $links = $this->xpath->query("atom:link", $this->elem);
+
+               foreach ($links as $link) {
+                       if ($link && $link->hasAttribute("href") &&
+                               (!$link->hasAttribute("rel")
+                                       || $link->getAttribute("rel") == "alternate"
+                                       || $link->getAttribute("rel") == "standout")) {
+
+                               return trim($link->getAttribute("href"));
+                       }
+               }
+
+               $link = $this->elem->getElementsByTagName("guid")->item(0);
+
+               if ($link && $link->hasAttributes() && $link->getAttribute("isPermaLink") == "true") {
+                       return trim($link->nodeValue);
+               }
+
                $link = $this->elem->getElementsByTagName("link")->item(0);
 
                if ($link) {
-                       return $link->nodeValue;
+                       return trim($link->nodeValue);
                }
        }
 
@@ -38,15 +54,26 @@ class FeedItem_RSS {
                $title = $this->elem->getElementsByTagName("title")->item(0);
 
                if ($title) {
-                       return $title->nodeValue;
+                       return trim($title->nodeValue);
                }
        }
 
        function get_content() {
-               $content = $this->elem->getElementsByTagName("description")->item(0);
+               $contentA = $this->xpath->query("content:encoded", $this->elem)->item(0);
+               $contentB = $this->elem->getElementsByTagName("description")->item(0);
+
+               if ($contentA && !$contentB) {
+                       return $contentA->nodeValue;
+               }
+
+
+               if ($contentB && !$contentA) {
+                       return $contentB->nodeValue;
+               }
 
-               if ($content) {
-                       return $content->nodeValue;
+               if ($contentA && $contentB) {
+                       return mb_strlen($contentA->nodeValue) > mb_strlen($contentB->nodeValue) ?
+                               $contentA->nodeValue : $contentB->nodeValue;
                }
        }
 
@@ -58,22 +85,18 @@ class FeedItem_RSS {
                }
        }
 
-       // todo
-       function get_comments_url() {
-
-       }
-
-       // todo
-       function get_comments_count() {
-
-       }
-
        function get_categories() {
                $categories = $this->elem->getElementsByTagName("category");
                $cats = array();
 
                foreach ($categories as $cat) {
-                       array_push($cats, $cat->nodeValue);
+                       array_push($cats, trim($cat->nodeValue));
+               }
+
+               $categories = $this->xpath->query("dc:subject", $this->elem);
+
+               foreach ($categories as $cat) {
+                       array_push($cats, trim($cat->nodeValue));
                }
 
                return $cats;
@@ -96,8 +119,6 @@ class FeedItem_RSS {
 
                $enclosures = $this->xpath->query("media:content", $this->elem);
 
-               $encs = array();
-
                foreach ($enclosures as $enclosure) {
                        $enc = new FeedEnclosure();
 
@@ -105,25 +126,50 @@ class FeedItem_RSS {
                        $enc->link = $enclosure->getAttribute("url");
                        $enc->length = $enclosure->getAttribute("length");
 
+                       $desc = $this->xpath->query("media:description", $enclosure)->item(0);
+                       if ($desc) $enc->title = strip_tags($desc->nodeValue);
+
                        array_push($encs, $enc);
                }
 
-               return $encs;
-       }
 
-       function get_author() {
-               $author = $this->elem->getElementsByTagName("author")->item(0);
+               $enclosures = $this->xpath->query("media:group", $this->elem);
 
-               if ($author) {
-                       $name = $author->getElementsByTagName("name")->item(0);
+               foreach ($enclosures as $enclosure) {
+                       $enc = new FeedEnclosure();
 
-                       if ($name) return $name->nodeValue;
+                       $content = $this->xpath->query("media:content", $enclosure)->item(0);
 
-                       $email = $author->getElementsByTagName("email")->item(0);
+                       if ($content) {
+                               $enc->type = $content->getAttribute("type");
+                               $enc->link = $content->getAttribute("url");
+                               $enc->length = $content->getAttribute("length");
 
-                       if ($email) return $email->nodeValue;
+                               $desc = $this->xpath->query("media:description", $content)->item(0);
+                               if ($desc) {
+                                       $enc->title = strip_tags($desc->nodeValue);
+                               } else {
+                                       $desc = $this->xpath->query("media:description", $enclosure)->item(0);
+                                       if ($desc) $enc->title = strip_tags($desc->nodeValue);
+                               }
 
+                               array_push($encs, $enc);
+                       }
                }
+
+               $enclosures = $this->xpath->query("media:thumbnail", $this->elem);
+
+               foreach ($enclosures as $enclosure) {
+                       $enc = new FeedEnclosure();
+
+                       $enc->type = "image/generic";
+                       $enc->link = $enclosure->getAttribute("url");
+
+                       array_push($encs, $enc);
+               }
+
+               return $encs;
        }
+
 }
 ?>