From: Andrew Dolgov Date: Fri, 22 Jan 2016 22:48:32 +0000 (+0300) Subject: parser: properly support tag subtrees instead of text content for article content X-Git-Tag: 16.3~49 X-Git-Url: https://git.wh0rd.org/?a=commitdiff_plain;h=7d1e15c396f36c261d0c8067fc316f6c3e8e1948;p=tt-rss.git parser: properly support tag subtrees instead of text content for article content --- diff --git a/classes/feeditem/atom.php b/classes/feeditem/atom.php index dfac7149..e132789b 100644 --- a/classes/feeditem/atom.php +++ b/classes/feeditem/atom.php @@ -75,7 +75,7 @@ class FeedItem_Atom extends FeedItem_Common { } } - return $content->nodeValue; + return $this->subtree_or_text($content); } } @@ -95,7 +95,7 @@ class FeedItem_Atom extends FeedItem_Common { } } - return $content->nodeValue; + return $this->subtree_or_text($content); } } diff --git a/classes/feeditem/common.php b/classes/feeditem/common.php index 80bebf8f..070692d7 100644 --- a/classes/feeditem/common.php +++ b/classes/feeditem/common.php @@ -70,6 +70,17 @@ abstract class FeedItem_Common extends FeedItem { } } + function count_children($node) { + return $node->getElementsByTagName("*")->length; + } + + function subtree_or_text($node) { + if ($this->count_children($node) == 0) { + return $node->nodeValue; + } else { + return $node->c14n(); + } + } } ?> diff --git a/classes/feeditem/rss.php b/classes/feeditem/rss.php index 27a364b8..080e4083 100644 --- a/classes/feeditem/rss.php +++ b/classes/feeditem/rss.php @@ -71,17 +71,19 @@ class FeedItem_RSS extends FeedItem_Common { $contentB = $this->elem->getElementsByTagName("description")->item(0); if ($contentA && !$contentB) { - return $contentA->nodeValue; + return $this->subtree_or_text($contentA); } if ($contentB && !$contentA) { - return $contentB->nodeValue; + return $this->subtree_or_text($contentB); } if ($contentA && $contentB) { - return mb_strlen($contentA->nodeValue) > mb_strlen($contentB->nodeValue) ? - $contentA->nodeValue : $contentB->nodeValue; + $resultA = $this->subtree_or_text($contentA); + $resultB = $this->subtree_or_text($contentB); + + return mb_strlen($resultA) > mb_strlen($resultB) ? $resultA : $resultB; } }