]> git.wh0rd.org - tt-rss.git/blame - classes/feeditem/rss.php
parser: use node->c14n() instead of expecting html in nodeValue
[tt-rss.git] / classes / feeditem / rss.php
CommitLineData
04d2f9c8 1<?php
b4d16900 2class FeedItem_RSS extends FeedItem_Common {
/**
 * Returns the identifier of this item.
 *
 * The text of the first <guid> element found under the item is used;
 * when the item has no <guid>, the result of get_link() is returned.
 *
 * @return string|null
 */
function get_id() {
    $guid = $this->elem->getElementsByTagName("guid")->item(0);

    // item(0) is null when the node list is empty
    return $guid ? $guid->nodeValue : $this->get_link();
}
12
/**
 * Returns the publication time of this item.
 *
 * The first <pubDate> element is preferred; only when it is missing is
 * the "dc:date" XPath lookup performed. The element text is handed to
 * strtotime() unchanged.
 *
 * @return int|false|null strtotime() result, or null when neither
 *                        element is present
 */
function get_date() {
    $stamp = $this->elem->getElementsByTagName("pubDate")->item(0);

    if (!$stamp) {
        // NOTE(review): relies on the "dc" prefix being registered on
        // $this->xpath elsewhere — confirm against the parser setup.
        $stamp = $this->xpath->query("dc:date", $this->elem)->item(0);
    }

    if (!$stamp) {
        return null;
    }

    return strtotime($stamp->nodeValue);
}
26
/**
 * Returns the URL this item points to.
 *
 * Sources are tried in this order:
 *  1. the first atom:link with an href whose rel is absent, "alternate"
 *     or "standout";
 *  2. the first <guid> carrying isPermaLink="true";
 *  3. the first <link> element.
 *
 * @return string|null trimmed URL, or null when none of the above exist
 */
function get_link() {
    $candidates = $this->xpath->query("atom:link", $this->elem);

    foreach ($candidates as $candidate) {
        if (!$candidate || !$candidate->hasAttribute("href")) {
            continue;
        }

        // a link without rel is accepted; otherwise only these two rels
        if ($candidate->hasAttribute("rel")) {
            $rel = $candidate->getAttribute("rel");

            if ($rel != "alternate" && $rel != "standout") {
                continue;
            }
        }

        return trim($candidate->getAttribute("href"));
    }

    $guid = $this->elem->getElementsByTagName("guid")->item(0);

    if ($guid && $guid->hasAttributes()) {
        if ($guid->getAttribute("isPermaLink") == "true") {
            return trim($guid->nodeValue);
        }
    }

    $plain = $this->elem->getElementsByTagName("link")->item(0);

    return $plain ? trim($plain->nodeValue) : null;
}
52
/**
 * Returns the trimmed title of this item.
 *
 * @return string|null null when no <title> element can be found
 */
function get_title() {
    $node = $this->xpath->query("title", $this->elem)->item(0);

    if (!$node) {
        // When the document declares a default namespace the unprefixed
        // XPath lookup above finds nothing, so fall back to a plain
        // tag-name search.
        $node = $this->elem->getElementsByTagName("title")->item(0);
    }

    return $node ? trim($node->nodeValue) : null;
}
68
/**
 * Returns the body of this item as markup.
 *
 * Both content:encoded and <description> are looked up; when both are
 * present the one with the longer text wins (ties go to <description>).
 *
 * The chosen node is serialised with c14n() only when it actually
 * contains element nodes (inline markup). For the usual case of
 * entity-escaped or CDATA-wrapped HTML the node holds text only, and
 * c14n() would return the wrapper tag plus re-escaped markup
 * ("<description>&lt;p&gt;...</description>") instead of the HTML, so
 * nodeValue is returned there.
 *
 * @return string|null null when the item has neither element
 */
function get_content() {
    $encoded = $this->xpath->query("content:encoded", $this->elem)->item(0);
    $description = $this->elem->getElementsByTagName("description")->item(0);

    if ($encoded && $description) {
        $chosen = mb_strlen($encoded->nodeValue) > mb_strlen($description->nodeValue)
            ? $encoded
            : $description;
    } else {
        $chosen = $encoded ? $encoded : $description;
    }

    if (!$chosen) {
        return null;
    }

    // no element descendants: the text itself is the (decoded) markup
    if ($chosen->getElementsByTagName("*")->length == 0) {
        return $chosen->nodeValue;
    }

    return $chosen->c14n();
}
86
/**
 * Returns the summary of this item taken from its first <description>.
 *
 * c14n() is used only when the element contains element nodes. For a
 * text-only or CDATA-only <description>, c14n() would return the wrapper
 * tag with entity-escaped content rather than the markup itself, so
 * nodeValue is returned in that case.
 *
 * @return string|null null when the item has no <description>
 */
function get_description() {
    $summary = $this->elem->getElementsByTagName("description")->item(0);

    if (!$summary) {
        return null;
    }

    // no element descendants: the text itself is the (decoded) markup
    if ($summary->getElementsByTagName("*")->length == 0) {
        return $summary->nodeValue;
    }

    return $summary->c14n();
}
94
04d2f9c8
AD
/**
 * Collects the category names attached to this item.
 *
 * The trimmed text of every <category> element comes first, followed by
 * the trimmed text of every node matched by the "dc:subject" query.
 * Duplicates are not removed.
 *
 * @return array list of strings, possibly empty
 */
function get_categories() {
    $tagged = $this->elem->getElementsByTagName("category");
    $names = array();

    foreach ($tagged as $node) {
        $names[] = trim($node->nodeValue);
    }

    foreach ($this->xpath->query("dc:subject", $this->elem) as $node) {
        $names[] = trim($node->nodeValue);
    }

    return $names;
}
111
/**
 * Builds the list of attachments for this item.
 *
 * Four sources are scanned, and their results appended in this order:
 *  1. <enclosure> elements;
 *  2. media:content nodes matched relative to the item, titled from
 *     their own media:description when present;
 *  3. media:group nodes: the first media:content inside each group,
 *     titled from its own media:description or else the group's;
 *     groups without a media:content contribute nothing;
 *  4. media:thumbnail nodes, given the fixed type "image/generic".
 *
 * @return array list of FeedEnclosure objects, possibly empty
 */
function get_enclosures() {
    // FeedEnclosure property => source attribute, in assignment order
    $attr_map = array(
        "type" => "type",
        "link" => "url",
        "length" => "length",
        "height" => "height",
        "width" => "width",
    );

    $nodes = $this->elem->getElementsByTagName("enclosure");

    $found = array();

    foreach ($nodes as $node) {
        $enc = new FeedEnclosure();

        foreach ($attr_map as $prop => $attr) {
            $enc->$prop = $node->getAttribute($attr);
        }

        $found[] = $enc;
    }

    $nodes = $this->xpath->query("media:content", $this->elem);

    foreach ($nodes as $node) {
        $enc = new FeedEnclosure();

        foreach ($attr_map as $prop => $attr) {
            $enc->$prop = $node->getAttribute($attr);
        }

        $caption = $this->xpath->query("media:description", $node)->item(0);

        if ($caption) {
            $enc->title = strip_tags($caption->nodeValue);
        }

        $found[] = $enc;
    }

    $groups = $this->xpath->query("media:group", $this->elem);

    foreach ($groups as $group) {
        $enc = new FeedEnclosure();

        $inner = $this->xpath->query("media:content", $group)->item(0);

        if (!$inner) {
            continue;
        }

        foreach ($attr_map as $prop => $attr) {
            $enc->$prop = $inner->getAttribute($attr);
        }

        // prefer the description nested in the content, then the group's
        $caption = $this->xpath->query("media:description", $inner)->item(0);

        if (!$caption) {
            $caption = $this->xpath->query("media:description", $group)->item(0);
        }

        if ($caption) {
            $enc->title = strip_tags($caption->nodeValue);
        }

        $found[] = $enc;
    }

    $thumbs = $this->xpath->query("media:thumbnail", $this->elem);

    foreach ($thumbs as $thumb) {
        $enc = new FeedEnclosure();

        $enc->type = "image/generic";
        $enc->link = $thumb->getAttribute("url");
        $enc->height = $thumb->getAttribute("height");
        $enc->width = $thumb->getAttribute("width");

        $found[] = $enc;
    }

    return $found;
}
188
04d2f9c8
AD
189}
190?>