]> git.wh0rd.org - tt-rss.git/blame - functions.php
updated schema (md5_hash is not unique any more), feed parsing fixes
[tt-rss.git] / functions.php
CommitLineData
40d13c28
AD
1<?
2 require_once 'config.php';
3
/**
 * Refreshes the feeds registered in ttrss_feeds.
 *
 * When $fetch is false only feeds that look stale are refreshed: those whose
 * last_updated is null, whose title is empty, or whose last update is more
 * than MIN_UPDATE_TIME seconds old. When $fetch is true every feed is
 * refreshed unconditionally.
 *
 * @param resource $link   PostgreSQL connection used for every query.
 * @param bool     $fetch  Force a refresh of all feeds when true.
 * @return int Sum of the counters reported by update_rss_feed() for each feed.
 */
function update_all_feeds($link, $fetch) {

	// Always address the connection explicitly instead of relying on the
	// implicit "last opened" connection.
	pg_query($link, "BEGIN");

	if (!$fetch) {

		$result = pg_query($link, "SELECT feed_url,id FROM ttrss_feeds WHERE
			last_updated is null OR title = '' OR
			EXTRACT(EPOCH FROM NOW()) - EXTRACT(EPOCH FROM last_updated) > " .
			MIN_UPDATE_TIME);

	} else {

		$result = pg_query($link, "SELECT feed_url,id FROM ttrss_feeds");
	}

	$num_unread = 0;

	// pg_query() returns false on failure; do not hand that to pg_fetch_assoc().
	if ($result) {
		while ($line = pg_fetch_assoc($result)) {
			// The cast keeps the sum numeric even if the callee returns nothing.
			$num_unread += (int) update_rss_feed($link, $line["feed_url"], $line["id"]);
		}
	}

	// NOTE(review): update_rss_feed() issues its own BEGIN/COMMIT, so this
	// outer pair is effectively nested - confirm whether both are wanted.
	pg_query($link, "COMMIT");

	return $num_unread;
}
29
/**
 * Fetches one feed and synchronises its items into ttrss_entries.
 *
 * For every item that has a title, a link and some content, the entry is
 * looked up by guid. Unknown guids are inserted; known ones are updated
 * (title, link, content and, where applicable, timestamp, md5 hash and
 * last_read). If the feed has no stored title yet, the channel title is
 * stored first. After the items are processed the feed's last_updated is
 * stamped, provided the most recent query succeeded.
 *
 * @param resource $link      PostgreSQL connection used for every query.
 * @param string   $feed_url  URL handed to fetch_rss().
 * @param mixed    $feed      Id of the feed row in ttrss_feeds.
 * @return int Number of entries successfully inserted or updated
 *             (0 when the feed could not be fetched).
 */
function update_rss_feed($link, $feed_url, $feed) {

	// Silence the fetcher while it runs, then set the reporting level the
	// rest of this code has always run with.
	error_reporting(0);
	$rss = fetch_rss($feed_url);
	error_reporting (E_ERROR | E_WARNING | E_PARSE);

	$num_unread = 0;

	if (!$rss) {
		return $num_unread;
	}

	$feed_sql = pg_escape_string($link, (string) $feed);

	$result = pg_query($link, "SELECT title FROM ttrss_feeds WHERE id = '$feed_sql'");

	$registered_title = "";

	if ($result && pg_num_rows($result) > 0) {
		$registered_title = pg_fetch_result($result, 0, "title");
	}

	if (!$registered_title) {
		// The channel title comes from the remote feed: escape it.
		$feed_title = isset($rss->channel["title"]) ? $rss->channel["title"] : "";
		$feed_title = pg_escape_string($link, (string) $feed_title);
		pg_query($link, "UPDATE ttrss_feeds SET title = '$feed_title' WHERE id = '$feed_sql'");
	}

	pg_query($link, "BEGIN");

	foreach ($rss->items as $item) {

		// guid preference: id, then guid, then link
		$entry_guid = isset($item["id"]) ? $item["id"] : "";

		if (!$entry_guid && isset($item["guid"])) $entry_guid = $item["guid"];
		if (!$entry_guid && isset($item["link"])) $entry_guid = $item["link"];

		$entry_timestamp = "";

		$rss_2_date = isset($item['pubdate']) ? $item['pubdate'] : "";
		$rss_1_date = isset($item['dc']['date']) ? $item['dc']['date'] : "";
		$atom_date = isset($item['issued']) ? $item['issued'] : "";

		$no_orig_date = 'false';

		// later assignments win: pubdate over dc:date over issued
		if ($atom_date != "") $entry_timestamp = parse_w3cdtf($atom_date);
		if ($rss_1_date != "") $entry_timestamp = parse_w3cdtf($rss_1_date);
		if ($rss_2_date != "") $entry_timestamp = strtotime($rss_2_date);

		if ($entry_timestamp == "") {
			// no usable date in the item: fall back to "now" and remember it
			$entry_timestamp = time();
			$no_orig_date = 'true';
		}

		if (!$entry_timestamp) continue;

		$entry_title = isset($item["title"]) ? $item["title"] : "";
		$entry_link = isset($item["link"]) ? $item["link"] : "";

		if (!$entry_title) continue;
		if (!$entry_link) continue;

		$entry_content = isset($item["description"]) ? $item["description"] : "";
		if (!$entry_content && isset($item["content"])) $entry_content = $item["content"];

		if (!$entry_content) continue;

		// Everything below comes from the remote feed and goes into SQL
		// string literals, so every value is escaped - not only title/content.
		$entry_content = pg_escape_string($link, (string) $entry_content);
		$title_sql = pg_escape_string($link, (string) $entry_title);
		$guid_sql = pg_escape_string($link, (string) $entry_guid);
		$link_sql = pg_escape_string($link, (string) $entry_link);

		// The hash is deliberately taken over the escaped content so that it
		// stays comparable with hashes already stored by earlier runs.
		$content_md5 = md5($entry_content);

		// date() yields the same numeric "Y/m/d H:M:S" string the previous
		// strftime("%Y/%m/%d %H:%M:%S") call produced.
		$entry_timestamp_fmt = date("Y/m/d H:i:s", $entry_timestamp);

		$result = pg_query($link, "
			SELECT
				id,unread,md5_hash,last_read,no_orig_date,title,
				EXTRACT(EPOCH FROM updated) as updated_timestamp
			FROM
				ttrss_entries
			WHERE
				guid = '$guid_sql'");

		// lookup failed: nothing sensible can be done for this item
		if (!$result) continue;

		if (pg_num_rows($result) == 0) {

			$query = "INSERT INTO ttrss_entries
					(title, guid, link, updated, content, feed_id,
						md5_hash, no_orig_date)
				VALUES
					('$title_sql', '$guid_sql', '$link_sql',
						'$entry_timestamp_fmt', '$entry_content', '$feed_sql',
						'$content_md5', $no_orig_date)";

			$result = pg_query($link, $query);

			if ($result) ++$num_unread;

		} else {

			$entry_id = pg_fetch_result($result, 0, "id");
			$last_read = pg_fetch_result($result, 0, "last_read");

			$unread = pg_fetch_result($result, 0, "unread");
			$md5_hash = pg_fetch_result($result, 0, "md5_hash");
			$stored_no_orig_date = pg_fetch_result($result, 0, "no_orig_date");
			$orig_title = pg_fetch_result($result, 0, "title");

			// Reset the optional SET fragments for every item; otherwise a
			// fragment built for an earlier item would leak into this UPDATE.
			$last_read_qpart = '';
			$update_timestamp_qpart = '';
			$update_md5_qpart = '';

			// disable update detection for posts which didn't have correct
			// publishment date, because they will always register as updated
			// sadly this doesn't catch feed generators which input current date
			// in posts all the time (some planets do this)

			if ($stored_no_orig_date != 't' && (!$last_read || $md5_hash != $content_md5)) {
				$last_read_qpart = 'last_read = null,';
			}

			// mark post as updated on title change
			// maybe we should mark it as unread instead?
			// Compare against the raw title: the escaped form differs from
			// the stored value whenever the title contains a quote.

			if ($orig_title != $entry_title) {
				$last_read_qpart = 'last_read = null,';
			}

			// don't bother updating timestamps on posts with broken pubDate

			if ($stored_no_orig_date != 't') {
				$update_timestamp_qpart = "updated = '$entry_timestamp_fmt',";
			}

			if ($content_md5 != $md5_hash) {
				$update_md5_qpart = "md5_hash = '$content_md5',";
			}

			$query = "UPDATE ttrss_entries
				SET
					title ='$title_sql',
					link = '$link_sql',
					$update_timestamp_qpart
					$last_read_qpart
					$update_md5_qpart
					content = '$entry_content',
					unread = '$unread'
				WHERE
					id = '$entry_id'";

			$result = pg_query($link, $query);

			if ($result) ++$num_unread;

		}

	}

	if ($result) {
		// Stamp only this feed; without the WHERE clause every feed in the
		// table would be marked as freshly updated.
		$result = pg_query($link,
			"UPDATE ttrss_feeds SET last_updated = NOW() WHERE id = '$feed_sql'");
	}

	pg_query($link, "COMMIT");

	return $num_unread;
}
192
193
194
195
196?>