git.wh0rd.org Git - tt-rss.git/commitdiff
add "extractfeedurls" rpc action that extracts the feed URLs from a HTML page
author: Christian Weiske <cweiske@cweiske.de>
Sun, 7 Nov 2010 14:45:50 +0000 (15:45 +0100)
committer: Andrew Dolgov <fox@madoka.volgo-balt.ru>
Mon, 8 Nov 2010 20:10:22 +0000 (23:10 +0300)
functions.php
modules/backend-rpc.php

index ae37e7d84327b1b36edc2f7b313effe9bff0ee77..2373d54356d2215baedafda7d98d79886aad5735 100644 (file)
        function subscribe_to_feed($link, $url, $cat_id = 0, 
                        $auth_login = '', $auth_pass = '') {
 
-               $url   = fix_url($url);
-               $parts = parse_url($url);
-
+               $url = fix_url($url);
                if (!validate_feed_url($url)) return 2;
 
-               if ($parts['scheme'] == 'feed') $parts['scheme'] = 'http';
-
-               $url = make_url_from_parts($parts);
-
                if ($cat_id == "0" || !$cat_id) {
                        $cat_qpart = "NULL";
                } else {
 
        /**
         * Fixes incomplete URLs by prepending "http://".
+        * Also replaces feed:// with http://, and
+        * appends a trailing slash if the url is a domain name only.
         *
         * @param string $url Possibly incomplete URL
         *
        function fix_url($url) {
                if (strpos($url, '://') === false) {
                        $url = 'http://' . $url;
+               } else if (substr($url, 0, 5) == 'feed:') {
+                       $url = 'http:' . substr($url, 5);
+               }
+
+               //prepend slash if the URL has no slash in it
+               // "http://www.example" -> "http://www.example/"
+               if (strpos($url, '/', 7) === false) {
+                       $url .= '/';
                }
                return $url;
        }
                }
                return false;
        }
+
+       /**
+        * Extracts RSS/Atom feed URLs from the given HTML URL.
+        *
+        * Only http:// and https:// pages are fetched; any other scheme
+        * (file://, php://, ...) yields an empty result so that a
+        * user-supplied URL cannot be used to read local resources.
+        * Relative link targets are resolved against the page URL.
+        *
+        * @param string $url HTML page URL
+        *
+        * @return array Array of feeds. Key is the full URL, value the title.
+        *               Empty when the page cannot be loaded.
+        */
+       function get_feeds_from_html($url)
+       {
+               $url   = fix_url($url);
+               $parts = parse_url($url);
+               if ($parts === false || !isset($parts['scheme'])
+                       || !isset($parts['host'])
+               ) {
+                       return array();
+               }
+
+               //the URL usually comes straight from the request, so refuse
+               //everything that is not a plain web address
+               $scheme = strtolower($parts['scheme']);
+               if ($scheme != 'http' && $scheme != 'https') {
+                       return array();
+               }
+
+               //"scheme://host[:port]" - target for hrefs starting with "/"
+               $rootUrl = $scheme . '://' . $parts['host'];
+               if (isset($parts['port'])) {
+                       $rootUrl .= ':' . $parts['port'];
+               }
+
+               //directory of the page - target for all other relative hrefs.
+               //Built from the path only, so slashes in the query string
+               //cannot move the base.
+               $path     = isset($parts['path']) ? $parts['path'] : '/';
+               $slashPos = strrpos($path, '/');
+               $baseUrl  = $rootUrl
+                       . ($slashPos === false ? '/' : substr($path, 0, $slashPos + 1));
+
+               //collect libxml parser warnings internally instead of
+               //printing them into the response for sloppy real-world HTML
+               $prevErrors = libxml_use_internal_errors(true);
+               $doc    = new DOMDocument();
+               $loaded = $doc->loadHTMLFile($url);
+               libxml_clear_errors();
+               libxml_use_internal_errors($prevErrors);
+               if (!$loaded) {
+                       return array();
+               }
+
+               $xpath = new DOMXPath($doc);
+               $entries = $xpath->query('/html/head/link[@rel="alternate"]');
+               $feedUrls = array();
+               foreach ($entries as $entry) {
+                       if ($entry->hasAttribute('href')) {
+                               $title = $entry->getAttribute('title');
+                               if ($title == '') {
+                                       $title = $entry->getAttribute('type');
+                               }
+                               $feedUrl = $entry->getAttribute('href');
+                               if (substr($feedUrl, 0, 2) == '//') {
+                                       //protocol-relative URL: reuse the page scheme
+                                       $feedUrl = $scheme . ':' . $feedUrl;
+                               } else if (!preg_match('/^[a-z][a-z0-9+.\-]*:/i', $feedUrl)) {
+                                       //no scheme -> relative URL
+                                       if (substr($feedUrl, 0, 1) == '/') {
+                                               //absolute path on the same host
+                                               $feedUrl = $rootUrl . $feedUrl;
+                                       } else {
+                                               $feedUrl = $baseUrl . $feedUrl;
+                                       }
+                               }
+                               $feedUrls[$feedUrl] = $title;
+                       }
+               }
+               return $feedUrls;
+       }
+
 ?>
index 7ccb30692bc7c1723c52cb6632be9b4837cb5731..54d636d183e23237df6f73ca4339964872156a84 100644 (file)
 
                }
 
+               if ($subop == "extractfeedurls") {
+                       print "<rpc-reply>";
+
+                       $urls = get_feeds_from_html($_REQUEST['url']);
+                       print "<urls><![CDATA[" . json_encode($urls) . "]]></urls>";
+
+                       print "</rpc-reply>";
+                       return;
+               }
+
                if ($subop == "togglepref") {
                        print "<rpc-reply>";