add "extractfeedurls" rpc action that extracts the feed URLs from an HTML page

author Christian Weiske <cweiske@cweiske.de>

Sun, 7 Nov 2010 14:45:50 +0000 (15:45 +0100)

committer Andrew Dolgov <fox@madoka.volgo-balt.ru>

Mon, 8 Nov 2010 20:10:22 +0000 (23:10 +0300)
author Christian Weiske <cweiske@cweiske.de>
Sun, 7 Nov 2010 14:45:50 +0000 (15:45 +0100)
committer Andrew Dolgov <fox@madoka.volgo-balt.ru>
Mon, 8 Nov 2010 20:10:22 +0000 (23:10 +0300)
diff --git a/functions.php b/functions.php

index ae37e7d84327b1b36edc2f7b313effe9bff0ee77..2373d54356d2215baedafda7d98d79886aad5735 100644 (file)
--- a/functions.php
+++ b/functions.php
@@ -2943,15 +2943,9 @@
         function subscribe_to_feed($link, $url, $cat_id = 0, 
                         $auth_login = '', $auth_pass = '') {
  
-               $url   = fix_url($url);
-               $parts = parse_url($url);
-
+               $url = fix_url($url);
                 if (!validate_feed_url($url)) return 2;
  
-               if ($parts['scheme'] == 'feed') $parts['scheme'] = 'http';
-
-               $url = make_url_from_parts($parts);
-
                 if ($cat_id == "0" || !$cat_id) {
                         $cat_qpart = "NULL";
                 } else {
@@ -6674,6 +6668,8 @@
  
         /**
          * Fixes incomplete URLs by prepending "http://".
+        * Also replaces feed:// with http://, and
+        * appends a trailing slash if the url is a domain name only.
          *
          * @param string $url Possibly incomplete URL
          *
@@ -6682,6 +6678,14 @@
         function fix_url($url) {
                 if (strpos($url, '://') === false) {
                         $url = 'http://' . $url;
+               } else if (substr($url, 0, 5) == 'feed:') {
+                       $url = 'http:' . substr($url, 5);
+               }
+
+               //append slash if the URL has no slash after "http://"
+               // "http://www.example" -> "http://www.example/"
+               if (strpos($url, '/', 7) === false) {
+                       $url .= '/';
                 }
                 return $url;
         }
@@ -6973,4 +6977,39 @@
                 }
                 return false;
         }
+
+       /**
+        * Extracts RSS/Atom feed URLs from the given HTML URL.
+        *
+        * @param string $url HTML page URL
+        *
+        * @return array Array of feeds. Key is the full URL, value the title
+        */
+       function get_feeds_from_html($url)
+       {
+               $url     = fix_url($url);
+               $baseUrl = substr($url, 0, strrpos($url, '/') + 1);
+
+               $doc = new DOMDocument();
+               $doc->loadHTMLFile($url);
+               $xpath = new DOMXPath($doc);
+               $entries = $xpath->query('/html/head/link[@rel="alternate"]');
+               $feedUrls = array();
+               foreach ($entries as $entry) {
+                       if ($entry->hasAttribute('href')) {
+                               $title = $entry->getAttribute('title');
+                               if ($title == '') {
+                                       $title = $entry->getAttribute('type');
+                               }
+                               $feedUrl = $entry->getAttribute('href');
+                               if (strpos($feedUrl, '://') === false) {
+                                       //no protocol -> relative URL
+                                       $feedUrl = $baseUrl . $feedUrl;
+                               }
+                               $feedUrls[$feedUrl] = $title;
+                       }
+               }
+               return $feedUrls;
+       }
+
  ?>
diff --git a/modules/backend-rpc.php b/modules/backend-rpc.php

index 7ccb30692bc7c1723c52cb6632be9b4837cb5731..54d636d183e23237df6f73ca4339964872156a84 100644 (file)
--- a/modules/backend-rpc.php
+++ b/modules/backend-rpc.php
@@ -123,6 +123,16 @@
  
                 }
  
+               if ($subop == "extractfeedurls") {
+                       print "<rpc-reply>";
+
+                       $urls = get_feeds_from_html($_REQUEST['url']);
+                       print "<urls><![CDATA[" . json_encode($urls) . "]]></urls>";
+
+                       print "</rpc-reply>";
+                       return;
+               }
+
                 if ($subop == "togglepref") {
                         print "<rpc-reply>";
author	Christian Weiske <cweiske@cweiske.de>
	Sun, 7 Nov 2010 14:45:50 +0000 (15:45 +0100)
committer	Andrew Dolgov <fox@madoka.volgo-balt.ru>
	Mon, 8 Nov 2010 20:10:22 +0000 (23:10 +0300)
functions.php		patch \| blob \| history
modules/backend-rpc.php		patch \| blob \| history