git.wh0rd.org - tt-rss.git/commitdiff
Check for HTML in the first 100 bytes.
author Christian Weiske <cweiske@cweiske.de>
Thu, 9 Jul 2015 11:39:07 +0000 (13:39 +0200)
committer Christian Weiske <cweiske@cweiske.de>
Thu, 9 Jul 2015 11:39:07 +0000 (13:39 +0200)
Some HTML pages have an XML header (XHTML), which alone is nearly 50 bytes.
Thus we need to check for the HTML or doctype tags in the first 100 bytes.

include/functions2.php

index e3b6e535cc9259339e43d3cb1a5f563cc1250027..133352f75975c44c210d5437f440b3a584efe14a 100644 (file)
        }
 
        function is_html($content) {
-               return preg_match("/<html|DOCTYPE html/i", substr($content, 0, 20)) !== 0;
+               return preg_match("/<html|DOCTYPE html/i", substr($content, 0, 100)) !== 0;
        }
 
        function url_is_html($url, $login = false, $pass = false) {