]> git.wh0rd.org - tt-rss.git/blobdiff - include/functions.php
split transparent rewriting of locally cached media URLs to execute after both saniti...
[tt-rss.git] / include / functions.php
index 8e9949f75896215c5edc23e13ef817e573b733c9..8acea8ef49e652eda2c24f0a79c7dd8a8be7e8d9 100755 (executable)
@@ -1,6 +1,6 @@
 <?php
        define('EXPECTED_CONFIG_VERSION', 26);
-       define('SCHEMA_VERSION', 133);
+       define('SCHEMA_VERSION', 134);
 
        define('LABEL_BASE_INDEX', -1024);
        define('PLUGIN_FEED_BASE_INDEX', -128);
        // default sleep interval between feed updates (sec)
        define_default('MIN_CACHE_FILE_SIZE', 1024);
        // do not cache files smaller than this (bytes; 1 KiB here)
+       define_default('MAX_CACHE_FILE_SIZE', 64*1024*1024);
+       // do not cache files larger than this (bytes; 64 MiB here)
+       define_default('MAX_DOWNLOAD_FILE_SIZE', 16*1024*1024);
+       // do not download general files larger than this (bytes; 16 MiB here); fetch_file_contents() uses it as the default max_size, which currently only takes effect for CURL transfers
        define_default('CACHE_MAX_DAYS', 7);
        // max age in days for various automatically cached (temporary) files
        define_default('MAX_CONDITIONAL_INTERVAL', 3600*12);
                }
        }
 
+       // TODO: max_size currently only works for CURL transfers
        // TODO: multiple-argument way is deprecated, first parameter is a hash now
        function fetch_file_contents($options /* previously: 0: $url , 1: $type = false, 2: $login = false, 3: $pass = false,
                                4: $post_query = false, 5: $timeout = false, 6: $timestamp = 0, 7: $useragent = false*/) {
                $last_modified = isset($options["last_modified"]) ? $options["last_modified"] : "";
                $useragent = isset($options["useragent"]) ? $options["useragent"] : false;
                $followlocation = isset($options["followlocation"]) ? $options["followlocation"] : true;
+               $max_size = isset($options["max_size"]) ? $options["max_size"] : MAX_DOWNLOAD_FILE_SIZE; // in bytes
+               $http_accept = isset($options["http_accept"]) ? $options["http_accept"] : false;
 
                $url = ltrim($url, ' ');
                $url = str_replace(' ', '%20', $url);
 
                        $ch = curl_init($url);
 
-                       if ($last_modified && !$post_query) {
-                               curl_setopt($ch, CURLOPT_HTTPHEADER,
-                                       array("If-Modified-Since: $last_modified"));
-                       }
+                       $curl_http_headers = [];
+
+                       if ($last_modified && !$post_query)
+                               array_push($curl_http_headers, "If-Modified-Since: $last_modified");
+
+                       if ($http_accept)
+                               array_push($curl_http_headers, "Accept: " . $http_accept);
+
+                       if (count($curl_http_headers) > 0)
+                               curl_setopt($ch, CURLOPT_HTTPHEADER, $curl_http_headers);
 
                        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout ? $timeout : FILE_FETCH_CONNECT_TIMEOUT);
                        curl_setopt($ch, CURLOPT_TIMEOUT, $timeout ? $timeout : FILE_FETCH_TIMEOUT);
                        curl_setopt($ch, CURLOPT_ENCODING, "");
                        //curl_setopt($ch, CURLOPT_REFERER, $url);
 
+                       if ($max_size) { // a falsy max_size (0/false) leaves the download size unlimited
+                               curl_setopt($ch, CURLOPT_NOPROGRESS, false); // turn progress reporting on so the callback below gets called
+                               curl_setopt($ch, CURLOPT_BUFFERSIZE, 16384); // needed to get 5 arguments in progress function?
+
+                               // the progress callback is a closure so that it can see $max_size (captured by reference)
+                               // $download_size/$upload_size are the *expected* totals (could be zero); $downloaded/$uploaded are the amounts transferred so far
+                               curl_setopt($ch, CURLOPT_PROGRESSFUNCTION, function($curl_handle, $download_size, $downloaded, $upload_size, $uploaded) use( &$max_size) {
+                                       //_debug("[curl progressfunction] $downloaded $max_size");
+
+                                       return ($downloaded > $max_size) ? 1 : 0; // a non-zero return makes curl abort the transfer once more than $max_size bytes have arrived
+                               });
+
+                       }
+
                        if (!ini_get("open_basedir")) {
                                curl_setopt($ch, CURLOPT_COOKIEJAR, "/dev/null");
                        }
 
                         $context_options = array(
                                  'http' => array(
+                                               'header' => array(
+                                                       'Connection: close'
+                                               ),
                                                'method' => 'GET',
                                                'ignore_errors' => true,
                                                'timeout' => $timeout ? $timeout : FILE_FETCH_TIMEOUT,
                                                'protocol_version'=> 1.1)
                                  );
 
-                       if (!$post_query && $last_modified) {
-                               $context_options['http']['header'] = "If-Modified-Since: $last_modified\r\n";
-                       }
+                       if (!$post_query && $last_modified)
+                               array_push($context_options['http']['header'], "If-Modified-Since: $last_modified");
+
+                       if ($http_accept)
+                               array_push($context_options['http']['header'], "Accept: $http_accept");
 
                        if (defined('_HTTP_PROXY')) {
                                $context_options['http']['request_fulluri'] = true;
                $profile = $profile ? $profile : null;
 
                $u_sth = $pdo->prepare("SELECT pref_name
-                       FROM ttrss_user_prefs WHERE owner_uid = :uid AND 
+                       FROM ttrss_user_prefs WHERE owner_uid = :uid AND
                                (profile = :profile OR (:profile IS NULL AND profile IS NULL))");
                $u_sth->execute([':uid' => $uid, ':profile' => $profile]);
 
 
                                /* cleanup ccache */
 
-                               $sth = $pdo->prepare("DELETE FROM ttrss_counters_cache WHERE owner_uid = ? 
+                               $sth = $pdo->prepare("DELETE FROM ttrss_counters_cache WHERE owner_uid = ?
                                        AND
                                                (SELECT COUNT(id) FROM ttrss_feeds WHERE
                                                        ttrss_feeds.id = feed_id) = 0");
 
                                $sth->execute([$_SESSION['uid']]);
 
-                               $sth = $pdo->prepare("DELETE FROM ttrss_cat_counters_cache WHERE owner_uid = ? 
+                               $sth = $pdo->prepare("DELETE FROM ttrss_cat_counters_cache WHERE owner_uid = ?
                                        AND
                                                (SELECT COUNT(id) FROM ttrss_feed_categories WHERE
                                                        ttrss_feed_categories.id = feed_id) = 0");
                $search_query_leftover = array();
 
                $pdo = Db::pdo();
-               
+
                if ($search_language)
                        $search_language = $pdo->quote(mb_strtolower($search_language));
                else
                return false;
        }
 
-       function sanitize($str, $force_remove_images = false, $owner = false, $site_url = false, $highlight_words = false, $article_id = false) {
-               if (!$owner) $owner = $_SESSION["uid"];
-
-               $res = trim($str); if (!$res) return '';
+       // check for locally cached (media) URLs and rewrite them to their local versions
+       // this is called separately, after sanitize() and the plugin render article hooks, so that
+       // plugins can work on the original source URLs used before caching
 
+       function rewrite_cached_urls($str) { // returns '' for empty input, the trimmed input when nothing is cached, otherwise the re-serialized HTML
                $charset_hack = '<head>
                                <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
                        </head>';
 
-               $res = trim($res); if (!$res) return '';
-
-               libxml_use_internal_errors(true);
+               $res = trim($str); if (!$res) return ''; // nothing to do for empty, whitespace-only or otherwise falsy input
 
                $doc = new DOMDocument(); // NOTE(review): libxml_use_internal_errors(true) is set in sanitize(), not here - parse warnings are presumably suppressed only if sanitize() ran earlier in the request; confirm
                $doc->loadHTML($charset_hack . $res); // the prepended <head> carries a UTF-8 Content-Type meta tag
                $xpath = new DOMXPath($doc);
 
-               $rewrite_base_url = $site_url ? $site_url : get_self_url_prefix();
+               $entries = $xpath->query('(//img[@src]|//video/source[@src]|//audio/source[@src])'); // only media elements that carry a src attribute
 
-               $entries = $xpath->query('(//a[@href]|//img[@src]|//video/source[@src]|//audio/source[@src])');
+               $need_saving = false; // set once at least one src has been rewritten
 
                foreach ($entries as $entry) {
 
-                       if ($entry->hasAttribute('href')) {
-                               $entry->setAttribute('href',
-                                       rewrite_relative_url($rewrite_base_url, $entry->getAttribute('href')));
-
-                               $entry->setAttribute('rel', 'noopener noreferrer');
-                       }
-
                        if ($entry->hasAttribute('src')) {
-                               $src = rewrite_relative_url($rewrite_base_url, $entry->getAttribute('src'));
+
+                               // should already be an absolute URL because this is called after sanitize()
+                               $src = $entry->getAttribute('src');
                                $cached_filename = CACHE_DIR . '/images/' . sha1($src); // cache files are named after the SHA-1 of the source URL
 
                                if (file_exists($cached_filename)) {
 
                                        $src = get_self_url_prefix() . '/public.php?op=cached_url&hash=' . sha1($src) . $suffix; // NOTE(review): $suffix is not assigned anywhere in this function as shown - looks undefined here; confirm
 
-                                       if ($entry->hasAttribute('srcset')) {
-                                               $entry->removeAttribute('srcset');
-                                       }
-
-                                       if ($entry->hasAttribute('sizes')) {
-                                               $entry->removeAttribute('sizes');
-                                       }
+                                       $entry->setAttribute('src', $src);
+                                       $need_saving = true;
                                }
+                       }
+               }
+
+               if ($need_saving) {
+                       $doc->removeChild($doc->firstChild); // remove doctype (presumably the document's first child after loadHTML - confirm)
+                       $res = $doc->saveHTML();
+               }
+
+               return $res;
+       }
+
+       function sanitize($str, $force_remove_images = false, $owner = false, $site_url = false, $highlight_words = false, $article_id = false) {
+               if (!$owner) $owner = $_SESSION["uid"];
+
+               $res = trim($str); if (!$res) return '';
+
+               $charset_hack = '<head>
+                               <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
+                       </head>';
+
+               $res = trim($res); if (!$res) return '';
+
+               libxml_use_internal_errors(true);
+
+               $doc = new DOMDocument();
+               $doc->loadHTML($charset_hack . $res);
+               $xpath = new DOMXPath($doc);
+
+               $rewrite_base_url = $site_url ? $site_url : get_self_url_prefix();
+
+               $entries = $xpath->query('(//a[@href]|//img[@src]|//video/source[@src]|//audio/source[@src])');
+
+               foreach ($entries as $entry) {
+
+                       if ($entry->hasAttribute('href')) {
+                               $entry->setAttribute('href',
+                                       rewrite_relative_url($rewrite_base_url, $entry->getAttribute('href')));
+
+                               $entry->setAttribute('rel', 'noopener noreferrer');
+                       }
+
+                       if ($entry->hasAttribute('src')) {
+                               $src = rewrite_relative_url($rewrite_base_url, $entry->getAttribute('src'));
+
+                               // cache stuff has gone to rewrite_cached_urls()
 
                                $entry->setAttribute('src', $src);
                        }
                        if ($entry->nodeName == 'img') {
                                $entry->setAttribute('referrerpolicy', 'no-referrer');
 
+                               $entry->removeAttribute('width');
+                               $entry->removeAttribute('height');
+
                                if ($entry->hasAttribute('src')) {
                                        $is_https_url = parse_url($entry->getAttribute('src'), PHP_URL_SCHEME) === 'https';
 
                        }
                }
 
-               $allowed_elements = array('a', 'address', 'acronym', 'audio', 'article', 'aside',
+               $allowed_elements = array('a', 'abbr', 'address', 'acronym', 'audio', 'article', 'aside',
                        'b', 'bdi', 'bdo', 'big', 'blockquote', 'body', 'br',
                        'caption', 'cite', 'center', 'code', 'col', 'colgroup',
                        'data', 'dd', 'del', 'details', 'description', 'dfn', 'div', 'dl', 'font',
        }
 
        function tag_is_valid($tag) { // true only for a non-falsy, non-numeric tag of at most 250 characters (per mb_strlen)
-               if ($tag == '') return false;
-               if (is_numeric($tag)) return false;
-               if (mb_strlen($tag) > 250) return false;
-
-               if (!$tag) return false;
+               if (!$tag || is_numeric($tag) || mb_strlen($tag) > 250) // !$tag also covers '' and null
+                       return false;
 
                return true;
        }
                }
 
                $sth = $pdo->prepare("SELECT id FROM ttrss_feed_categories
-                               WHERE (parent_cat = :parent OR (:parent IS NULL AND parent_cat IS NULL)) 
+                               WHERE (parent_cat = :parent OR (:parent IS NULL AND parent_cat IS NULL))
                                AND title = :title AND owner_uid = :uid");
                $sth->execute([':parent' => $parent_cat_id, ':title' => $feed_cat, ':uid' => $_SESSION['uid']]);
 
        }
 
        function get_minified_js($files) {
-               require_once 'lib/jshrink/Minifier.php';
 
                $rv = '';
 
                        }
 
                        $mimetype = mime_content_type($filename);
+
+                       // this is hardly ideal but 1) only media is cached in images/ and 2) seemingly only mp4
+                       // video files are detected as octet-stream by mime_content_type()
+
+                       if ($mimetype == "application/octet-stream")
+                               $mimetype = "video/mp4";
+
                        header("Content-type: $mimetype");
 
                        $stamp = gmdate("D, d M Y H:i:s", filemtime($filename)) . " GMT";
                        header("Last-Modified: $stamp", true);
 
-                       return readfile($filename);
+                       if (defined('_NGINX_XACCEL_PREFIX') && _NGINX_XACCEL_PREFIX) {
+                               header("X-Accel-Redirect: " . _NGINX_XACCEL_PREFIX . "/" . $filename);
+
+                               return false;
+                       } else {
+                               return readfile($filename);
+                       }
                } else {
                        return false;
                }