$found = true;
}
- if (preg_match("/\.(jpg|jpeg|gif|png)(\?[0-9][0-9]*)?$/i", $entry->getAttribute("href"))) {
+ if (preg_match("/\.(jpg|jpeg|gif|png)(\?[0-9][0-9]*)?$/i", $entry->getAttribute("href")) ||
+ mb_strpos($entry->getAttribute("href"), "i.reddituploads.com") !== FALSE ||
+ mb_strpos($this->get_content_type($entry->getAttribute("href")), "image/") !== FALSE) {
+
_debug("Handling as a picture", $debug);
$img = $doc->createElement('img');
//if ($debug) print_r($album_content);
- $aentries = $axpath->query("(//div[@class='post-image']/img[@src] | //a[@class='zoom']/img[@src])");
+ $aentries = $axpath->query("(//div[@class='post-image']/img[@src] | //a[@class='zoom']/img[@src] | //div[@class='video-elements']/source)");
$urls = [];
foreach ($aentries as $aentry) {
$url = $aentry->getAttribute("src");
if (!in_array($url, $urls)) {
- $img = $doc->createElement('img');
- $img->setAttribute("src", $url);
- $entry->parentNode->insertBefore($doc->createElement('br'), $entry);
- $br = $doc->createElement('br');
+ if ($aentry->tagName == "img") {
- $entry->parentNode->insertBefore($img, $entry);
- $entry->parentNode->insertBefore($br, $entry);
+ $img = $doc->createElement('img');
+ $img->setAttribute("src", $url);
+ $entry->parentNode->insertBefore($doc->createElement('br'), $entry);
+
+ $br = $doc->createElement('br');
+
+ $entry->parentNode->insertBefore($img, $entry);
+ $entry->parentNode->insertBefore($br, $entry);
+ } else if ($aentry->tagName == "source") {
+
+ if (strpos($url, "i.imgur.com") !== FALSE)
+ $poster_url = str_replace(".mp4", "h.jpg", $url);
+ else
+ $poster_url = "";
+
+ $this->handle_as_video($doc, $entry, $url, $poster_url);
+
+ }
array_push($urls, $url);
/* link may lead to a huge video file or whatever, we need to check content type before trying to
parse it which p much requires curl */
- $ch = curl_init($content_link->getAttribute("href"));
- curl_setopt($ch, CURLOPT_TIMEOUT, 5);
- curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
- curl_setopt($ch, CURLOPT_HEADER, true);
- curl_setopt($ch, CURLOPT_NOBODY, true);
- curl_setopt($ch, CURLOPT_FOLLOWLOCATION, !ini_get("open_basedir"));
- curl_setopt($ch, CURLOPT_USERAGENT, SELF_USER_AGENT);
+ $useragent_compat = "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)";
- @$result = curl_exec($ch);
- $content_type = curl_getinfo($ch, CURLINFO_CONTENT_TYPE);
+ $content_type = $this->get_content_type($content_link->getAttribute("href"), $useragent_compat);
if ($content_type && strpos($content_type, "text/html") !== FALSE) {
- $tmp = fetch_file_contents($content_link->getAttribute("href"));
+ $tmp = fetch_file_contents(array("url" => $content_link->getAttribute("href"),
+ "useragent" => $useragent_compat));
//_debug("tmplen: " . mb_strlen($tmp));
- if ($tmp && mb_strlen($tmp) < 65535 * 4) {
+ if ($tmp && mb_strlen($tmp) < 1024 * 250) {
$r = new Readability($tmp, $content_link->getAttribute("href"));
print $doc->saveHTML();
}
+
+ /**
+  * Probe a URL with a header-only cURL request and report its Content-Type.
+  *
+  * The request is sent with CURLOPT_NOBODY, so the body is not downloaded;
+  * callers use this to decide how to handle a link before fetching it.
+  *
+  * @param string $url       URL to probe
+  * @param string $useragent User-Agent header to send (defaults to SELF_USER_AGENT)
+  * @return string|false     Content-Type reported by cURL, or false when cURL is
+  *                          unavailable, disabled via NO_CURL, the handle could not
+  *                          be created, or no Content-Type was reported
+  */
+ private function get_content_type($url, $useragent = SELF_USER_AGENT) {
+     $content_type = false;
+
+     if (function_exists("curl_init") && !defined("NO_CURL")) {
+         $ch = curl_init($url);
+
+         // curl_init() returns false on failure; do not configure a non-handle
+         if ($ch !== false) {
+             curl_setopt($ch, CURLOPT_TIMEOUT, 5);
+             curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
+             curl_setopt($ch, CURLOPT_HEADER, true);
+             curl_setopt($ch, CURLOPT_NOBODY, true);
+             // redirects are only followed when open_basedir is not set
+             curl_setopt($ch, CURLOPT_FOLLOWLOCATION, !ini_get("open_basedir"));
+             curl_setopt($ch, CURLOPT_USERAGENT, $useragent);
+
+             // the response itself is not needed, only the transfer info below
+             curl_exec($ch);
+             $content_type = curl_getinfo($ch, CURLINFO_CONTENT_TYPE);
+
+             // release the handle; this method is called once per candidate link
+             curl_close($ch);
+         }
+     }
+
+     // curl_getinfo() may yield null when no valid Content-Type header was sent;
+     // normalize to false so callers can pass the result to string functions
+     return is_string($content_type) ? $content_type : false;
+ }
}
?>