From b70c3daffe86a9bcd0a65990a6080475d182c0b8 Mon Sep 17 00:00:00 2001
From: Andrew Dolgov <noreply@fakecake.org>
Date: Sat, 26 May 2018 10:25:39 +0300
Subject: [PATCH] af_redditimgur updates: 1. remove special handling for imgur
 albums/pages because it's full-on cancerous json garbage now with no
 scrapeable layout; 2. skip all urls leading to reddit.com while trying to
 embed content; 3. implement generic last-resort embedding based on meta
 og:image element

---
 plugins/af_redditimgur/init.php | 49 +++++++++++++++++++++++++++++----
 1 file changed, 43 insertions(+), 6 deletions(-)

diff --git a/plugins/af_redditimgur/init.php b/plugins/af_redditimgur/init.php
index 59e3a760..4079a5b3 100755
--- a/plugins/af_redditimgur/init.php
+++ b/plugins/af_redditimgur/init.php
@@ -84,15 +84,16 @@ class Af_RedditImgur extends Plugin {
 		$img_entries = $xpath->query("(//img[@src])");
 
 		$found = false;
+		//$debug = 1;
 
 		foreach ($entries as $entry) {
-			if ($entry->hasAttribute("href")) {
+			if ($entry->hasAttribute("href") && strpos($entry->getAttribute("href"), "reddit.com") === FALSE) {
 
 				_debug("processing href: " . $entry->getAttribute("href"), $debug);
 
 				$matches = array();
 
-				if (preg_match("/^https?:\/\/twitter.com\/(.*?)\/status\/(.*)/", $entry->getAttribute("href"), $matches)) {
+				if (!$found && preg_match("/^https?:\/\/twitter.com\/(.*?)\/status\/(.*)/", $entry->getAttribute("href"), $matches)) {
 					_debug("handling as twitter: " . $matches[1] . " " . $matches[2], $debug);
 
 					$oembed_result = fetch_file_contents("https://publish.twitter.com/oembed?url=" . urlencode($entry->getAttribute("href")));
@@ -285,7 +286,7 @@ class Af_RedditImgur extends Plugin {
 
 				// linked albums & pages
 
-				if (!$found && preg_match("/^https?:\/\/(m\.)?imgur.com\/([^\.\/]+$)/", $entry->getAttribute("href"), $matches) ||
+				/*if (!$found && preg_match("/^https?:\/\/(m\.)?imgur.com\/([^\.\/]+$)/", $entry->getAttribute("href"), $matches) ||
 					preg_match("/^https?:\/\/(m\.)?imgur.com\/(a|album|gallery)\/[^\.]+$/", $entry->getAttribute("href"), $matches)) {
 
 					_debug("Handling as an imgur page/album/gallery", $debug);
@@ -339,7 +340,7 @@ class Af_RedditImgur extends Plugin {
 							if ($debug) print_r($urls);
 						}
 					}
-				}
+				} */
 
 				// wtf is this even
 				if (!$found && preg_match("/^https?:\/\/gyazo\.com\/([^\.\/]+$)/", $entry->getAttribute("href"), $matches)) {
@@ -356,6 +357,41 @@ class Af_RedditImgur extends Plugin {
 
 					$found = true;
 				}
+
+				// let's try meta properties
+				if (!$found) {
+					_debug("looking for meta og:image", $debug);
+
+					$content = fetch_file_contents(["url" => $entry->getAttribute("href"),
+						"http_accept" => "text/*"]);
+
+					if ($content) {
+						$cdoc = new DOMDocument();
+
+						if (@$cdoc->loadHTML($content)) {
+							$cxpath = new DOMXPath($cdoc);
+
+							$og_image = $cxpath->query("//meta[@property='og:image']")->item(0);
+
+							if ($og_image) {
+
+								$og_src = $og_image->getAttribute("content");
+
+								if ($og_src) {
+									$img = $doc->createElement('img');
+									$img->setAttribute("src", $og_src);
+
+									$br = $doc->createElement('br');
+									$entry->parentNode->insertBefore($img, $entry);
+									$entry->parentNode->insertBefore($br, $entry);
+
+									$found = true;
+								}
+							}
+						}
+					}
+				}
+
 			}
 
 			// remove tiny thumbnails
@@ -534,8 +570,9 @@ class Af_RedditImgur extends Plugin {
 
 				if ($content_type && strpos($content_type, "text/html") !== FALSE) {
 
-					$tmp = fetch_file_contents(array("url" => $url,
-						"useragent" => $useragent_compat));
+					$tmp = fetch_file_contents(["url" => $url,
+						"useragent" => $useragent_compat,
+						"http_accept" => "text/html"]);
 
 					if ($debug) _debug("tmplen: " . mb_strlen($tmp));
 
-- 
2.47.3