]>
Commit | Line | Data |
---|---|---|
19dee1dc AD |
1 | <?php |
2 | ||
3 | /************************************************* | |
4 | ||
5 | Snoopy - the PHP net client | |
6 | Author: Monte Ohrt <monte@ispi.net> | |
7 | Copyright (c): 1999-2000 ispi, all rights reserved | |
8 | Version: 1.0 (plus - see SJM comments below) | |
9 | ||
10 | * This library is free software; you can redistribute it and/or | |
11 | * modify it under the terms of the GNU Lesser General Public | |
12 | * License as published by the Free Software Foundation; either | |
13 | * version 2.1 of the License, or (at your option) any later version. | |
14 | * | |
15 | * This library is distributed in the hope that it will be useful, | |
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
18 | * Lesser General Public License for more details. | |
19 | * | |
20 | * You should have received a copy of the GNU Lesser General Public | |
21 | * License along with this library; if not, write to the Free Software | |
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
23 | ||
24 | You may contact the author of Snoopy by e-mail at: | |
25 | monte@ispi.net | |
26 | ||
27 | Or, write to: | |
28 | Monte Ohrt | |
29 | CTO, ispi | |
30 | 237 S. 70th suite 220 | |
31 | Lincoln, NE 68510 | |
32 | ||
33 | The latest version of Snoopy can be obtained from: | |
34 | http://snoopy.sourceforge.com | |
35 | ||
36 | ||
37 | ||
38 | SJM - alpha-grade changes based on the version of Snoopy released with MagpieRSS 0.7 | |
39 | ||
40 | comments to steve@minutillo.com | |
41 | ||
42 | Two additions: | |
43 | ||
44 | 1) If this is PHP 4.3 or greater, and 'openssl' is available, | |
45 | use the PHP built in SSL support for "https" instead of calling curl externally. | |
46 | Use of external curl can still be forced by setting $use_curl = true. | |
47 | ||
48 | ref: http://us2.php.net/fsockopen | |
49 | ||
50 | 2) HTTP Digest Authentication. If you set a username and password, basic auth | |
51 | will be tried first. If that fails, and the server sends back a | |
52 | WWW-Authenticate: Digest header, the request will be retried with the appropriate | |
53 | digest response. Only qop=auth is supported, with MD5 as the algorithm. | |
54 | I realize that sending basic auth first, and then following up with a digest | |
55 | challenge-response kind of defeats the purpose in terms of security. | |
56 | ||
57 | ref: http://www.faqs.org/rfcs/rfc2617.html | |
58 | ||
59 | *************************************************/ | |
60 | ||
61 | class Snoopy | |
62 | { | |
63 | /**** Public variables ****/ | |
64 | ||
65 | /* user definable vars */ | |
66 | ||
67 | var $host = "www.php.net"; // host name we are connecting to | |
68 | var $port = 80; // port we are connecting to | |
69 | var $proxy_host = ""; // proxy host to use | |
70 | var $proxy_port = ""; // proxy port to use | |
71 | var $agent = "Snoopy v1.0"; // agent we masquerade as | |
72 | var $referer = ""; // referer info to pass | |
73 | var $cookies = array(); // array of cookies to pass | |
74 | // $cookies["username"]="joe"; | |
75 | var $rawheaders = array(); // array of raw headers to send | |
76 | // $rawheaders["Content-type"]="text/html"; | |
77 | ||
78 | var $maxredirs = 5; // http redirection depth maximum. 0 = disallow | |
79 | var $lastredirectaddr = ""; // contains address of last redirected address | |
80 | var $offsiteok = true; // allows redirection off-site | |
81 | var $maxframes = 0; // frame content depth maximum. 0 = disallow | |
82 | var $expandlinks = true; // expand links to fully qualified URLs. | |
83 | // this only applies to fetchlinks() | |
84 | // or submitlinks() | |
85 | var $passcookies = true; // pass set cookies back through redirects | |
86 | // NOTE: this currently does not respect | |
87 | // dates, domains or paths. | |
88 | ||
89 | var $user = ""; // user for http authentication | |
90 | var $pass = ""; // password for http authentication | |
91 | ||
92 | // http accept types | |
93 | var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*"; | |
94 | ||
95 | var $results = ""; // where the content is put | |
96 | ||
97 | var $error = ""; // error messages sent here | |
98 | var $response_code = ""; // response code returned from server | |
99 | var $headers = array(); // headers returned from server sent here | |
100 | var $maxlength = 500000; // max return data length (body) | |
101 | var $read_timeout = 0; // timeout on read operations, in seconds | |
102 | // supported only since PHP 4 Beta 4 | |
103 | // set to 0 to disallow timeouts | |
104 | var $timed_out = false; // if a read operation timed out | |
105 | var $status = 0; // http request status | |
106 | ||
107 | var $curl_path = "/usr/bin/curl"; | |
108 | // Snoopy will use cURL for fetching | |
109 | // SSL content if a full system path to | |
110 | // the cURL binary is supplied here. | |
111 | // set to false if you do not have | |
112 | // cURL installed. See http://curl.haxx.se | |
113 | // for details on installing cURL. | |
114 | // Snoopy does *not* use the cURL | |
115 | // library functions built into php, | |
116 | // as these functions are not stable | |
117 | // as of this Snoopy release. | |
118 | ||
119 | // SJM - always use curl for HTTPS requests? | |
120 | var $use_curl = false; | |
121 | ||
122 | ||
123 | // send Accept-encoding: gzip? | |
124 | var $use_gzip = true; | |
125 | ||
126 | /**** Private variables ****/ | |
127 | ||
128 | var $_maxlinelen = 4096; // max line length (headers) | |
129 | ||
130 | var $_scheme = "http"; // default scheme | |
131 | var $_httpmethod = "GET"; // default http request method | |
132 | var $_httpversion = "HTTP/1.0"; // default http request version | |
133 | var $_submit_method = "POST"; // default submit method | |
134 | var $_submit_type = "application/x-www-form-urlencoded"; // default submit type | |
135 | var $_mime_boundary = ""; // MIME boundary for multipart/form-data submit type | |
136 | var $_redirectaddr = false; // will be set if page fetched is a redirect | |
137 | var $_redirectdepth = 0; // increments on an http redirect | |
138 | var $_trieddigest = false; // have we tried Digest auth yet? | |
139 | var $_frameurls = array(); // frame src urls | |
140 | var $_framedepth = 0; // increments on frame depth | |
141 | ||
142 | var $_isproxy = false; // set if using a proxy server | |
143 | var $_fp_timeout = 30; // timeout for socket connection | |
144 | ||
145 | /*======================================================================*\ | |
146 | Function: fetch | |
147 | Purpose: fetch the contents of a web page | |
148 | (and possibly other protocols in the | |
149 | future like ftp, nntp, gopher, etc.) | |
150 | Input: $URI the location of the page to fetch | |
151 | Output: $this->results the output text from the fetch | |
152 | \*======================================================================*/ | |
153 | ||
function fetch($URI)
{
    // Fetch $URI over http or https, then follow a pending redirect
    // (bounded by $this->maxredirs) and fetch collected frame sources
    // (bounded by $this->maxframes).
    //
    // Input:   $URI    the location of the page to fetch; user:pass@
    //                  credentials embedded in it overwrite $this->user/pass
    // Output:  true once a request has been issued; false when the scheme
    //          is not http/https, the curl binary is unusable, or the
    //          socket connection failed (see $this->error / $this->status).
    //          The fetched content is stored by _httprequest()/_curlrequest().

    $URI_PARTS = parse_url($URI);
    if (!is_array($URI_PARTS))
        $URI_PARTS = array();       // seriously malformed URL: handled as "no scheme" below

    if (!empty($URI_PARTS["user"]))
        $this->user = urldecode($URI_PARTS["user"]);
    if (!empty($URI_PARTS["pass"]))
        $this->pass = urldecode($URI_PARTS["pass"]);

    $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : "";
    $this->_scheme = $scheme;

    if ($scheme != "http" && $scheme != "https")
    {
        // not a valid protocol
        $this->error = 'Invalid protocol "'.$scheme."\"\n";
        return false;
    }

    $host = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : "";

    // request target used when no proxy is involved: path plus query string
    $path = (isset($URI_PARTS["path"]) && $URI_PARTS["path"] != "") ? $URI_PARTS["path"] : "/";
    if (isset($URI_PARTS["query"]))
        $path .= "?".$URI_PARTS["query"];

    // SJM - use the external curl binary for https only if they really want
    // curl, or it isn't PHP 4.3 yet, or the openssl extension isn't loaded
    $via_curl = ($scheme == "https")
        && ($this->use_curl || !function_exists('file_get_contents') || !extension_loaded('openssl'));

    if ($via_curl)
    {
        if (!$this->curl_path || !is_executable($this->curl_path))
        {
            $this->error = "Bad curl ($this->curl_path), can't fetch HTTPS \n";
            return false;
        }

        $this->host = $host;
        if (!empty($URI_PARTS["port"]))
            $this->port = $URI_PARTS["port"];

        if ($this->_isproxy)
        {
            // using proxy, send entire URI
            $this->_curlrequest($URI, $URI, $this->_httpmethod);
        }
        else
        {
            // no proxy, send only the path
            $this->_curlrequest($path, $URI, $this->_httpmethod);
        }
    }
    else
    {
        // SJM - plain http, or https through PHP's own ssl:// transport
        $this->host = $host;
        if (!empty($URI_PARTS["port"]))
            $this->port = $URI_PARTS["port"];
        elseif ($scheme == "https")
            $this->port = 443;      // SJM - if it's https, default the port to 443

        $fp = null;
        if (!$this->_connect($fp))
            return false;

        // _connect() is what sets _isproxy, so it must be tested afterwards
        if ($this->_isproxy)
        {
            // using proxy, send entire URI
            $this->_httprequest($URI, $fp, $URI, $this->_httpmethod);
        }
        else
        {
            // no proxy, send only the path
            $this->_httprequest($path, $fp, $URI, $this->_httpmethod);
        }

        $this->_disconnect($fp);
    }

    /* url was redirected, check if we've hit the max depth */
    if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
    {
        // only follow redirect if it's on this site, or offsiteok is true
        $onsite = preg_match("|^https?://".preg_quote($this->host, "|")."|i", $this->_redirectaddr);
        if ($onsite || $this->offsiteok)
        {
            /* follow the redirect */
            $this->_redirectdepth++;
            $this->lastredirectaddr = $this->_redirectaddr;
            $this->fetch($this->_redirectaddr);
        }
    }

    if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
    {
        // work on a copy: the nested fetch() calls refill $this->_frameurls
        $frameurls = $this->_frameurls;
        $this->_frameurls = array();

        foreach ($frameurls as $frameurl)
        {
            if ($this->_framedepth >= $this->maxframes)
                break;

            $this->fetch($frameurl);
            $this->_framedepth++;
        }
    }

    return true;
}
309 | ||
310 | ||
311 | ||
312 | /*======================================================================*\ | |
313 | Private functions | |
314 | \*======================================================================*/ | |
315 | ||
316 | ||
317 | /*======================================================================*\ | |
318 | Function: _striplinks | |
319 | Purpose: strip the hyperlinks from an html document | |
320 | Input: $document document to strip. | |
321 | Output: $match an array of the links | |
322 | \*======================================================================*/ | |
323 | ||
function _striplinks($document)
{
    // Collect the href targets of all <a> tags in $document.
    //
    // Input:   $document   html text to scan
    // Output:  array of href values, quoted hrefs first and then unquoted
    //          ones; an empty array when the document contains no links
    //
    // The ".*?" in front of "href" is deliberately lazy: with the "s" flag
    // a greedy ".*" runs to the last href in the document, so only one
    // link would be reported.
    preg_match_all("'<\s*a\s+.*?href\s*=\s*          # find <a href=
                    ([\"\'])?                       # find single or double quote
                    (?(1) (.*?)\\1 | ([^\s\>]+))    # if quote found, match up to next matching
                                                    # quote, otherwise match up to next space
                    'isx",$document,$links);

    // catenate the non-empty matches from the conditional subpattern:
    // group 2 holds quoted values, group 3 unquoted ones
    $match = array();
    foreach (array(2, 3) as $group)
    {
        foreach ($links[$group] as $val)
        {
            if (!empty($val))
                $match[] = $val;
        }
    }

    // return the links
    return $match;
}
350 | ||
351 | /*======================================================================*\ | |
352 | Function: _stripform | |
353 | Purpose: strip the form elements from an html document | |
354 | Input: $document document to strip. | |
355 | Output: $match an array of the links | |
356 | \*======================================================================*/ | |
357 | ||
function _stripform($document)
{
    // Pull the form-related tags (form, input, select, textarea, option)
    // out of $document and hand them back as one string, one match per
    // line, separated by CRLF.
    $form_tags = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";

    preg_match_all($form_tags, $document, $found);

    // index 0 holds the full text of every match
    return implode("\r\n", $found[0]);
}
368 | ||
369 | ||
370 | ||
371 | /*======================================================================*\ | |
372 | Function: _striptext | |
373 | Purpose: strip the text from an html document | |
374 | Input: $document document to strip. | |
375 | Output: $text the resulting text | |
376 | \*======================================================================*/ | |
377 | ||
function _striptext($document)
{
    // Reduce an html document to its text: drop <script> blocks and tags,
    // collapse whitespace that follows a line break, and decode a handful
    // of common entities.
    //
    // Input:   $document   html text to strip
    // Output:  the resulting text
    //
    // I didn't use preg eval (//e) since that is only available in PHP 4.0.
    // so, list your entities one by one here. I included some of the
    // more common ones.

    $search = array("'<script[^>]*?>.*?</script>'si",   // strip out javascript
                    "'<[\/\!]*?[^<>]*?>'si",            // strip out html tags
                    "'([\r\n])[\s]+'",                  // strip out white space
                    "'&(quote?|#34);'i",                // replace html entities; the real entity is
                                                        // &quot; - "quote" is still accepted as before
                    "'&(amp|#38);'i",
                    "'&(lt|#60);'i",
                    "'&(gt|#62);'i",
                    "'&(nbsp|#160);'i",
                    "'&(iexcl|#161);'i",
                    "'&(cent|#162);'i",
                    "'&(pound|#163);'i",
                    "'&(copy|#169);'i"
                    );
    $replace = array(   "",
                        "",
                        "\\1",
                        "\"",
                        "&",
                        "<",
                        ">",
                        " ",
                        chr(161),
                        chr(162),
                        chr(163),
                        chr(169));

    // the patterns are applied one after another, in array order
    $text = preg_replace($search,$replace,$document);

    return $text;
}
415 | ||
416 | /*======================================================================*\ | |
417 | Function: _expandlinks | |
418 | Purpose: expand each link into a fully qualified URL | |
419 | Input: $links the links to qualify | |
420 | $URI the full URI to get the base from | |
421 | Output: $expandedLinks the expanded links | |
422 | \*======================================================================*/ | |
423 | ||
function _expandlinks($links,$URI)
{
    // Expand each link into a fully qualified URL, relative to $URI.
    //
    // Input:   $links  a link or an array of links to qualify
    //          $URI    the full URI to get the base from
    // Output:  the expanded link(s), in the same string/array shape that
    //          preg_replace() produces for $links

    // base = $URI without its query string ...
    preg_match("/^[^\?]+/",$URI,$match);
    $base = isset($match[0]) ? $match[0] : "";

    // ... and without a trailing "/name.ext" component
    $base = preg_replace("|/[^\/\.]+\.[^\/\.]+$|","",$base);

    $search = array(    "|^http://".preg_quote($this->host)."|i",   // drop our own host prefix
                        // prefix everything that is not already absolute; https
                        // links count as absolute too, otherwise the base would
                        // be glued in front of them
                        "|^(?!https?://)(\/)?(?!mailto:)|i",
                        "|/\./|",                                   // collapse "/./"
                        "|/[^\/]+/\.\./|"                           // collapse "/dir/../"
                    );

    $replace = array(   "",
                        $base."/",
                        "/",
                        "/"
                    );

    $expandedLinks = preg_replace($search,$replace,$links);

    return $expandedLinks;
}
447 | ||
448 | /*======================================================================*\ | |
449 | Function: _httprequest | |
450 | Purpose: go get the http data from the server | |
451 | Input: $url the url to fetch | |
452 | $fp the current open file pointer | |
453 | $URI the full URI | |
454 | $body body contents to send if any (POST) | |
455 | Output: | |
456 | \*======================================================================*/ | |
457 | ||
function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
{
    // Send one HTTP request over the already opened socket $fp and parse
    // the response.
    //
    // Input:   $url            the request target (path, or full URI via proxy)
    //          $fp             the current open file pointer
    //          $URI            the full URI
    //          $http_method    request method, e.g. "GET"
    //          $content_type   Content-type to announce, if any
    //          $body           body contents to send if any (POST)
    // Output:  true on success, false when a read timed out (status -100).
    //          Sets $this->headers, response_code, status, results and,
    //          for redirects / digest challenges, $this->_redirectaddr.

    if($this->passcookies && $this->_redirectaddr)
        $this->setcookies();

    $URI_PARTS = parse_url($URI);
    $scheme = (is_array($URI_PARTS) && isset($URI_PARTS["scheme"])) ? $URI_PARTS["scheme"] : $this->_scheme;

    if(empty($url))
        $url = "/";
    $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
    if(!empty($this->agent))
        $headers .= "User-Agent: ".$this->agent."\r\n";
    if(!empty($this->host) && !isset($this->rawheaders['Host']))
        $headers .= "Host: ".$this->host."\r\n";
    if(!empty($this->accept))
        $headers .= "Accept: ".$this->accept."\r\n";

    if($this->use_gzip) {
        // make sure PHP was built with --with-zlib
        // and we can handle gzipp'ed data
        if ( function_exists('gzinflate') ) {
            $headers .= "Accept-encoding: gzip\r\n";
        }
        else {
            trigger_error(
                "use_gzip is on, but PHP was built without zlib support.".
                " Requesting file(s) without gzip encoding.",
                E_USER_NOTICE);
        }
    }

    if(!empty($this->referer))
        $headers .= "Referer: ".$this->referer."\r\n";
    if(!empty($this->cookies))
    {
        if(!is_array($this->cookies))
            $this->cookies = (array)$this->cookies;

        $cookie_headers = 'Cookie: ';
        foreach ( $this->cookies as $cookieKey => $cookieVal ) {
            $cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
        }
        // drop the trailing "; "
        $headers .= substr($cookie_headers,0,-2) . "\r\n";
    }
    if(!empty($this->rawheaders))
    {
        if(!is_array($this->rawheaders))
            $this->rawheaders = (array)$this->rawheaders;
        // values may or may not carry their own line ending; normalise so
        // every raw header ends in exactly one CRLF
        foreach ( $this->rawheaders as $headerKey => $headerVal )
            $headers .= $headerKey.": ".rtrim($headerVal, "\r\n")."\r\n";
    }
    if(!empty($content_type)) {
        $headers .= "Content-type: $content_type";
        if ($content_type == "multipart/form-data")
            $headers .= "; boundary=".$this->_mime_boundary;
        $headers .= "\r\n";
    }
    if(!empty($body))
        $headers .= "Content-length: ".strlen($body)."\r\n";
    if(!empty($this->user) || !empty($this->pass))
        $headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";

    $headers .= "\r\n";

    // set the read timeout if needed
    if ($this->read_timeout > 0)
        socket_set_timeout($fp, $this->read_timeout);
    $this->timed_out = false;

    fwrite($fp,$headers.$body,strlen($headers.$body));

    $this->_redirectaddr = false;
    // keep the property defined (and countable) even if no header arrives
    $this->headers = array();

    // content was returned gzip encoded?
    $is_gzipped = false;

    while($currentHeader = fgets($fp,$this->_maxlinelen))
    {
        if ($this->read_timeout > 0 && $this->_check_timeout($fp))
        {
            $this->status=-100;
            return false;
        }

        // an empty line ends the header section
        if(preg_match("/^\r?\n$/", $currentHeader) )
            break;

        if(!$this->_trieddigest && preg_match("/^WWW-Authenticate: Digest (.*)/i", $currentHeader, $matches))
        {
            // SJM - we got a Digest challenge. Try to respond...

            $digestheader = $matches[1];

            $nonce = preg_match("/nonce=\"(.*?)\"/", $digestheader, $matches) ? $matches[1] : "";
            $realm = preg_match("/realm=\"(.*?)\"/", $digestheader, $matches) ? $matches[1] : "";

            $cnonce = md5(microtime());

            $a1 = $this->user . ":" . $realm . ":" . $this->pass;
            $a2 = $http_method . ":" . $url;

            $ha1 = md5($a1);
            $ha2 = md5($a2);

            $response = md5($ha1 . ":" . $nonce . ":00000001:" . $cnonce . ":auth:" . $ha2);

            $auth  = 'Digest username="' . $this->user . '", ';
            $auth .= 'realm="' . $realm . '", ';
            $auth .= 'nonce="' . $nonce . '", ';
            $auth .= 'uri="' . $url . '", ';
            $auth .= 'response="' . $response . '", ';
            $auth .= 'algorithm="MD5", ';
            $auth .= 'cnonce="' . $cnonce . '", ';
            $auth .= 'nc=00000001, ';
            $auth .= 'qop="auth"';

            // SJM - treat Digest challenge as a redirect. set flag so we don't keep retrying.

            $this->_trieddigest = true;

            // the line ending is added when the raw headers are written out
            $this->rawheaders["Authorization"] = $auth;
            // clear the credentials so the retry does not send Basic auth again
            $this->user = "";
            $this->pass = "";

            $this->_redirectaddr = $scheme . '://' . $this->host . $url;
        }

        // if a header begins with Location: or URI:, set the redirect
        if(preg_match("/^(Location:|URI:)\s*(.*)/i",chop($currentHeader),$matches))
        {
            // look for :// in the Location header to see if hostname is included
            if(!preg_match("|\:\/\/|",$matches[2]))
            {
                // no host in the path, so prepend
                $this->_redirectaddr = $scheme."://".$this->host.":".$this->port;
                // eliminate double slash
                if(!preg_match("|^/|",$matches[2]))
                    $this->_redirectaddr .= "/".$matches[2];
                else
                    $this->_redirectaddr .= $matches[2];
            }
            else
                $this->_redirectaddr = $matches[2];
        }

        if(preg_match("|^HTTP/|",$currentHeader))
        {
            if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
            {
                $this->status= $status[1];
            }
            $this->response_code = $currentHeader;
        }

        // header names are case-insensitive
        if (preg_match("/^Content-Encoding:\s*gzip/i", $currentHeader) ) {
            $is_gzipped = true;
        }

        $this->headers[] = $currentHeader;
    }

    // read the body; compare strictly so a chunk consisting of "0" is kept
    $results = "";
    while ( ($data = fread($fp, $this->maxlength)) !== false && $data !== "" ) {
        $results .= $data;
        if ( strlen($results) > $this->maxlength ) {
            break;
        }
    }

    // gunzip
    if ( $is_gzipped && function_exists('gzinflate') ) {
        // per http://www.php.net/manual/en/function.gzencode.php
        // skip the 10 byte gzip header in front of the deflate stream
        $results = substr($results, 10);
        $results = gzinflate($results);
    }

    if ($this->read_timeout > 0 && $this->_check_timeout($fp))
    {
        $this->status=-100;
        return false;
    }

    // check if there is a redirect meta tag

    if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
    {
        $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
    {
        // results starts out as a string; [] cannot be applied to a string
        if(!is_array($this->results))
            $this->results = array();
        $this->results[] = $results;
        for($x=0; $x<count($match[1]); $x++)
            $this->_frameurls[] = $this->_expandlinks($match[1][$x],$scheme."://".$this->host);
    }
    // have we already fetched framed content?
    elseif(is_array($this->results))
        $this->results[] = $results;
    // no framed content
    else
        $this->results = $results;

    return true;
}
674 | ||
675 | /*======================================================================*\ | |
676 | Function: _curlrequest | |
677 | Purpose: go get the https data from the server using curl | |
678 | Input: $url the url to fetch | |
679 | $URI the full URI | |
680 | $body body contents to send if any (POST) | |
681 | Output: | |
682 | \*======================================================================*/ | |
683 | ||
function _curlrequest($url,$URI,$http_method,$content_type="",$body="")
{
    // Fetch $URI by running the external curl binary ($this->curl_path)
    // and parse the response headers it dumps into a temporary file.
    //
    // Input:   $url            the url to fetch (kept for signature parity
    //                          with _httprequest(); curl is given $URI)
    //          $URI            the full URI
    //          $http_method    request method (not passed on to curl)
    //          $content_type   Content-type to announce, if any
    //          $body           body contents to send if any (POST)
    // Output:  true on success, false when curl could not be run or exited
    //          with an error (message in $this->error).
    //
    // Every value placed on the command line goes through escapeshellarg():
    // headers, body and URI can contain data supplied by remote servers
    // (redirect targets, cookies).

    if($this->passcookies && $this->_redirectaddr)
        $this->setcookies();

    $headers = array();

    $URI_PARTS = parse_url($URI);
    $scheme = (is_array($URI_PARTS) && isset($URI_PARTS["scheme"])) ? $URI_PARTS["scheme"] : $this->_scheme;

    if(empty($url))
        $url = "/";
    // GET ... header not needed for curl
    //$headers[] = $http_method." ".$url." ".$this->_httpversion;
    if(!empty($this->agent))
        $headers[] = "User-Agent: ".$this->agent;
    if(!empty($this->host))
        $headers[] = "Host: ".$this->host;
    if(!empty($this->accept))
        $headers[] = "Accept: ".$this->accept;
    if(!empty($this->referer))
        $headers[] = "Referer: ".$this->referer;
    if(!empty($this->cookies))
    {
        if(!is_array($this->cookies))
            $this->cookies = (array)$this->cookies;

        $cookie_str = 'Cookie: ';
        foreach ( $this->cookies as $cookieKey => $cookieVal ) {
            $cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
        }
        // drop the trailing "; "
        $headers[] = substr($cookie_str,0,-2);
    }
    if(!empty($this->rawheaders))
    {
        if(!is_array($this->rawheaders))
            $this->rawheaders = (array)$this->rawheaders;
        // curl adds the line ending itself; strip one a caller may have appended
        foreach ( $this->rawheaders as $headerKey => $headerVal )
            $headers[] = $headerKey.": ".rtrim($headerVal, "\r\n");
    }
    if(!empty($content_type)) {
        if ($content_type == "multipart/form-data")
            $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
        else
            $headers[] = "Content-type: $content_type";
    }
    if(!empty($body))
        $headers[] = "Content-length: ".strlen($body);
    if(!empty($this->user) || !empty($this->pass))
        $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);

    $cmdline_params = "";
    foreach ( $headers as $header )
        $cmdline_params .= " -H ".escapeshellarg($header);

    if(!empty($body))
        $cmdline_params .= " -d ".escapeshellarg($body);

    if($this->read_timeout > 0)
        $cmdline_params .= " -m ".escapeshellarg($this->read_timeout);

    // let the system pick an unused, unpredictable name for the header dump
    $tmpdir = function_exists('sys_get_temp_dir') ? sys_get_temp_dir() : "/tmp";
    $headerfile = tempnam($tmpdir, "sno");
    if($headerfile === false)
    {
        $this->error = "Error: could not create a temporary file for the cURL headers.";
        return false;
    }

    # accept self-signed certs

    // mbi: removed, as it breaks on older cURL's
    //$cmdline_params .= " -k";

    $results = array();
    exec($this->curl_path." -D ".escapeshellarg($headerfile).$cmdline_params." ".escapeshellarg($URI),$results,$return);

    if($return)
    {
        // do not leave the header dump behind on failure
        if(file_exists($headerfile))
            unlink($headerfile);
        $this->error = "Error: cURL could not retrieve the document, error $return.";
        return false;
    }

    // exec() returns the output split into lines
    $results = implode("\r\n",$results);

    $result_headers = file($headerfile);
    if(file_exists($headerfile))
        unlink($headerfile);
    if(!is_array($result_headers))
        $result_headers = array();

    $this->_redirectaddr = false;
    // keep the property defined (and countable) even if no header arrives
    $this->headers = array();

    foreach ( $result_headers as $headerLine )
    {
        // if a header begins with Location: or URI:, set the redirect
        if(preg_match("/^(Location:|URI:)\s*(.*)/i",chop($headerLine),$matches))
        {
            // look for :// in the Location header to see if hostname is included
            if(!preg_match("|\:\/\/|",$matches[2]))
            {
                // no host in the path, so prepend
                $this->_redirectaddr = $scheme."://".$this->host.":".$this->port;
                // eliminate double slash
                if(!preg_match("|^/|",$matches[2]))
                    $this->_redirectaddr .= "/".$matches[2];
                else
                    $this->_redirectaddr .= $matches[2];
            }
            else
                $this->_redirectaddr = $matches[2];
        }

        if(preg_match("|^HTTP/|",$headerLine))
        {
            $this->response_code = $headerLine;
            if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$this->response_code, $match))
            {
                $this->status= $match[1];
            }
        }
        $this->headers[] = $headerLine;
    }

    // check if there is a redirect meta tag

    if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
    {
        $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
    {
        // results starts out as a string; [] cannot be applied to a string
        if(!is_array($this->results))
            $this->results = array();
        $this->results[] = $results;
        for($x=0; $x<count($match[1]); $x++)
            $this->_frameurls[] = $this->_expandlinks($match[1][$x],$scheme."://".$this->host);
    }
    // have we already fetched framed content?
    elseif(is_array($this->results))
        $this->results[] = $results;
    // no framed content
    else
        $this->results = $results;

    return true;
}
827 | ||
828 | /*======================================================================*\ | |
829 | Function: setcookies() | |
830 | Purpose: set cookies for a redirection | |
831 | \*======================================================================*/ | |
832 | ||
function setcookies()
{
    // Copy the cookies announced by the last response (Set-Cookie lines in
    // $this->headers) into $this->cookies so they are passed back on the
    // next request, e.g. a redirect. Dates, domains and paths are ignored.

    // no response headers recorded yet (or the property was unset)
    if(empty($this->headers) || !is_array($this->headers))
        return;

    foreach ( $this->headers as $header )
    {
        // header lines still carry their line ending, so keep CR/LF out of the value
        if(preg_match("/^set-cookie:[\s]+([^=]+)=([^;\r\n]+)/i", $header, $match))
        {
            // stored decoded: the request builders urlencode() every value when sending
            $this->cookies[$match[1]] = urldecode($match[2]);
        }
    }
}
841 | ||
842 | ||
843 | /*======================================================================*\ | |
844 | Function: _check_timeout | |
845 | Purpose: checks whether timeout has occurred | |
846 | Input: $fp file pointer | |
847 | \*======================================================================*/ | |
848 | ||
function _check_timeout($fp)
{
    // Report whether the last read on $fp timed out. Only consulted when a
    // read timeout is configured; records a hit in $this->timed_out.
    //
    // Input:   $fp     file pointer
    // Output:  true if the stream reports a timeout, false otherwise

    if (!($this->read_timeout > 0))
        return false;

    $stream_state = socket_get_status($fp);
    if (!$stream_state["timed_out"])
        return false;

    $this->timed_out = true;
    return true;
}
860 | ||
861 | /*======================================================================*\ | |
862 | Function: _connect | |
863 | Purpose: make a socket connection | |
864 | Input: $fp file pointer | |
865 | \*======================================================================*/ | |
866 | ||
/**
 * Open a socket connection to the target host, or to the proxy when both
 * $this->proxy_host and $this->proxy_port are set.
 *
 * Side effects: sets $this->_isproxy when a proxy is used, resets
 * $this->status to 0 before connecting, and on failure stores the
 * fsockopen() error number in $this->status and a message in $this->error.
 *
 * @param resource $fp receives the opened socket (passed by reference)
 * @return bool true when the connection succeeded, false otherwise
 */
function _connect(&$fp)
{
    if(!empty($this->proxy_host) && !empty($this->proxy_port))
    {
        $this->_isproxy = true;
        $host = $this->proxy_host;
        $port = $this->proxy_port;
    }
    else
    {
        $host = $this->host;
        $port = $this->port;
    }

    $this->status = 0;

    // Let PHP's stream layer handle TLS for https requests.
    if($this->_scheme == "https")
    {
        $host = "ssl://" . $host;
    }

    $fp = fsockopen(
        $host,
        $port,
        $errno,
        $errstr,
        $this->_fp_timeout
    );

    if($fp)
    {
        // socket connection succeeded
        return true;
    }

    // socket connection failed
    $this->status = $errno;
    switch($errno)
    {
        // Each case needs its own break: without it control falls through
        // to "default" and the specific message is always overwritten.
        case -3:
            $this->error="socket creation failed (-3)";
            break;
        case -4:
            $this->error="dns lookup failure (-4)";
            break;
        case -5:
            $this->error="connection refused or timed out (-5)";
            break;
        default:
            $this->error="connection failed (".$errno.")";
            break;
    }
    return false;
}
918 | /*======================================================================*\ | |
919 | Function: _disconnect | |
920 | Purpose: disconnect a socket connection | |
921 | Input: $fp file pointer | |
922 | \*======================================================================*/ | |
923 | ||
/**
 * Close a previously opened socket/stream.
 *
 * @param resource $fp the stream to close
 * @return bool the result of fclose(): true on success, false on failure
 */
function _disconnect($fp)
{
    $closed = fclose($fp);

    return $closed;
}
928 | ||
929 | ||
930 | /*======================================================================*\ | |
931 | Function: _prepare_post_body | |
932 | Purpose: Prepare post body according to encoding type | |
933 | Input: $formvars - form variables | |
934 | $formfiles - form upload files | |
935 | Output: post body | |
936 | \*======================================================================*/ | |
937 | ||
/**
 * Build the request body for a form submission according to
 * $this->_submit_type.
 *
 * - "application/x-www-form-urlencoded": key=value pairs joined with "&"
 *   (a trailing "&" is left in place); array/object values are emitted
 *   as repeated "key[]=value" pairs.
 * - "multipart/form-data": generates a fresh boundary, stores it in
 *   $this->_mime_boundary, and emits one part per variable and per
 *   readable upload file, followed by the closing boundary.
 *
 * @param mixed $formvars  form variables (cast to array); values may be
 *                         scalars or arrays/objects of scalars
 * @param mixed $formfiles upload files (cast to array): field name =>
 *                         file path or list of file paths
 * @return string|null the post body; null when there are no variables and
 *                     no files; an empty string when the submit type is
 *                     not one of the two handled above
 */
function _prepare_post_body($formvars, $formfiles)
{
    settype($formvars, "array");
    settype($formfiles, "array");

    // Nothing to send: keep the historical bare return (null).
    if (count($formvars) == 0 && count($formfiles) == 0)
        return;

    // Initialise explicitly so ".=" never touches an undefined variable and
    // an unhandled submit type yields a defined (empty) result.
    $postdata = "";

    switch ($this->_submit_type) {
        case "application/x-www-form-urlencoded":
            // foreach instead of each(): each() no longer exists in PHP 8.
            foreach ($formvars as $key => $val) {
                if (is_array($val) || is_object($val)) {
                    foreach ($val as $cur_val) {
                        $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
                    }
                } else {
                    $postdata .= urlencode($key)."=".urlencode($val)."&";
                }
            }
            break;

        case "multipart/form-data":
            $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));

            foreach ($formvars as $key => $val) {
                if (is_array($val) || is_object($val)) {
                    foreach ($val as $cur_val) {
                        $postdata .= "--".$this->_mime_boundary."\r\n";
                        // Concatenate the "[]" suffix: writing \[\] inside a
                        // double-quoted string would emit the backslashes
                        // literally as part of the field name.
                        $postdata .= "Content-Disposition: form-data; name=\"".$key."[]\"\r\n\r\n";
                        $postdata .= $cur_val."\r\n";
                    }
                } else {
                    $postdata .= "--".$this->_mime_boundary."\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"".$key."\"\r\n\r\n";
                    $postdata .= $val."\r\n";
                }
            }

            foreach ($formfiles as $field_name => $file_names) {
                settype($file_names, "array");
                foreach ($file_names as $file_name) {
                    if (!is_readable($file_name)) continue;

                    // file_get_contents copes with empty files (a zero-length
                    // fread does not) and reports failure as false.
                    $file_content = file_get_contents($file_name);
                    if ($file_content === false) continue;

                    $base_name = basename($file_name);

                    $postdata .= "--".$this->_mime_boundary."\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"".$field_name."\"; filename=\"".$base_name."\"\r\n\r\n";
                    $postdata .= $file_content."\r\n";
                }
            }
            // Closing boundary terminates the multipart body.
            $postdata .= "--".$this->_mime_boundary."--\r\n";
            break;
    }

    return $postdata;
}
999 | } | |
1000 | ||
1001 | ?> |