]> git.wh0rd.org - tt-rss.git/blob - lib/magpierss/extlib/Snoopy.class.inc
update translations
[tt-rss.git] / lib / magpierss / extlib / Snoopy.class.inc
1 <?php
2
3 /*************************************************
4
5 Snoopy - the PHP net client
6 Author: Monte Ohrt <monte@ispi.net>
7 Copyright (c): 1999-2000 ispi, all rights reserved
8 Version: 1.0 (plus - see SJM comments below)
9
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
14 *
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23
24 You may contact the author of Snoopy by e-mail at:
25 monte@ispi.net
26
27 Or, write to:
28 Monte Ohrt
29 CTO, ispi
30 237 S. 70th suite 220
31 Lincoln, NE 68510
32
33 The latest version of Snoopy can be obtained from:
34 http://snoopy.sourceforge.com
35
36
37
38 SJM - alpha-grade changes based on the version of Snoopy released with MagpieRSS 0.7
39
40 comments to steve@minutillo.com
41
42 Two additions:
43
44 1) If this is PHP 4.3 or greater, and 'openssl' is available,
45 use the PHP built in SSL support for "https" instead of calling curl externally.
46 Use of external curl can still be forced by setting $use_curl = true.
47
48 ref: http://us2.php.net/fsockopen
49
50 2) HTTP Digest Authentication. If you set a username and password, basic auth
51 will be tried first. If that fails, and the server sends back a
52 WWW-Authenticate: Digest header, the request will be retried with the appropriate
53 digest response. Only qop=auth is supported, with MD5 as the algorithm.
54 I realize that sending basic auth first, and then following up with a digest
55 challenge-response kind of defeats the purpose in terms of security.
56
57 ref: http://www.faqs.org/rfcs/rfc2617.html
58
59 *************************************************/
60
61 class Snoopy
62 {
/**** Public variables ****/

/* user definable vars */

var $host = "www.php.net";      // host name we are connecting to
var $port = 80;                 // port we are connecting to
var $proxy_host = "";           // proxy host to use
var $proxy_port = "";           // proxy port to use
var $agent = "Snoopy v1.0";     // agent we masquerade as
var $referer = "";              // referer info to pass
var $cookies = array();         // array of cookies to pass
                                // $cookies["username"]="joe";
var $rawheaders = array();      // array of raw headers to send
                                // $rawheaders["Content-type"]="text/html";

var $maxredirs = 5;             // http redirection depth maximum. 0 = disallow
var $lastredirectaddr = "";     // contains address of last redirected address
var $offsiteok = true;          // allows redirection off-site
var $maxframes = 0;             // frame content depth maximum. 0 = disallow
var $expandlinks = true;        // expand links to fully qualified URLs.
                                // this only applies to fetchlinks()
                                // or submitlinks()
var $passcookies = true;        // pass set cookies back through redirects
                                // NOTE: this currently does not respect
                                // dates, domains or paths.

var $user = "";                 // user for http authentication
var $pass = "";                 // password for http authentication

// http accept types
var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";

var $results = "";              // where the content is put

var $error = "";                // error messages sent here
var $response_code = "";        // response code returned from server
var $headers = array();         // headers returned from server sent here
var $maxlength = 500000;        // max return data length (body)
var $read_timeout = 0;          // timeout on read operations, in seconds
                                // supported only since PHP 4 Beta 4
                                // set to 0 to disallow timeouts
var $timed_out = false;         // if a read operation timed out
var $status = 0;                // http request status

var $curl_path = "/usr/bin/curl";
                                // Snoopy will use cURL for fetching
                                // SSL content if a full system path to
                                // the cURL binary is supplied here.
                                // set to false if you do not have
                                // cURL installed. See http://curl.haxx.se
                                // for details on installing cURL.
                                // Snoopy does *not* use the cURL
                                // library functions built into php,
                                // as these functions are not stable
                                // as of this Snoopy release.

// SJM - always use curl for HTTPS requests?
var $use_curl = false;


// send Accept-encoding: gzip?
var $use_gzip = true;

/**** Private variables ****/

var $_maxlinelen = 4096;        // max line length (headers)

var $_scheme = "http";          // default scheme
var $_httpmethod = "GET";       // default http request method
var $_httpversion = "HTTP/1.0"; // default http request version
var $_submit_method = "POST";   // default submit method
var $_submit_type = "application/x-www-form-urlencoded"; // default submit type
var $_mime_boundary = "";       // MIME boundary for multipart/form-data submit type
var $_redirectaddr = false;     // will be set if page fetched is a redirect
var $_redirectdepth = 0;        // increments on an http redirect
var $_trieddigest = false;      // have we tried Digest auth yet?
var $_tried_digest = false;     // same flag under the name the request code reads
                                // and writes; declared so it is never an undefined
                                // (dynamically created) property
var $_frameurls = array();      // frame src urls
var $_framedepth = 0;           // increments on frame depth

var $_isproxy = false;          // set if using a proxy server
var $_fp_timeout = 30;          // timeout for socket connection
144
145 /*======================================================================*\
146 Function: fetch
147 Purpose: fetch the contents of a web page
148 (and possibly other protocols in the
149 future like ftp, nntp, gopher, etc.)
150 Input: $URI the location of the page to fetch
151 Output: $this->results the output text from the fetch
152 \*======================================================================*/
153
/**
 * Fetch a page over http or https and store its body in $this->results.
 *
 * https is served by PHP's own ssl:// sockets unless $this->use_curl is set,
 * file_get_contents() is missing (pre PHP 4.3) or the openssl extension is
 * not loaded; in those cases the external curl binary is used instead.
 * Redirects are followed while $this->_redirectdepth is below
 * $this->maxredirs, and collected frame URLs are fetched while
 * $this->_framedepth is below $this->maxframes.
 *
 * The return values of the request helpers and of the recursive fetches are
 * not inspected; only a bad URI, a bad curl path or a failed socket
 * connection make this method return false.
 *
 * @param string $URI absolute URI of the page to fetch
 * @return bool false with $this->error set on an unusable URI / curl path,
 *              false when the socket connection fails, true otherwise
 */
function fetch($URI)
{
    //preg_match("|^([^:]+)://([^:/]+)(:[\d]+)*(.*)|",$URI,$URI_PARTS);
    $URI_PARTS = parse_url($URI);
    if(!is_array($URI_PARTS))
        $URI_PARTS = array();       // parse_url() returns false on a malformed URI

    if(!empty($URI_PARTS["user"]))
        $this->user = urldecode($URI_PARTS["user"]);
    if(!empty($URI_PARTS["pass"]))
        $this->pass = urldecode($URI_PARTS["pass"]);

    $scheme = isset($URI_PARTS["scheme"]) ? strtolower($URI_PARTS["scheme"]) : "";
    $this->_scheme = $scheme;

    if($scheme != "http" && $scheme != "https")
    {
        // not a valid protocol
        $this->error = 'Invalid protocol "'.$scheme.'"'."\n";
        return false;
    }

    if(empty($URI_PARTS["host"]))
    {
        $this->error = 'Invalid URI "'.$URI.'"'."\n";
        return false;
    }

    // SJM - use curl for https if they really want it, or it isn't PHP 4.3 yet,
    // or the openssl extension isn't loaded
    $via_curl = false;
    if($scheme == "https")
    {
        $via_curl = $this->use_curl
            || !function_exists('file_get_contents')
            || !extension_loaded('openssl');

        if($via_curl && (!$this->curl_path || !is_executable($this->curl_path)))
        {
            $this->error = "Bad curl ($this->curl_path), can't fetch HTTPS \n";
            return false;
        }
    }

    $this->host = $URI_PARTS["host"];
    if(!empty($URI_PARTS["port"]))
        $this->port = $URI_PARTS["port"];
    elseif($scheme == "https")
        $this->port = 443;          // also used when relative redirects are rebuilt

    // request target used when no proxy is involved
    $path = (isset($URI_PARTS["path"]) && $URI_PARTS["path"] != "") ? $URI_PARTS["path"] : "/";
    if(isset($URI_PARTS["query"]))
        $path .= "?".$URI_PARTS["query"];

    if($via_curl)
    {
        // using proxy, send entire URI; no proxy, send only the path
        $this->_curlrequest($this->_isproxy ? $URI : $path, $URI, $this->_httpmethod);
    }
    else
    {
        $fp = null;
        if(!$this->_connect($fp))
            return false;

        // _connect() decides whether a proxy is in use, so test the flag afterwards
        $this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_httpmethod);
        $this->_disconnect($fp);
    }

    /* url was redirected, check if we've hit the max depth */
    if($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
    {
        // only follow redirect if it's on this site, or offsiteok is true
        $onsite = preg_match(
            '~^https?://'.preg_quote($this->host, '~').'(?=[:/?#]|$)~i',
            $this->_redirectaddr
        );
        if($onsite || $this->offsiteok)
        {
            /* follow the redirect */
            $this->_redirectdepth++;
            $this->lastredirectaddr = $this->_redirectaddr;
            $this->fetch($this->_redirectaddr);
        }
    }

    if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
    {
        $frameurls = $this->_frameurls;
        $this->_frameurls = array();

        foreach($frameurls as $frameurl)
        {
            if($this->_framedepth >= $this->maxframes)
                break;
            $this->fetch($frameurl);
            $this->_framedepth++;
        }
    }

    return true;
}
309
310
311
312 /*======================================================================*\
313 Private functions
314 \*======================================================================*/
315
316
317 /*======================================================================*\
318 Function: _striplinks
319 Purpose: strip the hyperlinks from an html document
320 Input: $document document to strip.
321 Output: $match an array of the links
322 \*======================================================================*/
323
/**
 * Collect the href values of anchor tags found in an html document.
 *
 * Quoted targets (capture group 2) are returned first, followed by the
 * unquoted ones (capture group 3). Values that are empty() are skipped.
 *
 * @param string $document html to scan
 * @return array list of link targets; empty array when nothing matched
 */
function _striplinks($document)
{
    preg_match_all("'<\s*a\s+.*href\s*=\s*          # find <a href=
                    ([\"\'])?                       # find single or double quote
                    (?(1) (.*?)\\1 | ([^\s\>]+))    # if quote found, match up to next matching
                                                    # quote, otherwise match up to next space
                    'isx",$document,$links);

    // catenate the non-empty matches from the conditional subpattern
    $match = array();

    foreach($links[2] as $val)
    {
        if(!empty($val))
            $match[] = $val;
    }

    foreach($links[3] as $val)
    {
        if(!empty($val))
            $match[] = $val;
    }

    // return the links
    return $match;
}
350
351 /*======================================================================*\
352 Function: _stripform
353 Purpose: strip the form elements from an html document
354 Input: $document document to strip.
355 Output: $match an array of the links
356 \*======================================================================*/
357
/**
 * Pull the form related tags (form, input, select, textarea, option) out of
 * an html document.
 *
 * @param string $document html to scan
 * @return string the matched tags joined with CRLF
 */
function _stripform($document)
{
    $form_tag_pattern = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";

    preg_match_all($form_tag_pattern, $document, $found);

    // element 0 holds the full text of every match
    return implode("\r\n", $found[0]);
}
368
369
370
371 /*======================================================================*\
372 Function: _striptext
373 Purpose: strip the text from an html document
374 Input: $document document to strip.
375 Output: $text the resulting text
376 \*======================================================================*/
377
/**
 * Reduce an html document to its text.
 *
 * Script blocks and tags are removed, whitespace following a line break is
 * collapsed, and a small set of entities is decoded. The replacements run
 * one after another in the listed order, so &amp; is decoded last: decoding
 * it earlier would turn "&amp;lt;" into "<" instead of "&lt;".
 *
 * @param string $document html to strip
 * @return string the resulting text
 */
function _striptext($document)
{
    // I didn't use preg eval (//e) since that is only available in PHP 4.0.
    // so, list your entities one by one here. I included some of the
    // more common ones.

    $search = array("'<script[^>]*?>.*?</script>'si",  // strip out javascript
                    "'<[\/\!]*?[^<>]*?>'si",           // strip out html tags
                    "'([\r\n])[\s]+'",                 // strip out white space
                    "'&(quote?|#34);'i",               // replace html entities; the real
                                                       // entity is &quot;, the historical
                                                       // misspelling &quote; is still accepted
                    "'&(lt|#60);'i",
                    "'&(gt|#62);'i",
                    "'&(nbsp|#160);'i",
                    "'&(iexcl|#161);'i",
                    "'&(cent|#162);'i",
                    "'&(pound|#163);'i",
                    "'&(copy|#169);'i",
                    "'&(amp|#38);'i"                   // last, see above
                    );
    $replace = array("",
                     "",
                     "\\1",
                     "\"",
                     "<",
                     ">",
                     " ",
                     chr(161),
                     chr(162),
                     chr(163),
                     chr(169),
                     "&");

    $text = preg_replace($search,$replace,$document);

    return $text;
}
415
416 /*======================================================================*\
417 Function: _expandlinks
418 Purpose: expand each link into a fully qualified URL
419 Input: $links the links to qualify
420 $URI the full URI to get the base from
421 Output: $expandedLinks the expanded links
422 \*======================================================================*/
423
/**
 * Expand links into fully qualified URLs.
 *
 * Links that already start with http://, https:// or mailto: are left
 * alone. Links starting with "/" are resolved against scheme://host[:port]
 * of $URI, every other link against the "directory" of $URI (the URI
 * without its query string and without a trailing "name.ext" segment).
 * "/./" and one "/segment/../" are collapsed afterwards.
 *
 * @param string|array $links the link(s) to qualify
 * @param string       $URI   the full URI to get the base from
 * @return string|array the expanded link(s), same shape as $links
 */
function _expandlinks($links,$URI)
{
    // drop the query string
    $base = preg_match("/^[^\?]+/", $URI, $match) ? $match[0] : "";

    // drop a trailing "file.ext"; the lookbehind keeps the host part of a
    // bare "http://example.com" from being mistaken for a file name
    $base = preg_replace("|(?<!/)/[^\/\.]+\.[^\/\.]+$|", "", $base);
    // avoid "dir//link" when the URI ended in a slash
    $base = preg_replace("|/$|", "", $base);

    // site root for links that start with a slash
    $root = $base;
    $base_parts = parse_url($base);
    if(is_array($base_parts) && isset($base_parts["scheme"]) && isset($base_parts["host"]))
    {
        $root = $base_parts["scheme"]."://".$base_parts["host"];
        if(isset($base_parts["port"]))
            $root .= ":".$base_parts["port"];
    }

    // applied in order: once a link has been made absolute by the first
    // rule, the second rule no longer matches it
    $search = array( "|^/|",
                     "|^(?!https?://)(?!mailto:)|i",
                     "|/\./|",
                     "|/[^\/]+/\.\./|"
                    );

    $replace = array( $root."/",
                      $base."/",
                      "/",
                      "/"
                    );

    $expandedLinks = preg_replace($search,$replace,$links);

    return $expandedLinks;
}
447
448 /*======================================================================*\
449 Function: _httprequest
450 Purpose: go get the http data from the server
451 Input: $url the url to fetch
452 $fp the current open file pointer
453 $URI the full URI
454 $body body contents to send if any (POST)
455 Output:
456 \*======================================================================*/
457
/**
 * Send one HTTP request over an open socket and read the response.
 *
 * Fills $this->headers, $this->response_code, $this->status and
 * $this->results, sets $this->_redirectaddr when the response asks for a
 * redirect (Location/URI header, meta refresh, or a Digest challenge that is
 * answered by re-requesting the same URL) and collects frame URLs in
 * $this->_frameurls.
 *
 * @param string   $url          request target (path, or full URI for a proxy)
 * @param resource $fp           the current open file pointer
 * @param string   $URI          the full URI
 * @param string   $http_method  request method, e.g. "GET"
 * @param string   $content_type Content-type of the body, if any
 * @param string   $body         body contents to send if any (POST)
 * @return bool false when a read timed out (status is set to -100), else true
 */
function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
{
    if($this->passcookies && $this->_redirectaddr)
        $this->setcookies();

    $URI_PARTS = parse_url($URI);
    $scheme = (is_array($URI_PARTS) && isset($URI_PARTS["scheme"])) ? $URI_PARTS["scheme"] : $this->_scheme;
    if(empty($url))
        $url = "/";

    $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
    if(!empty($this->agent))
        $headers .= "User-Agent: ".$this->agent."\r\n";
    if(!empty($this->host) && !isset($this->rawheaders['Host']))
        $headers .= "Host: ".$this->host."\r\n";
    if(!empty($this->accept))
        $headers .= "Accept: ".$this->accept."\r\n";

    if($this->use_gzip) {
        // make sure PHP was built with --with-zlib
        // and we can handle gzipp'ed data
        if ( function_exists('gzinflate') ) {
            $headers .= "Accept-encoding: gzip\r\n";
        }
        else {
            trigger_error(
                "use_gzip is on, but PHP was built without zlib support.".
                " Requesting file(s) without gzip encoding.",
                E_USER_NOTICE);
        }
    }

    if(!empty($this->referer))
        $headers .= "Referer: ".$this->referer."\r\n";
    if(!empty($this->cookies))
    {
        if(!is_array($this->cookies))
            $this->cookies = (array)$this->cookies;

        $cookie_pairs = array();
        foreach ( $this->cookies as $cookieKey => $cookieVal )
            $cookie_pairs[] = $cookieKey."=".urlencode($cookieVal);
        $headers .= "Cookie: ".implode("; ", $cookie_pairs)."\r\n";
    }
    if(!empty($this->rawheaders))
    {
        if(!is_array($this->rawheaders))
            $this->rawheaders = (array)$this->rawheaders;
        // values may or may not carry their own line ending; emit exactly one
        foreach($this->rawheaders as $headerKey => $headerVal)
            $headers .= $headerKey.": ".rtrim($headerVal, "\r\n")."\r\n";
    }
    if(!empty($content_type)) {
        $headers .= "Content-type: $content_type";
        if ($content_type == "multipart/form-data")
            $headers .= "; boundary=".$this->_mime_boundary;
        $headers .= "\r\n";
    }
    if(!empty($body))
        $headers .= "Content-length: ".strlen($body)."\r\n";
    if(!empty($this->user) || !empty($this->pass))
        $headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";

    $headers .= "\r\n";

    // set the read timeout if needed
    if ($this->read_timeout > 0)
        socket_set_timeout($fp, $this->read_timeout);
    $this->timed_out = false;

    fwrite($fp,$headers.$body,strlen($headers.$body));

    $this->_redirectaddr = false;
    // keep the property an array so later count()/foreach on it stay valid
    $this->headers = array();

    // content was returned gzip encoded?
    $is_gzipped = false;

    while(($currentHeader = fgets($fp,$this->_maxlinelen)) !== false)
    {
        if ($this->read_timeout > 0 && $this->_check_timeout($fp))
        {
            $this->status=-100;
            return false;
        }

        // blank line ends the header section
        if(preg_match("/^\r?\n$/", $currentHeader) )
            break;

        if(!$this->_trieddigest && preg_match("/^WWW-Authenticate:\s*Digest\s+(.*)/i", $currentHeader, $matches))
        {
            // SJM - we got a Digest challenge. Try to respond...

            $digestheader = $matches[1];

            $nonce  = preg_match('/nonce="(.*?)"/i', $digestheader, $m) ? $m[1] : "";
            $realm  = preg_match('/realm="(.*?)"/i', $digestheader, $m) ? $m[1] : "";

            $cnonce = md5(microtime());

            $a1 = $this->user . ":" . $realm . ":" . $this->pass;
            $a2 = $http_method . ":" . $url;

            $ha1 = md5($a1);
            $ha2 = md5($a2);

            $response = md5($ha1 . ":" . $nonce . ":00000001:" . $cnonce . ":auth:" . $ha2);

            $auth  = 'Digest username="' . $this->user . '", ';
            $auth .= 'realm="' . $realm . '", ';
            $auth .= 'nonce="' . $nonce . '", ';
            $auth .= 'uri="' . $url . '", ';
            $auth .= 'response="' . $response . '", ';
            $auth .= 'algorithm="MD5", ';
            $auth .= 'cnonce="' . $cnonce . '", ';
            $auth .= 'nc=00000001, ';
            $auth .= 'qop="auth"';
            // the server's opaque value is handed back unchanged when it sent one
            if(preg_match('/opaque="(.*?)"/i', $digestheader, $m))
                $auth .= ', opaque="' . $m[1] . '"';

            // SJM - treat Digest challenge as a redirect. set flag so we don't keep retrying.

            $this->_trieddigest = true;

            $this->rawheaders["Authorization"] = $auth;
            // clear the credentials so no Basic header is sent on the retry
            $this->user = "";
            $this->pass = "";

            $this->_redirectaddr = $scheme . '://' . $this->host . $url;
        }

        // if a header begins with Location: or URI:, set the redirect
        if(preg_match("/^(Location:|URI:)\s*(.*)/i",chop($currentHeader),$matches))
        {
            // look for :// in the Location header to see if hostname is included
            if(!preg_match("|\:\/\/|",$matches[2]))
            {
                // no host in the path, so prepend
                $this->_redirectaddr = $scheme."://".$this->host.":".$this->port;
                // eliminate double slash
                if(!preg_match("|^/|",$matches[2]))
                    $this->_redirectaddr .= "/".$matches[2];
                else
                    $this->_redirectaddr .= $matches[2];
            }
            else
                $this->_redirectaddr = $matches[2];
        }

        if(preg_match("|^HTTP/|",$currentHeader))
        {
            if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
            {
                $this->status= $status[1];
            }
            $this->response_code = $currentHeader;
        }

        if (preg_match("/^Content-Encoding:\s*(x-)?gzip/i", $currentHeader) ) {
            $is_gzipped = true;
        }

        $this->headers[] = $currentHeader;
    }

    // read the body; stop once more than maxlength bytes have been collected
    $results = "";
    while ( !feof($fp) ) {
        $data = fread($fp, $this->maxlength);
        if ( $data === false || $data === "" ) {
            // read error or timeout
            break;
        }
        $results .= $data;
        if ( strlen($results) > $this->maxlength ) {
            break;
        }
    }

    // gunzip
    if ( $is_gzipped && $results != "" && function_exists('gzinflate') ) {
        if ( function_exists('gzdecode') ) {
            // parses the gzip header itself, including optional fields
            $inflated = gzdecode($results);
        }
        else {
            // per http://www.php.net/manual/en/function.gzencode.php
            // (assumes the minimal 10 byte gzip header)
            $inflated = gzinflate(substr($results, 10));
        }

        if ( $inflated === false ) {
            $this->error = "could not decode gzip encoded content";
            $results = "";
        }
        else {
            $results = $inflated;
        }
    }

    if ($this->read_timeout > 0 && $this->_check_timeout($fp))
    {
        $this->status=-100;
        return false;
    }

    // check if there is a redirect meta tag

    if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
    {
        $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
    {
        // results starts out as a string; switch to a list before appending
        if(!is_array($this->results))
            $this->results = array();
        $this->results[] = $results;
        for($x=0; $x<count($match[1]); $x++)
            $this->_frameurls[] = $this->_expandlinks($match[1][$x],$scheme."://".$this->host);
    }
    // have we already fetched framed content?
    elseif(is_array($this->results))
        $this->results[] = $results;
    // no framed content
    else
        $this->results = $results;

    return true;
}
674
675 /*======================================================================*\
676 Function: _curlrequest
677 Purpose: go get the https data from the server using curl
678 Input: $url the url to fetch
679 $URI the full URI
680 $body body contents to send if any (POST)
681 Output:
682 \*======================================================================*/
683
/**
 * Fetch an https page by running the external curl binary.
 *
 * The response headers are written by curl (-D) to a temporary file, the
 * body is taken from curl's standard output. Fills the same properties as
 * the socket based request: headers, response_code, status, results,
 * _redirectaddr and _frameurls.
 *
 * Every value placed on the command line goes through escapeshellarg():
 * the URI, the raw headers and the cookies can originate from remote
 * servers (redirects, Set-Cookie) and must not reach the shell unquoted.
 *
 * @param string $url          request target (only checked for emptiness here)
 * @param string $URI          the full URI handed to curl
 * @param string $http_method  request method (not passed on to curl)
 * @param string $content_type Content-type of the body, if any
 * @param string $body         body contents to send if any (POST)
 * @return bool false with $this->error set when curl could not be run or
 *              exited non-zero, true otherwise
 */
function _curlrequest($url,$URI,$http_method,$content_type="",$body="")
{
    if($this->passcookies && $this->_redirectaddr)
        $this->setcookies();

    $headers = array();

    $URI_PARTS = parse_url($URI);
    $scheme = (is_array($URI_PARTS) && isset($URI_PARTS["scheme"])) ? $URI_PARTS["scheme"] : $this->_scheme;
    if(empty($url))
        $url = "/";
    // GET ... header not needed for curl
    //$headers[] = $http_method." ".$url." ".$this->_httpversion;
    if(!empty($this->agent))
        $headers[] = "User-Agent: ".$this->agent;
    if(!empty($this->host))
        $headers[] = "Host: ".$this->host;
    if(!empty($this->accept))
        $headers[] = "Accept: ".$this->accept;
    if(!empty($this->referer))
        $headers[] = "Referer: ".$this->referer;
    if(!empty($this->cookies))
    {
        if(!is_array($this->cookies))
            $this->cookies = (array)$this->cookies;

        $cookie_pairs = array();
        foreach ( $this->cookies as $cookieKey => $cookieVal )
            $cookie_pairs[] = $cookieKey."=".urlencode($cookieVal);
        $headers[] = "Cookie: ".implode("; ", $cookie_pairs);
    }
    if(!empty($this->rawheaders))
    {
        if(!is_array($this->rawheaders))
            $this->rawheaders = (array)$this->rawheaders;
        // a stored value may end in CRLF; curl wants the bare header
        foreach($this->rawheaders as $headerKey => $headerVal)
            $headers[] = $headerKey.": ".rtrim($headerVal, "\r\n");
    }
    if(!empty($content_type)) {
        if ($content_type == "multipart/form-data")
            $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
        else
            $headers[] = "Content-type: $content_type";
    }
    if(!empty($body))
        $headers[] = "Content-length: ".strlen($body);
    if(!empty($this->user) || !empty($this->pass))
        $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);

    $cmdline_params = "";
    foreach($headers as $header)
        $cmdline_params .= " -H ".escapeshellarg($header);

    if(!empty($body))
        $cmdline_params .= " -d ".escapeshellarg($body);

    if($this->read_timeout > 0)
        $cmdline_params .= " -m ".escapeshellarg($this->read_timeout);

    // unique, unpredictable file for the response headers
    $tmpdir = function_exists('sys_get_temp_dir') ? sys_get_temp_dir() : "/tmp";
    $headerfile = tempnam($tmpdir, "snoopy");
    if($headerfile === false)
    {
        $this->error = "Error: could not create a temporary file for cURL.";
        return false;
    }

    # accept self-signed certs

    // mbi: removed, as it breaks on older cURL's
    //$cmdline_params .= " -k";

    exec(
        escapeshellarg($this->curl_path)." -D ".escapeshellarg($headerfile).$cmdline_params." ".escapeshellarg($URI),
        $results,
        $return
    );

    if($return)
    {
        $this->error = "Error: cURL could not retrieve the document, error $return.";
        if(file_exists($headerfile))
            unlink($headerfile);
        return false;
    }

    $results = implode("\r\n",$results);

    $result_headers = file($headerfile);
    if(!is_array($result_headers))
        $result_headers = array();
    if(file_exists($headerfile))
        unlink($headerfile);

    $this->_redirectaddr = false;
    // keep the property an array so later count()/foreach on it stay valid
    $this->headers = array();

    foreach($result_headers as $header_line)
    {
        // if a header begins with Location: or URI:, set the redirect
        if(preg_match("/^(Location:|URI:)\s*(.*)/i",chop($header_line),$matches))
        {
            // look for :// in the Location header to see if hostname is included
            if(!preg_match("|\:\/\/|",$matches[2]))
            {
                // no host in the path, so prepend
                $this->_redirectaddr = $scheme."://".$this->host.":".$this->port;
                // eliminate double slash
                if(!preg_match("|^/|",$matches[2]))
                    $this->_redirectaddr .= "/".$matches[2];
                else
                    $this->_redirectaddr .= $matches[2];
            }
            else
                $this->_redirectaddr = $matches[2];
        }

        if(preg_match("|^HTTP/|",$header_line))
        {
            $this->response_code = $header_line;
            if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$this->response_code, $match))
            {
                $this->status= $match[1];
            }
        }
        $this->headers[] = $header_line;
    }

    // check if there is a redirect meta tag

    if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
    {
        $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
    {
        // results starts out as a string; switch to a list before appending
        if(!is_array($this->results))
            $this->results = array();
        $this->results[] = $results;
        for($x=0; $x<count($match[1]); $x++)
            $this->_frameurls[] = $this->_expandlinks($match[1][$x],$scheme."://".$this->host);
    }
    // have we already fetched framed content?
    elseif(is_array($this->results))
        $this->results[] = $results;
    // no framed content
    else
        $this->results = $results;

    return true;
}
827
828 /*======================================================================*\
829 Function: setcookies()
830 Purpose: set cookies for a redirection
831 \*======================================================================*/
832
/**
 * Copy the cookies of the last response into $this->cookies so they are
 * passed along on a redirect.
 *
 * Only the name=value pair of each Set-Cookie header is used; attributes
 * after the first ";" are ignored. Values are stored url-decoded because
 * the request code url-encodes every cookie value when sending it.
 */
function setcookies()
{
    // nothing to do when no response headers have been collected yet
    if(empty($this->headers) || !is_array($this->headers))
        return;

    foreach($this->headers as $header)
    {
        // stop the value at ";" or at the line ending of the header
        if(preg_match("/^set-cookie:\s*([^=]+)=([^;\r\n]+)/i", $header, $match))
            $this->cookies[$match[1]] = urldecode($match[2]);
    }
}
841
842
843 /*======================================================================*\
844 Function: _check_timeout
845 Purpose: checks whether timeout has occurred
846 Input: $fp file pointer
847 \*======================================================================*/
848
/**
 * Tell whether the last read on the socket timed out.
 *
 * The socket is only inspected when a read timeout is configured; on a
 * timeout $this->timed_out is set as well.
 *
 * @param resource $fp file pointer
 * @return bool true when the socket reports a timeout, false otherwise
 */
function _check_timeout($fp)
{
    // timeouts disabled: never report one
    if (!($this->read_timeout > 0))
        return false;

    $socket_state = socket_get_status($fp);
    if (!$socket_state["timed_out"])
        return false;

    $this->timed_out = true;
    return true;
}
860
861 /*======================================================================*\
862 Function: _connect
863 Purpose: make a socket connection
864 Input: $fp file pointer
865 \*======================================================================*/
866
/**
 * Open the socket connection for the next request.
 *
 * Connects to the proxy when both proxy_host and proxy_port are set (and
 * flags $this->_isproxy), otherwise to $this->host:$this->port. For the
 * https scheme the host is prefixed with ssl:// so PHP's openssl transport
 * is used.
 *
 * @param resource $fp receives the opened file pointer (by reference)
 * @return bool true on success; false with $this->status set to the socket
 *              error number and $this->error set to a message
 */
function _connect(&$fp)
{
    if(!empty($this->proxy_host) && !empty($this->proxy_port))
    {
        $this->_isproxy = true;
        $host = $this->proxy_host;
        $port = $this->proxy_port;
    }
    else
    {
        $host = $this->host;
        $port = $this->port;
    }

    $this->status = 0;

    if($this->_scheme == "https")
    {
        $host = "ssl://" . $host;
    }

    $fp = fsockopen(
                $host,
                $port,
                $errno,
                $errstr,
                $this->_fp_timeout
                );

    if($fp)
    {
        // socket connection succeeded
        return true;
    }

    // socket connection failed
    $this->status = $errno;
    switch($errno)
    {
        case -3:
            $this->error="socket creation failed (-3)";
            break;
        case -4:
            $this->error="dns lookup failure (-4)";
            break;
        case -5:
            $this->error="connection refused or timed out (-5)";
            break;
        default:
            $this->error="connection failed (".$errno.")";
    }
    return false;
}
918 /*======================================================================*\
919 Function: _disconnect
920 Purpose: disconnect a socket connection
921 Input: $fp file pointer
922 \*======================================================================*/
923
/**
 * Close a socket connection.
 *
 * @param resource $fp file pointer
 * @return bool the result of fclose()
 */
function _disconnect($fp)
{
    $closed = fclose($fp);

    return $closed;
}
928
929
930 /*======================================================================*\
931 Function: _prepare_post_body
932 Purpose: Prepare post body according to encoding type
933 Input: $formvars - form variables
934 $formfiles - form upload files
935 Output: post body
936 \*======================================================================*/
937
/**
 * Build the request body for a form submission according to
 * $this->_submit_type.
 *
 * For "multipart/form-data" a fresh $this->_mime_boundary is generated and
 * the contents of every readable file in $formfiles are appended; files
 * that cannot be read are skipped.
 *
 * @param mixed $formvars  form variables (name => value, or name => list of values)
 * @param mixed $formfiles form upload files (field name => file name or list of file names)
 * @return string|null the post body; null when there is nothing to send,
 *                     an empty string for an unknown submit type
 */
function _prepare_post_body($formvars, $formfiles)
{
    settype($formvars, "array");
    settype($formfiles, "array");

    if (count($formvars) == 0 && count($formfiles) == 0)
        return;

    $postdata = "";

    switch ($this->_submit_type) {
        case "application/x-www-form-urlencoded":
            foreach ($formvars as $key => $val) {
                if (is_array($val) || is_object($val)) {
                    foreach ($val as $cur_val) {
                        $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
                    }
                } else
                    $postdata .= urlencode($key)."=".urlencode($val)."&";
            }
            break;

        case "multipart/form-data":
            $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));

            foreach ($formvars as $key => $val) {
                if (is_array($val) || is_object($val)) {
                    foreach ($val as $cur_val) {
                        $postdata .= "--".$this->_mime_boundary."\r\n";
                        // plain concatenation: inside a double quoted string
                        // "\[" would put a literal backslash into the name
                        $postdata .= "Content-Disposition: form-data; name=\"".$key."[]\"\r\n\r\n";
                        $postdata .= "$cur_val\r\n";
                    }
                } else {
                    $postdata .= "--".$this->_mime_boundary."\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
                    $postdata .= "$val\r\n";
                }
            }

            foreach ($formfiles as $field_name => $file_names) {
                settype($file_names, "array");
                foreach ($file_names as $file_name) {
                    if (!is_readable($file_name)) continue;

                    // binary safe, and fine with empty files
                    $file_content = file_get_contents($file_name);
                    if ($file_content === false) continue;
                    $base_name = basename($file_name);

                    $postdata .= "--".$this->_mime_boundary."\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
                    $postdata .= "$file_content\r\n";
                }
            }
            $postdata .= "--".$this->_mime_boundary."--\r\n";
            break;
    }

    return $postdata;
}
999 }
1000
1001 ?>