]> git.wh0rd.org Git - tt-rss.git/blob - lib/magpierss/extlib/Snoopy.class.inc
update translations
[tt-rss.git] / lib / magpierss / extlib / Snoopy.class.inc
1 <?php
2
3 /*************************************************
4
5 Snoopy - the PHP net client
6 Author: Monte Ohrt <monte@ispi.net>
7 Copyright (c): 1999-2000 ispi, all rights reserved
8 Version: 1.0 (plus - see SJM comments below)
9
10  * This library is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with this library; if not, write to the Free Software
22  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
23
24 You may contact the author of Snoopy by e-mail at:
25 monte@ispi.net
26
27 Or, write to:
28 Monte Ohrt
29 CTO, ispi
30 237 S. 70th suite 220
31 Lincoln, NE 68510
32
33 The latest version of Snoopy can be obtained from:
34 http://snoopy.sourceforge.com
35
36
37
38 SJM - alpha-grade changes based on the version of Snoopy released with MagpieRSS 0.7
39
40 comments to steve@minutillo.com
41
42 Two additions:
43
44 1) If this is PHP 4.3 or greater, and 'openssl' is available,
45    use the PHP built in SSL support for "https" instead of calling curl externally.
46    Use of external curl can still be forced by setting $use_curl = true.
47    
48    ref:  http://us2.php.net/fsockopen
49    
50 2) HTTP Digest Authentication.  If you set a username and password, basic auth
51    will be tried first.  If that fails, and the server sends back a
52    WWW-Authenticate: Digest header, the request will be retried with the appropriate
53    digest response.  Only qop=auth is supported, with MD5 as the algorithm.
54    I realize that sending basic auth first, and then following up with a digest
55    challenge-response kind of defeats the purpose in terms of security.
56    
57    ref:  http://www.faqs.org/rfcs/rfc2617.html
58
59 *************************************************/
60
61 class Snoopy
62 {
63         /**** Public variables ****/
64         
65         /* user definable vars */
66
67         var $host                       =       "www.php.net";          // host name we are connecting to
68         var $port                       =       80;                                     // port we are connecting to
69         var $proxy_host         =       "";                                     // proxy host to use
70         var $proxy_port         =       "";                                     // proxy port to use
71         var $agent                      =       "Snoopy v1.0";          // agent we masquerade as
72         var     $referer                =       "";                                     // referer info to pass
73         var $cookies            =       array();                        // array of cookies to pass
74                                                                                                 // $cookies["username"]="joe";
75         var     $rawheaders             =       array();                        // array of raw headers to send
76                                                                                                 // $rawheaders["Content-type"]="text/html";
77
78         var $maxredirs          =       5;                                      // http redirection depth maximum. 0 = disallow
79         var $lastredirectaddr   =       "";                             // contains address of last redirected address
80         var     $offsiteok              =       true;                           // allows redirection off-site
81         var $maxframes          =       0;                                      // frame content depth maximum. 0 = disallow
82         var $expandlinks        =       true;                           // expand links to fully qualified URLs.
83                                                                                                 // this only applies to fetchlinks()
84                                                                                                 // or submitlinks()
85         var $passcookies        =       true;                           // pass set cookies back through redirects
86                                                                                                 // NOTE: this currently does not respect
87                                                                                                 // dates, domains or paths.
88         
89         var     $user                   =       "";                                     // user for http authentication
90         var     $pass                   =       "";                                     // password for http authentication
91         
92         // http accept types
93         var $accept                     =       "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
94         
95         var $results            =       "";                                     // where the content is put
96                 
97         var $error                      =       "";                                     // error messages sent here
98         var     $response_code  =       "";                                     // response code returned from server
99         var     $headers                =       array();                        // headers returned from server sent here
100         var     $maxlength              =       500000;                         // max return data length (body)
101         var $read_timeout       =       0;                                      // timeout on read operations, in seconds
102                                                                                                 // supported only since PHP 4 Beta 4
103                                                                                                 // set to 0 to disallow timeouts
104         var $timed_out          =       false;                          // if a read operation timed out
105         var     $status                 =       0;                                      // http request status
106         
107         var     $curl_path              =       "/usr/bin/curl";
108                                                                                                 // Snoopy will use cURL for fetching
109                                                                                                 // SSL content if a full system path to
110                                                                                                 // the cURL binary is supplied here.
111                                                                                                 // set to false if you do not have
112                                                                                                 // cURL installed. See http://curl.haxx.se
113                                                                                                 // for details on installing cURL.
114                                                                                                 // Snoopy does *not* use the cURL
115                                                                                                 // library functions built into php,
116                                                                                                 // as these functions are not stable
117                                                                                                 // as of this Snoopy release.
118         
119         // SJM - always use curl for HTTPS requests?
120         var $use_curl           = false;        
121         
122
123         // send Accept-encoding: gzip?
124         var $use_gzip           = true; 
125         
126         /**** Private variables ****/   
127         
128         var     $_maxlinelen    =       4096;                           // max line length (headers)
129         
130         var $_scheme    =       "http";                         // default scheme
131         var $_httpmethod        =       "GET";                          // default http request method
132         var $_httpversion       =       "HTTP/1.0";                     // default http request version
133         var $_submit_method     =       "POST";                         // default submit method
134         var $_submit_type       =       "application/x-www-form-urlencoded";    // default submit type
135         var $_mime_boundary     =   "";                                 // MIME boundary for multipart/form-data submit type
136         var $_redirectaddr      =       false;                          // will be set if page fetched is a redirect
137         var $_redirectdepth     =       0;                                      // increments on an http redirect
138         var $_trieddigest       =       false;                                  // have we tried Digest auth yet?
139         var $_frameurls         =       array();                        // frame src urls
140         var $_framedepth        =       0;                                      // increments on frame depth
141         
142         var $_isproxy           =       false;                          // set if using a proxy server
143         var $_fp_timeout        =       30;                                     // timeout for socket connection
144
145 /*======================================================================*\
146         Function:       fetch
147         Purpose:        fetch the contents of a web page
148                                 (and possibly other protocols in the
149                                 future like ftp, nntp, gopher, etc.)
150         Input:          $URI    the location of the page to fetch
151         Output:         $this->results  the output text from the fetch
152 \*======================================================================*/
153
154         function fetch($URI)
155         {
156         
157                 //preg_match("|^([^:]+)://([^:/]+)(:[\d]+)*(.*)|",$URI,$URI_PARTS);
158                 $URI_PARTS = parse_url($URI);
159                 if (!empty($URI_PARTS["user"]))
160                   $this->user = urldecode($URI_PARTS["user"]);
161                 if (!empty($URI_PARTS["pass"]))
162                   $this->pass = urldecode($URI_PARTS["pass"]);
163                 
164                 $this->_scheme = $URI_PARTS["scheme"];
165
166                 switch($URI_PARTS["scheme"])
167                 {
168                         case "http":
169                         case "https":
170                                 break;
171                                 
172                         default:
173                                 // not a valid protocol
174                                 $this->error    =       'Invalid protocol "'.$URI_PARTS["scheme"].'"\n';
175                                 return false;
176                 }
177                 
178                 if($URI_PARTS["scheme"] == "https")
179                 {
180                         // SJM - if they really want curl, or it isn't PHP 4.3 yet, or openssl extension isn't loaded
181                         
182                         if($use_curl || !function_exists('file_get_contents') || !extension_loaded('openssl'))
183                         {
184                                 if(!$this->curl_path || (!is_executable($this->curl_path))) {
185                                         $this->error = "Bad curl ($this->curl_path), can't fetch HTTPS \n";
186                                         return false;
187                                 }
188                                 $this->host = $URI_PARTS["host"];
189                                 if(!empty($URI_PARTS["port"]))
190                                         $this->port = $URI_PARTS["port"];
191                                 if($this->_isproxy)
192                                 {
193                                         // using proxy, send entire URI
194                                         $this->_curlrequest($URI,$URI,$this->_httpmethod);
195                                 }
196                                 else
197                                 {
198                                         $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
199                                         // no proxy, send only the path
200                                         $this->_curlrequest($path, $URI, $this->_httpmethod);
201                                 }
202
203                                 if($this->_redirectaddr)
204                                 {
205                                         /* url was redirected, check if we've hit the max depth */
206                                         if($this->maxredirs > $this->_redirectdepth)
207                                         {
208                                                 // only follow redirect if it's on this site, or offsiteok is true
209                                                 if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
210                                                 {
211                                                         /* follow the redirect */
212                                                         $this->_redirectdepth++;
213                                                         $this->lastredirectaddr=$this->_redirectaddr;
214                                                         $this->fetch($this->_redirectaddr);
215                                                 }
216                                         }
217                                 }
218
219                                 if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
220                                 {
221                                         $frameurls = $this->_frameurls;
222                                         $this->_frameurls = array();
223
224                                         while(list(,$frameurl) = each($frameurls))
225                                         {
226                                                 if($this->_framedepth < $this->maxframes)
227                                                 {
228                                                         $this->fetch($frameurl);
229                                                         $this->_framedepth++;
230                                                 }
231                                                 else
232                                                         break;
233                                         }
234                                 }                                       
235                                 return true;                                    
236                         }
237                 }
238
239                 // SJM - else drop through and treat https as http
240                 
241                 $this->host = $URI_PARTS["host"];
242                 if(!empty($URI_PARTS["port"]))
243                         $this->port = $URI_PARTS["port"];
244                 
245                 // SJM - if it's https, default the port to 443
246                 if($URI_PARTS["scheme"] == "https")
247                 {
248                         if(empty($URI_PARTS["port"]))
249                         {
250                                 $this->port = 443;
251                         }
252                 }
253                 
254                 if($this->_connect($fp))
255                 {
256                         if($this->_isproxy)
257                         {
258                                 // using proxy, send entire URI
259                                 $this->_httprequest($URI,$fp,$URI,$this->_httpmethod);
260                         }
261                         else
262                         {
263                                 $path = $URI_PARTS["path"].(isset($URI_PARTS["query"]) ? "?".$URI_PARTS["query"] : "");
264                                 // no proxy, send only the path
265                                 $this->_httprequest($path, $fp, $URI, $this->_httpmethod);
266                         }
267
268                         $this->_disconnect($fp);
269
270                         if($this->_redirectaddr)
271                         {
272                                 /* url was redirected, check if we've hit the max depth */
273                                 if($this->maxredirs > $this->_redirectdepth)
274                                 {
275                                         // only follow redirect if it's on this site, or offsiteok is true
276                                         if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
277                                         {
278                                                 /* follow the redirect */
279                                                 $this->_redirectdepth++;
280                                                 $this->lastredirectaddr=$this->_redirectaddr;
281                                                 $this->fetch($this->_redirectaddr);
282                                         }
283                                 }
284                         }
285
286                         if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
287                         {
288                                 $frameurls = $this->_frameurls;
289                                 $this->_frameurls = array();
290
291                                 while(list(,$frameurl) = each($frameurls))
292                                 {
293                                         if($this->_framedepth < $this->maxframes)
294                                         {
295                                                 $this->fetch($frameurl);
296                                                 $this->_framedepth++;
297                                         }
298                                         else
299                                                 break;
300                                 }
301                         }                                       
302                 }
303                 else
304                 {
305                         return false;
306                 }
307                 return true;                                    
308         }
309
310
311
312 /*======================================================================*\
313         Private functions
314 \*======================================================================*/
315         
316         
317 /*======================================================================*\
318         Function:       _striplinks
319         Purpose:        strip the hyperlinks from an html document
320         Input:          $document       document to strip.
321         Output:         $match          an array of the links
322 \*======================================================================*/
323
324         function _striplinks($document)
325         {       
326                 preg_match_all("'<\s*a\s+.*href\s*=\s*                  # find <a href=
327                                                 ([\"\'])?                                       # find single or double quote
328                                                 (?(1) (.*?)\\1 | ([^\s\>]+))            # if quote found, match up to next matching
329                                                                                                         # quote, otherwise match up to next space
330                                                 'isx",$document,$links);
331                                                 
332
333                 // catenate the non-empty matches from the conditional subpattern
334
335                 while(list($key,$val) = each($links[2]))
336                 {
337                         if(!empty($val))
338                                 $match[] = $val;
339                 }                               
340                 
341                 while(list($key,$val) = each($links[3]))
342                 {
343                         if(!empty($val))
344                                 $match[] = $val;
345                 }               
346                 
347                 // return the links
348                 return $match;
349         }
350
351 /*======================================================================*\
352         Function:       _stripform
353         Purpose:        strip the form elements from an html document
354         Input:          $document       document to strip.
355         Output:         $match          the matched form elements, joined by "\r\n"
356 \*======================================================================*/
357
358         function _stripform($document)
359         {       
360                 preg_match_all("'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi",$document,$elements);
361                 
362                 // catenate the matches
363                 $match = implode("\r\n",$elements[0]);
364                                 
365                 // return the links
366                 return $match;
367         }
368
369         
370         
371 /*======================================================================*\
372         Function:       _striptext
373         Purpose:        strip the text from an html document
374         Input:          $document       document to strip.
375         Output:         $text           the resulting text
376 \*======================================================================*/
377
378         function _striptext($document)
379         {
380                 
381                 // I didn't use preg eval (//e) since that is only available in PHP 4.0.
382                 // so, list your entities one by one here. I included some of the
383                 // more common ones.
384                                                                 
385                 $search = array("'<script[^>]*?>.*?</script>'si",       // strip out javascript
386                                                 "'<[\/\!]*?[^<>]*?>'si",                        // strip out html tags
387                                                 "'([\r\n])[\s]+'",                                      // strip out white space
388                                                 "'&(quote|#34);'i",                                     // replace html entities
389                                                 "'&(amp|#38);'i",
390                                                 "'&(lt|#60);'i",
391                                                 "'&(gt|#62);'i",
392                                                 "'&(nbsp|#160);'i",
393                                                 "'&(iexcl|#161);'i",
394                                                 "'&(cent|#162);'i",
395                                                 "'&(pound|#163);'i",
396                                                 "'&(copy|#169);'i"
397                                                 );                              
398                 $replace = array(       "",
399                                                         "",
400                                                         "\\1",
401                                                         "\"",
402                                                         "&",
403                                                         "<",
404                                                         ">",
405                                                         " ",
406                                                         chr(161),
407                                                         chr(162),
408                                                         chr(163),
409                                                         chr(169));
410                                         
411                 $text = preg_replace($search,$replace,$document);
412                                                                 
413                 return $text;
414         }
415
416 /*======================================================================*\
417         Function:       _expandlinks
418         Purpose:        expand each link into a fully qualified URL
419         Input:          $links                  the links to qualify
420                                 $URI                    the full URI to get the base from
421         Output:         $expandedLinks  the expanded links
422 \*======================================================================*/
423
424         function _expandlinks($links,$URI)
425         {
426                 
427                 preg_match("/^[^\?]+/",$URI,$match);
428
429                 $match = preg_replace("|/[^\/\.]+\.[^\/\.]+$|","",$match[0]);
430                                 
431                 $search = array(        "|^http://".preg_quote($this->host)."|i",
432                                                         "|^(?!http://)(\/)?(?!mailto:)|i",
433                                                         "|/\./|",
434                                                         "|/[^\/]+/\.\./|"
435                                                 );
436                                                 
437                 $replace = array(       "",
438                                                         $match."/",
439                                                         "/",
440                                                         "/"
441                                                 );                      
442                                 
443                 $expandedLinks = preg_replace($search,$replace,$links);
444
445                 return $expandedLinks;
446         }
447
448 /*======================================================================*\
449         Function:       _httprequest
450         Purpose:        go get the http data from the server
451         Input:          $url            the url to fetch
452                                 $fp                     the current open file pointer
453                                 $URI            the full URI
454                                 $body           body contents to send if any (POST)
455         Output:         
456 \*======================================================================*/
457         
458         function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
459         {
460                 if($this->passcookies && $this->_redirectaddr)
461                         $this->setcookies();
462
463                 $URI_PARTS = parse_url($URI);
464                 if(empty($url))
465                         $url = "/";
466                 $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";                
467                 if(!empty($this->agent))
468                         $headers .= "User-Agent: ".$this->agent."\r\n";
469                 if(!empty($this->host) && !isset($this->rawheaders['Host']))
470                         $headers .= "Host: ".$this->host."\r\n";
471                 if(!empty($this->accept))
472                         $headers .= "Accept: ".$this->accept."\r\n";
473                 
474                 if($this->use_gzip) {
475                         // make sure PHP was built with --with-zlib
476                         // and we can handle gzipp'ed data
477                         if ( function_exists(gzinflate) ) {
478                            $headers .= "Accept-encoding: gzip\r\n";
479                         }
480                         else {
481                            trigger_error(
482                                 "use_gzip is on, but PHP was built without zlib support.".
483                                 "  Requesting file(s) without gzip encoding.", 
484                                 E_USER_NOTICE);
485                         }
486                 }
487                 
488                 if(!empty($this->referer))
489                         $headers .= "Referer: ".$this->referer."\r\n";
490                 if(!empty($this->cookies))
491                 {                       
492                         if(!is_array($this->cookies))
493                                 $this->cookies = (array)$this->cookies;
494         
495                         reset($this->cookies);
496                         if ( count($this->cookies) > 0 ) {
497                                 $cookie_headers .= 'Cookie: ';
498                                 foreach ( $this->cookies as $cookieKey => $cookieVal ) {
499                                 $cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
500                                 }
501                                 $headers .= substr($cookie_headers,0,-2) . "\r\n";
502                         } 
503                 }
504                 if(!empty($this->rawheaders))
505                 {
506                         if(!is_array($this->rawheaders))
507                                 $this->rawheaders = (array)$this->rawheaders;
508                         while(list($headerKey,$headerVal) = each($this->rawheaders))
509                                 $headers .= $headerKey.": ".$headerVal;
510                 }
511                 if(!empty($content_type)) {
512                         $headers .= "Content-type: $content_type";
513                         if ($content_type == "multipart/form-data")
514                                 $headers .= "; boundary=".$this->_mime_boundary;
515                         $headers .= "\r\n";
516                 }
517                 if(!empty($body))       
518                         $headers .= "Content-length: ".strlen($body)."\r\n";
519                 if(!empty($this->user) || !empty($this->pass))  
520                         $headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";
521
522                 $headers .= "\r\n";
523                 
524                 // set the read timeout if needed
525                 if ($this->read_timeout > 0)
526                         socket_set_timeout($fp, $this->read_timeout);
527                 $this->timed_out = false;
528                 
529                 fwrite($fp,$headers.$body,strlen($headers.$body));
530                 
531                 $this->_redirectaddr = false;
532                 unset($this->headers);
533                 
534                 // content was returned gzip encoded?
535                 $is_gzipped = false;
536                                                 
537                 while($currentHeader = fgets($fp,$this->_maxlinelen))
538                 {
539                         if ($this->read_timeout > 0 && $this->_check_timeout($fp))
540                         {
541                                 $this->status=-100;
542                                 return false;
543                         }
544                                 
545                 //      if($currentHeader == "\r\n")
546                         if(preg_match("/^\r?\n$/", $currentHeader) )
547                               break;
548
549                         if(!$this->_tried_digest && preg_match("/^WWW-Authenticate: Digest (.*)/", $currentHeader, $matches))
550                         {
551                                 // SJM - we got a Digest challenge.  Try to respond...
552                                 
553                                 $digestheader = $matches[1];
554                                 
555                                 preg_match("/nonce=\"(.*?)\"/", $digestheader, $matches);
556                                 $nonce = $matches[1];
557
558                                 preg_match("/realm=\"(.*?)\"/", $digestheader, $matches);
559                                 $realm = $matches[1];
560
561                                 $cnonce = md5(microtime());
562
563                                 $a1 = $this->user . ":" . $realm . ":" . $this->pass;
564                                 $a2 = $http_method . ":" . $url;
565
566                                 $ha1 = md5($a1);
567                                 $ha2 = md5($a2);
568
569                                 $response = md5($ha1 . ":" . $nonce . ":00000001:" . $cnonce . ":auth:" . $ha2);
570
571                                 $auth  = 'Digest username="' . $this->user . '", ';
572                                 $auth .= 'realm="' . $realm . '", ';
573                                 $auth .= 'nonce="' . $nonce . '", ';
574                                 $auth .= 'uri="' . $url . '", ';
575                                 $auth .= 'response="' . $response . '", ';
576                                 $auth .= 'algorithm="MD5", ';
577                                 $auth .= 'cnonce="' . $cnonce . '", ';
578                                 $auth .= 'nc=00000001, ';
579                                 $auth .= 'qop="auth"';
580
581                                 // SJM - treat Digest challenge as a redirect.  set flag so we don't keep retrying.
582                                 
583                                 $this->_tried_digest = true;
584                                 
585                                 $this->rawheaders["Authorization"]=$auth . "\r\n";
586                                 $this->user = "";
587                                 $this->pass = "";
588                                 
589                                 $this->_redirectaddr = $URI_PARTS['scheme'] . '://' . $this->host . $url;
590                         }
591
592                         // if a header begins with Location: or URI:, set the redirect
593                         if(preg_match("/^(Location:|URI:)/i",$currentHeader))
594                         {
595                                 // get URL portion of the redirect
596                                 preg_match("/^(Location:|URI:)\s+(.*)/",chop($currentHeader),$matches);
597                                 // look for :// in the Location header to see if hostname is included
598                                 if(!preg_match("|\:\/\/|",$matches[2]))
599                                 {
600                                         // no host in the path, so prepend
601                                         $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
602                                         // eliminate double slash
603                                         if(!preg_match("|^/|",$matches[2]))
604                                                         $this->_redirectaddr .= "/".$matches[2];
605                                         else
606                                                         $this->_redirectaddr .= $matches[2];
607                                 }
608                                 else
609                                         $this->_redirectaddr = $matches[2];
610                         }
611                 
612                         if(preg_match("|^HTTP/|",$currentHeader))
613                         {
614                 if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
615                                 {
616                                         $this->status= $status[1];
617                 }                               
618                                 $this->response_code = $currentHeader;
619                         }
620                         
621                         if (preg_match("/Content-Encoding: gzip/", $currentHeader) ) {
622                                 $is_gzipped = true;
623                         }
624                         
625                         $this->headers[] = $currentHeader;
626                 }
627
628                 # $results = fread($fp, $this->maxlength);
629                 $results = "";
630                 while ( $data = fread($fp, $this->maxlength) ) {
631                     $results .= $data;
632                     if (
633                         strlen($results) > $this->maxlength ) {
634                         break;
635                     }
636                 }
637                 
638                 // gunzip
639                 if ( $is_gzipped ) {
640                         // per http://www.php.net/manual/en/function.gzencode.php
641                         $results = substr($results, 10);
642                         $results = gzinflate($results);
643                 }
644                 
645                 if ($this->read_timeout > 0 && $this->_check_timeout($fp))
646                 {
647                         $this->status=-100;
648                         return false;
649                 }
650                 
651                 // check if there is a a redirect meta tag
652                 
653                 if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
654                 {
655                         $this->_redirectaddr = $this->_expandlinks($match[1],$URI);     
656                 }
657
658                 // have we hit our frame depth and is there frame src to fetch?
659                 if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
660                 {
661                         $this->results[] = $results;
662                         for($x=0; $x<count($match[1]); $x++)
663                                 $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
664                 }
665                 // have we already fetched framed content?
666                 elseif(is_array($this->results))
667                         $this->results[] = $results;
668                 // no framed content
669                 else
670                         $this->results = $results;
671                 
672                 return true;
673         }
674
/*======================================================================*\
        Function:       _curlrequest
        Purpose:        go get the https data from the server by running
                        the external curl binary ($this->curl_path)
        Input:          $url            the url to fetch (not used here;
                                        curl is handed the full $URI)
                        $URI            the full URI
                        $http_method    the HTTP method (not used here;
                                        no method option is passed to curl)
                        $content_type   Content-type of the body, if any
                        $body           body contents to send if any (POST)
        Output:         true on success, false on failure (with
                        $this->error set)
\*======================================================================*/

        function _curlrequest($url,$URI,$http_method,$content_type="",$body="")
        {
                if($this->passcookies && $this->_redirectaddr)
                        $this->setcookies();

                $headers = array();
                $cmdline_params = "";

                $URI_PARTS = parse_url($URI);

                // no request line is built here: curl generates it from $URI
                if(!empty($this->agent))
                        $headers[] = "User-Agent: ".$this->agent;
                if(!empty($this->host))
                        $headers[] = "Host: ".$this->host;
                if(!empty($this->accept))
                        $headers[] = "Accept: ".$this->accept;
                if(!empty($this->referer))
                        $headers[] = "Referer: ".$this->referer;
                if(!empty($this->cookies))
                {
                        if(!is_array($this->cookies))
                                $this->cookies = (array)$this->cookies;

                        if(count($this->cookies) > 0)
                        {
                                $cookie_pairs = array();
                                foreach($this->cookies as $cookieKey => $cookieVal)
                                        $cookie_pairs[] = $cookieKey."=".urlencode($cookieVal);
                                $headers[] = "Cookie: ".implode("; ", $cookie_pairs);
                        }
                }
                if(!empty($this->rawheaders))
                {
                        if(!is_array($this->rawheaders))
                                $this->rawheaders = (array)$this->rawheaders;
                        // foreach always starts at the first element, so the raw
                        // headers are also sent on repeated requests.
                        // The socket code path stores raw header values with their
                        // own trailing CRLF; that must not reach the command line.
                        foreach($this->rawheaders as $headerKey => $headerVal)
                                $headers[] = $headerKey.": ".rtrim($headerVal, "\r\n");
                }
                if(!empty($content_type)) {
                        if ($content_type == "multipart/form-data")
                                $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
                        else
                                $headers[] = "Content-type: $content_type";
                }
                if(!empty($body))
                        $headers[] = "Content-length: ".strlen($body);
                if(!empty($this->user) || !empty($this->pass))
                        $headers[] = "Authorization: Basic ".base64_encode($this->user.":".$this->pass);

                // everything below ends up on a shell command line, so each
                // value is quoted with escapeshellarg() to keep quotes,
                // backticks, "$" etc. in headers, body or URI from being
                // interpreted by the shell
                foreach($headers as $header)
                        $cmdline_params .= " -H ".escapeshellarg($header);

                if(!empty($body))
                        $cmdline_params .= " -d ".escapeshellarg($body);

                if($this->read_timeout > 0)
                        $cmdline_params .= " -m ".escapeshellarg((string)$this->read_timeout);

                // curl dumps the response headers into this file (-D); tempnam()
                // creates it atomically under an unpredictable name
                $headerfile = tempnam("/tmp", "snoopy");
                if(!$headerfile)
                {
                        $this->error = "Error: could not create a temporary file for the cURL headers.";
                        return false;
                }

                # accept self-signed certs

                // mbi: removed, as it breaks on older cURL's
                //$cmdline_params .= " -k";

                // $this->curl_path is configuration, not request data, and is
                // used as given
                $output = array();
                exec($this->curl_path." -D ".escapeshellarg($headerfile).$cmdline_params." ".escapeshellarg($URI),$output,$return);

                if($return)
                {
                        // do not leave the header dump behind on failure
                        if(file_exists($headerfile))
                                unlink($headerfile);
                        $this->error = "Error: cURL could not retrieve the document, error $return.";
                        return false;
                }

                $results = implode("\r\n",$output);

                $result_headers = file($headerfile);
                unlink($headerfile);
                if(!is_array($result_headers))
                        $result_headers = array();

                $this->_redirectaddr = false;
                $this->headers = array();

                foreach($result_headers as $header_line)
                {
                        // if a header begins with Location: or URI:, set the redirect;
                        // one case-insensitive match both detects the header and
                        // extracts the (non-empty) URL portion
                        if(preg_match("/^(Location|URI):\s*(.+)/i",chop($header_line),$matches))
                        {
                                // look for :// in the Location header to see if hostname is included
                                if(!preg_match("|\:\/\/|",$matches[2]))
                                {
                                        // no host in the path, so prepend
                                        $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
                                        // eliminate double slash
                                        if(!preg_match("|^/|",$matches[2]))
                                                $this->_redirectaddr .= "/".$matches[2];
                                        else
                                                $this->_redirectaddr .= $matches[2];
                                }
                                else
                                        $this->_redirectaddr = $matches[2];
                        }

                        // status line: keep it verbatim and extract the numeric code
                        if(preg_match("|^HTTP/|",$header_line))
                        {
                                $this->response_code = $header_line;
                                if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$this->response_code, $match))
                                {
                                        $this->status= $match[1];
                                }
                        }
                        $this->headers[] = $header_line;
                }

                // check if there is a redirect meta tag

                if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
                {
                        $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
                }

                // have we hit our frame depth and is there frame src to fetch?
                if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
                {
                        $this->results[] = $results;
                        for($x=0; $x<count($match[1]); $x++)
                                $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
                }
                // have we already fetched framed content?
                elseif(is_array($this->results))
                        $this->results[] = $results;
                // no framed content
                else
                        $this->results = $results;

                return true;
        }
827
/*======================================================================*\
        Function:       setcookies()
        Purpose:        set cookies for a redirection: copy every
                        Set-Cookie name/value pair found in $this->headers
                        into $this->cookies
\*======================================================================*/

        function setcookies()
        {
                // the request code unsets $this->headers before reading a
                // response, so there may be nothing to scan
                if(empty($this->headers) || !is_array($this->headers))
                        return;

                foreach($this->headers as $header)
                {
                        // header lines still carry their line terminator, so the
                        // value stops at ";" or at the end of the line; whitespace
                        // after the colon is optional
                        if(preg_match("/^set-cookie:\s*([^=]+)=([^;\r\n]+)/i", $header, $match))
                                $this->cookies[$match[1]] = $match[2];
                }
        }
841
842         
/*======================================================================*\
        Function:       _check_timeout
        Purpose:        checks whether timeout has occurred
        Input:          $fp     file pointer
        Output:         true if the stream reports a timeout (and
                        $this->timed_out is set), false otherwise
\*======================================================================*/

        function _check_timeout($fp)
        {
                // nothing to check unless a read timeout has been configured
                if (!($this->read_timeout > 0))
                        return false;

                $meta = socket_get_status($fp);
                if (!$meta["timed_out"])
                        return false;

                // remember the timeout on the object for the caller
                $this->timed_out = true;
                return true;
        }
860
/*======================================================================*\
        Function:       _connect
        Purpose:        make a socket connection
        Input:          $fp     file pointer (by reference); receives the
                                opened socket
        Output:         true on success; false on failure, with
                        $this->status set to the fsockopen error number
                        and $this->error to a description
\*======================================================================*/

        function _connect(&$fp)
        {
                // connect to the proxy when one is fully configured,
                // otherwise straight to the target host
                if(!empty($this->proxy_host) && !empty($this->proxy_port))
                {
                        $this->_isproxy = true;
                        $host = $this->proxy_host;
                        $port = $this->proxy_port;
                }
                else
                {
                        $host = $this->host;
                        $port = $this->port;
                }

                $this->status = 0;

                // PHP's built in SSL support is selected via the ssl:// transport
                if($this->_scheme == "https")
                {
                        $host = "ssl://" . $host;
                }

                $fp = fsockopen($host, $port, $errno, $errstr, $this->_fp_timeout);
                if($fp)
                {
                        // socket connection succeeded
                        return true;
                }

                // socket connection failed
                $this->status = $errno;
                switch($errno)
                {
                        // each case needs its own break, otherwise the
                        // specific message is overwritten by the default one
                        case -3:
                                $this->error="socket creation failed (-3)";
                                break;
                        case -4:
                                $this->error="dns lookup failure (-4)";
                                break;
                        case -5:
                                $this->error="connection refused or timed out (-5)";
                                break;
                        default:
                                $this->error="connection failed (".$errno.")";
                                break;
                }
                return false;
        }
/*======================================================================*\
        Function:       _disconnect
        Purpose:        disconnect a socket connection
        Input:          $fp     file pointer
        Output:         the result of fclose() on $fp
\*======================================================================*/

        function _disconnect($fp)
        {
                $closed = fclose($fp);
                return $closed;
        }
928
929         
/*======================================================================*\
        Function:       _prepare_post_body
        Purpose:        Prepare post body according to encoding type
                        ($this->_submit_type)
        Input:          $formvars  - form variables
                        $formfiles - form upload files (only used for
                                     multipart/form-data)
        Output:         post body; nothing when there are neither
                        variables nor files; an empty string for an
                        unrecognised encoding type
\*======================================================================*/

        function _prepare_post_body($formvars, $formfiles)
        {
                settype($formvars, "array");
                settype($formfiles, "array");

                if (count($formvars) == 0 && count($formfiles) == 0)
                        return;

                $postdata = "";

                switch ($this->_submit_type) {
                        case "application/x-www-form-urlencoded":
                                foreach ($formvars as $key => $val) {
                                        if (is_array($val) || is_object($val)) {
                                                // multi-valued field: repeat it as key[]=value
                                                foreach ($val as $cur_val)
                                                        $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
                                        } else
                                                $postdata .= urlencode($key)."=".urlencode($val)."&";
                                }
                                break;

                        case "multipart/form-data":
                                // the boundary is kept on the object because the
                                // Content-type request header has to repeat it
                                $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));

                                foreach ($formvars as $key => $val) {
                                        if (is_array($val) || is_object($val)) {
                                                foreach ($val as $cur_val) {
                                                        $postdata .= "--".$this->_mime_boundary."\r\n";
                                                        // {$key}[] yields name="key[]"; writing \[ \] inside
                                                        // double quotes would emit the backslashes literally
                                                        $postdata .= "Content-Disposition: form-data; name=\"{$key}[]\"\r\n\r\n";
                                                        $postdata .= "$cur_val\r\n";
                                                }
                                        } else {
                                                $postdata .= "--".$this->_mime_boundary."\r\n";
                                                $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
                                                $postdata .= "$val\r\n";
                                        }
                                }

                                foreach ($formfiles as $field_name => $file_names) {
                                        settype($file_names, "array");
                                        foreach ($file_names as $file_name) {
                                                if (!is_readable($file_name)) continue;

                                                // binary safe, and also works for empty files
                                                $file_content = file_get_contents($file_name);
                                                if ($file_content === false) continue;
                                                $base_name = basename($file_name);

                                                $postdata .= "--".$this->_mime_boundary."\r\n";
                                                $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
                                                $postdata .= "$file_content\r\n";
                                        }
                                }
                                // closing delimiter
                                $postdata .= "--".$this->_mime_boundary."--\r\n";
                                break;
                }

                return $postdata;
        }
999 }
1000
1001 ?>