// Default target; fetch() and submit() replace scheme and host with the values parsed from the URI.
20 public $scheme =
'http';
21 public $host =
'www.php.net';
// Proxy endpoint and credentials. Proxy mode is enabled only when both proxy_host and
// proxy_port are non-empty; proxy_user/proxy_pass feed the Proxy-Authorization header.
23 public $proxy_host =
'';
24 public $proxy_port =
'';
25 public $proxy_user =
'';
26 public $proxy_pass =
'';
// Sent as the User-Agent request header when non-empty.
28 public $agent =
'Snoopy v2.0.1';
// Extra request headers as name => value pairs, appended verbatim by _httprequest().
32 public $rawheaders = [];
// A redirect is followed only while maxredirs > _redirectdepth.
35 public $maxredirs = 5;
// Most recent redirect target that fetch()/submit() decided to follow.
36 public $lastredirectaddr =
'';
// When true, redirects are followed even if they do not point at the current host.
37 public $offsiteok =
true;
// Frame URLs are collected and fetched only while _framedepth < maxframes, so 0 disables frame handling.
38 public $maxframes = 0;
// When true, fetchlinks()/submitlinks()/submittext() pass their results through _expandlinks().
39 public $expandlinks =
true;
// Checked together with _redirectaddr at the start of _httprequest().
// NOTE(review): presumably controls re-sending received cookies after a redirect - confirm.
42 public $passcookies =
true;
// Sent as the Accept request header when non-empty.
50 public $accept =
'image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*';
// Raw HTTP status line of the last response, stored by _httprequest().
55 public $response_code =
'';
// Byte count passed to fread() when reading the response body.
57 public $maxlength = 500000;
// Passed to stream_set_timeout() when greater than 0; otherwise no read timeout is set.
58 public $read_timeout = 0;
// Set to true by _check_timeout() when the stream metadata reports a timeout.
61 public $timed_out =
false;
// NOTE(review): temp_dir and curl_path are not referenced by the methods reviewed here;
// presumably legacy settings - confirm before relying on or removing them.
64 public $temp_dir =
'/tmp';
68 public $curl_path =
false;
// When true and gzinflate() exists, requests advertise "Accept-encoding: gzip".
73 public $use_gzip =
true;
// Maximum length passed to fgets() while reading response header lines.
81 public $_maxlinelen = 4096;
// Method and protocol version used by fetch() for the request line.
83 public $_httpmethod =
'GET';
84 public $_httpversion =
'HTTP/1.0';
// Method and content type used by submit(); the type is switched by
// set_submit_multipart() and set_submit_normal().
85 public $_submit_method =
'POST';
86 public $_submit_type =
'application/x-www-form-urlencoded';
// Boundary string for multipart bodies; regenerated whenever a multipart body is built.
87 public $_mime_boundary =
'';
// Redirect target found in the last response (Location/URI header or meta refresh), or false.
88 public $_redirectaddr =
false;
// Number of redirects followed so far.
89 public $_redirectdepth = 0;
// Frame source URLs collected from the last response, waiting to be fetched.
90 public $_frameurls = [];
// Number of frame fetches performed so far.
91 public $_framedepth = 0;
// Set to true when a proxy host and port are configured.
93 public $_isproxy =
false;
// NOTE(review): presumably the connect timeout in seconds for opening the socket - confirm.
94 public $_fp_timeout = 30;
/**
 * Fetch a URI using the configured HTTP method ($this->_httpmethod).
 *
 * Parses the URI, copies credentials, scheme, host and port onto the object,
 * sends the request through _httprequest(), then follows a pending redirect
 * and any queued frame URLs by calling itself recursively.
 *
 * @param string $URI Address to fetch; split with parse_url().
 *
 * @return mixed Callers in this class treat a result of false as failure.
 */
105 public function fetch($URI)
107 $URI_PARTS = parse_url($URI);
// Credentials embedded in the URI become the Basic auth credentials for the request.
108 if (!empty($URI_PARTS[
'user'])) {
109 $this->
user = $URI_PARTS[
'user'];
111 if (!empty($URI_PARTS[
'pass'])) {
112 $this->pass = $URI_PARTS[
'pass'];
// Normalise missing query/path to empty strings so later concatenation is safe.
114 if (empty($URI_PARTS[
'query'])) {
115 $URI_PARTS[
'query'] =
'';
117 if (empty($URI_PARTS[
'path'])) {
118 $URI_PARTS[
'path'] =
'';
// NOTE(review): 'scheme' and 'host' are read without an existence check; parse_url()
// omits them for relative or malformed input - confirm callers always pass absolute URIs.
123 switch (strtolower($URI_PARTS[
'scheme'])) {
125 if (!extension_loaded(
'openssl')) {
126 trigger_error(
'openssl extension required for HTTPS', E_USER_ERROR);
132 $this->scheme = strtolower($URI_PARTS[
'scheme']);
133 $this->host = $URI_PARTS[
'host'];
134 if (!empty($URI_PARTS[
'port'])) {
135 $this->port = $URI_PARTS[
'port'];
// Through a proxy the request line carries the full URI; otherwise only path plus query.
// NOTE(review): $fp is presumably the stream opened by the connection step - confirm.
138 if ($this->_isproxy) {
140 $this->
_httprequest($URI, $fp, $URI, $this->_httpmethod);
142 $path = $URI_PARTS[
'path'] . ($URI_PARTS[
'query'] ?
'?' . $URI_PARTS[
'query'] :
'');
144 $this->
_httprequest($path, $fp, $URI, $this->_httpmethod);
147 $this->_disconnect($fp);
// Follow a redirect only while the redirect budget is not exhausted.
150 if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth) {
// Same-host redirects are always allowed; off-site ones need $this->offsiteok.
152 if (preg_match(
'|^https?://' . preg_quote($this->host) .
'|i', $this->_redirectaddr) || $this->offsiteok) {
154 $this->_redirectdepth++;
155 $this->lastredirectaddr = $this->_redirectaddr;
156 $this->fetch($this->_redirectaddr);
// Fetch queued frame sources; the queue is copied and cleared first because the
// nested fetch() calls can queue further frame URLs.
160 if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0) {
161 $frameurls = $this->_frameurls;
162 $this->_frameurls = [];
165 foreach ($frameurls as $frameurl) {
167 if ($this->_framedepth < $this->maxframes) {
168 $this->fetch($frameurl);
169 $this->_framedepth++;
// NOTE(review): the trailing '"\n' is single-quoted, so the message ends with a literal
// backslash and "n" rather than a newline - confirm whether a newline was intended.
182 $this->error =
'Invalid protocol "' . $URI_PARTS[
'scheme'] .
'"\n';
/**
 * Submit form data to a URI using $this->_submit_method and $this->_submit_type.
 *
 * Mirrors fetch(): parses the URI, stores credentials, scheme, host and port,
 * sends the request with a body through _httprequest(), then follows a pending
 * redirect and any queued frame URLs.
 *
 * @param string $URI       Address to submit to; split with parse_url().
 * @param mixed  $formvars  Form fields; passed on again when a redirect is re-submitted.
 * @param mixed  $formfiles Files to upload; passed on again when a redirect is re-submitted.
 *
 * @return mixed Callers in this class treat a result of false as failure.
 */
200 public function submit($URI, $formvars =
'', $formfiles =
'')
206 $URI_PARTS = parse_url($URI);
// Credentials embedded in the URI become the Basic auth credentials for the request.
207 if (!empty($URI_PARTS[
'user'])) {
208 $this->
user = $URI_PARTS[
'user'];
210 if (!empty($URI_PARTS[
'pass'])) {
211 $this->pass = $URI_PARTS[
'pass'];
// Normalise missing query/path to empty strings so later concatenation is safe.
213 if (empty($URI_PARTS[
'query'])) {
214 $URI_PARTS[
'query'] =
'';
216 if (empty($URI_PARTS[
'path'])) {
217 $URI_PARTS[
'path'] =
'';
// NOTE(review): 'scheme' and 'host' are read without an existence check; parse_url()
// omits them for relative or malformed input - confirm callers always pass absolute URIs.
220 switch (strtolower($URI_PARTS[
'scheme'])) {
222 if (!extension_loaded(
'openssl')) {
223 trigger_error(
'openssl extension required for HTTPS', E_USER_ERROR);
228 $this->scheme = strtolower($URI_PARTS[
'scheme']);
229 $this->host = $URI_PARTS[
'host'];
230 if (!empty($URI_PARTS[
'port'])) {
231 $this->port = $URI_PARTS[
'port'];
// Through a proxy the request line carries the full URI; otherwise only path plus query.
// NOTE(review): $fp and $postdata are presumably produced by the connection and
// body-building steps before this point - confirm.
234 if ($this->_isproxy) {
236 $this->
_httprequest($URI, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
238 $path = $URI_PARTS[
'path'] . ($URI_PARTS[
'query'] ?
'?' . $URI_PARTS[
'query'] :
'');
240 $this->
_httprequest($path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
243 $this->_disconnect($fp);
// Follow a redirect only while the redirect budget is not exhausted.
246 if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth) {
// A redirect target that does not start with the request's scheme is expanded
// against scheme://host first.
247 if (!preg_match(
'|^' . $URI_PARTS[
'scheme'] .
'://|', $this->_redirectaddr)) {
248 $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr, $URI_PARTS[
'scheme'] .
'://' . $URI_PARTS[
'host']);
// Same-host redirects are always allowed; off-site ones need $this->offsiteok.
252 if (preg_match(
'|^https?://' . preg_quote($this->host) .
'|i', $this->_redirectaddr) || $this->offsiteok) {
254 $this->_redirectdepth++;
255 $this->lastredirectaddr = $this->_redirectaddr;
// A target containing '?' after its first character is fetched with fetch();
// otherwise the form is submitted again to the new address.
256 if (strpos($this->_redirectaddr,
'?') > 0) {
257 $this->fetch($this->_redirectaddr);
260 $this->submit($this->_redirectaddr, $formvars, $formfiles);
// Fetch queued frame sources; the queue is copied and cleared first because the
// nested fetch() calls can queue further frame URLs.
265 if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0) {
266 $frameurls = $this->_frameurls;
267 $this->_frameurls = [];
269 foreach ($frameurls as $frameurl) {
270 if ($this->_framedepth < $this->maxframes) {
271 $this->fetch($frameurl);
272 $this->_framedepth++;
// NOTE(review): the trailing '"\n' is single-quoted, so the message ends with a literal
// backslash and "n" rather than a newline - confirm whether a newline was intended.
285 $this->error =
'Invalid protocol "' . $URI_PARTS[
'scheme'] .
'"\n';
/**
 * Fetch a URI and reduce the stored results to the links found in them.
 *
 * Each result goes through _striplinks(); when $this->expandlinks is set the
 * results are then passed through _expandlinks() against the final address.
 *
 * @param string $URI Address to fetch.
 */
299 public function fetchlinks($URI)
301 if (
false !== $this->fetch($URI)) {
// After a redirect, links are resolved against the address actually reached.
302 if ($this->lastredirectaddr) {
303 $URI = $this->lastredirectaddr;
// results is an array when several documents were stored (for example frames).
305 if (is_array($this->results)) {
306 foreach ($this->results as $x => $xValue) {
307 $this->results[$x] = $this->_striplinks($xValue);
310 $this->results = $this->_striplinks($this->results);
313 if ($this->expandlinks) {
314 $this->results = $this->_expandlinks($this->results, $URI);
/**
 * Fetch a URI and reduce the stored results to their form markup.
 *
 * Each result (or the single result string) is replaced by _stripform()'s output.
 *
 * @param string $URI Address to fetch.
 */
329 public function fetchform($URI)
331 if (
false !== $this->fetch($URI)) {
// results is an array when several documents were stored (for example frames).
332 if (is_array($this->results)) {
333 foreach ($this->results as $x => $xValue) {
334 $this->results[$x] = $this->_stripform($xValue);
337 $this->results = $this->_stripform($this->results);
/**
 * Fetch a URI and reduce the stored results to text.
 *
 * Each result (or the single result string) is replaced by _striptext()'s output.
 *
 * @param string $URI Address to fetch.
 */
354 public function fetchtext($URI)
356 if (
false !== $this->fetch($URI)) {
// results is an array when several documents were stored (for example frames).
357 if (is_array($this->results)) {
358 foreach ($this->results as $x => $xValue) {
359 $this->results[$x] = $this->_striptext($xValue);
362 $this->results = $this->_striptext($this->results);
/**
 * Submit a form and reduce the stored results to the links found in them.
 *
 * @param string $URI       Address to submit to.
 * @param mixed  $formvars  Form fields, forwarded to submit().
 * @param mixed  $formfiles Files to upload, forwarded to submit().
 */
377 public function submitlinks($URI, $formvars =
'', $formfiles =
'')
379 if (
false !== $this->submit($URI, $formvars, $formfiles)) {
// After a redirect, links are resolved against the address actually reached.
380 if ($this->lastredirectaddr) {
381 $URI = $this->lastredirectaddr;
383 if (is_array($this->results)) {
384 foreach ($this->results as $x => $xValue) {
385 $this->results[$x] = $this->_striplinks($xValue);
// NOTE(review): $xValue is the original, unstripped entry, so this assignment
// replaces the _striplinks() output with the expanded raw document. The
// single-result branch below expands the stripped value instead - looks like
// $this->results[$x] was intended here; confirm.
386 if ($this->expandlinks) {
387 $this->results[$x] = $this->_expandlinks($xValue, $URI);
391 $this->results = $this->_striplinks($this->results);
392 if ($this->expandlinks) {
393 $this->results = $this->_expandlinks($this->results, $URI);
/**
 * Submit a form and reduce the stored results to text.
 *
 * @param string $URI       Address to submit to.
 * @param mixed  $formvars  Form fields, forwarded to submit().
 * @param mixed  $formfiles Files to upload, forwarded to submit().
 */
409 public function submittext($URI, $formvars =
'', $formfiles =
'')
411 if (
false !== $this->submit($URI, $formvars, $formfiles)) {
// After a redirect, links are resolved against the address actually reached.
412 if ($this->lastredirectaddr) {
413 $URI = $this->lastredirectaddr;
415 if (is_array($this->results)) {
416 foreach ($this->results as $x => $xValue) {
417 $this->results[$x] = $this->_striptext($xValue);
// NOTE(review): $xValue is the original, unstripped entry, so this assignment
// replaces the _striptext() output with the expanded raw document. The
// single-result branch below expands the stripped value instead - looks like
// $this->results[$x] was intended here; confirm.
418 if ($this->expandlinks) {
419 $this->results[$x] = $this->_expandlinks($xValue, $URI);
423 $this->results = $this->_striptext($this->results);
424 if ($this->expandlinks) {
425 $this->results = $this->_expandlinks($this->results, $URI);
/**
 * Switch form submission to the 'multipart/form-data' encoding.
 *
 * The value is what submit() passes to _httprequest() as the content type.
 */
440 public function set_submit_multipart()
442 $this->_submit_type =
'multipart/form-data';
/**
 * Switch form submission back to the 'application/x-www-form-urlencoded' encoding.
 *
 * This is also the default value of $this->_submit_type.
 */
452 public function set_submit_normal()
454 $this->_submit_type =
'application/x-www-form-urlencoded';
/**
 * Extract the href targets of anchor tags from an HTML document.
 *
 * The pattern captures quoted href values in group 2 and unquoted ones in
 * group 3; both groups are then walked separately.
 *
 * @param string $document HTML to scan.
 */
473 public function _striplinks($document)
475 preg_match_all(
"'<\s*a\s.*?href\s*=\s* # find <a href=
476 ([\"\'])? # find single or double quote
477 (?(1) (.*?)\\1 | ([^\s>]+)) # if quote found, match up to next matching
478 # quote, otherwise match up to next space
479 'isx", $document, $links);
// Quoted href values.
484 foreach ($links[2] as $val) {
// Unquoted href values.
490 foreach ($links[3] as $val) {
/**
 * Extract form-related markup from an HTML document.
 *
 * Matches FORM, INPUT, SELECT, TEXTAREA and OPTION tags (case-insensitively;
 * for OPTION the text following the tag is included) and joins the matches
 * with CRLF.
 *
 * @param string $document HTML to scan.
 *
 * @return string The matched fragments separated by "\r\n".
 */
507 public function _stripform($document)
509 preg_match_all(
"'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi", $document, $elements);
513 return implode(
"\r\n", $elements[0]);
/**
 * Convert an HTML document to text via a table of regex replacements.
 *
 * The search patterns listed here cover script blocks, remaining tags and a
 * set of named/numeric character entities; each is paired with an entry in
 * $replace and applied in one preg_replace() call.
 *
 * @param string $document HTML to convert.
 *
 * @return string|string[]|null Result of preg_replace() on the document.
 */
524 public function _striptext($document)
// Script elements including their content.
532 "'<script[^>]*?>.*?</script>'si",
// Any remaining tag.
533 "'<[\/\!]*?[^<>]*?>'si",
// Entity forms of the double quote, ampersand, less-than, greater-than and non-breaking space.
535 "'&(quot|#34|#034|#x22);'i",
536 "'&(amp|#38|#038|#x26);'i",
537 "'&(lt|#60|#060|#x3c);'i",
538 "'&(gt|#62|#062|#x3e);'i",
539 "'&(nbsp|#160|#xa0);'i",
// Numeric entity forms of the apostrophe (matched case-sensitively).
546 "'&(#39|#039|#x27);'",
586 return preg_replace($search, $replace, $document);
/**
 * Rewrite links relative to a base URI using a table of regex replacements.
 *
 * The base is derived from $URI by dropping the query string, a trailing
 * "name.ext" path segment and a trailing slash; its scheme and host form the
 * server root.
 *
 * @param string|string[] $links Link or links to rewrite.
 * @param string          $URI   Address the links were found at.
 *
 * @return string|string[]|null Result of preg_replace() on $links.
 */
597 public function _expandlinks($links, $URI)
// Everything before the first '?'.
599 preg_match(
"/^[^\?]+/", $URI, $match);
// Drop a final "file.ext" segment, then a trailing slash.
601 $match = preg_replace(
"|/[^\/\.]+\.[^\/\.]+$|",
'', $match[0]);
602 $match = preg_replace(
'|/$|',
'', $match);
603 $match_part = parse_url($match);
// Server root: scheme://host of the base address.
605 $match_part[
'scheme'] .
'://' . $match_part[
'host'];
// NOTE(review): the two patterns below only mention https://; presumably entries of the
// $search table for absolute same-host links and for relative links - confirm that
// plain http:// links are handled as intended.
608 '|^https://' . preg_quote($this->host) .
'|i',
610 '|^(?!https://)(?!mailto:)|i',
623 return preg_replace($search, $replace, $links);
/**
 * Send one HTTP request over an open stream and read the response.
 *
 * Builds the request line and headers (Host, User-Agent, Accept, optional
 * gzip, Referer, Cookie, raw headers, Content-type/length, Basic and proxy
 * authorization), writes headers plus body to the stream, then reads the
 * response headers (recording redirects, status and gzip encoding) and the
 * body. Finally the body is scanned for meta-refresh redirects and frame
 * sources and stored in $this->results.
 *
 * @param string   $url          Request target placed in the request line.
 * @param resource $fp           Open stream to write to and read from.
 * @param string   $URI          Full address; used to resolve redirects and links.
 * @param string   $http_method  HTTP method for the request line.
 * @param string   $content_type Content type of the body, if any.
 * @param string   $body         Request body, if any.
 */
636 public function _httprequest($url, $fp, $URI, $http_method, $content_type =
'', $body =
'')
638 $cookie_headers =
'';
639 if ($this->passcookies && $this->_redirectaddr) {
643 $URI_PARTS = parse_url($URI);
// Request line: METHOD target VERSION.
647 $headers = $http_method .
' ' . $url .
' ' . $this->_httpversion .
"\r\n";
// A Host header supplied through rawheaders takes precedence over the generated one.
648 if (!empty($this->host) && !isset($this->rawheaders[
'Host'])) {
649 $headers .=
'Host: ' . $this->host;
// NOTE(review): parse_url() yields the port as an integer, so the strict comparison
// against the string '80' is always true for ports taken from the URI and ":80"
// is then appended as well - confirm whether the default port should be omitted.
650 if (!empty($this->port) &&
'80' !== $this->port) {
651 $headers .=
':' . $this->port;
655 if (!empty($this->agent)) {
656 $headers .=
'User-Agent: ' . $this->agent .
"\r\n";
658 if (!empty($this->accept)) {
659 $headers .=
'Accept: ' . $this->accept .
"\r\n";
// gzip is only advertised when gzinflate() is available to decode the response.
661 if ($this->use_gzip) {
664 if (function_exists(
'gzinflate')) {
665 $headers .=
"Accept-encoding: gzip\r\n";
668 'use_gzip is on, but PHP was built without zlib support.' .
' Requesting file(s) without gzip encoding.',
672 if (!empty($this->referer)) {
673 $headers .=
'Referer: ' . $this->referer .
"\r\n";
// Cookies are sent as a single "Cookie: k=v; k=v" header with url-encoded values.
675 if (!empty($this->cookies)) {
676 if (!is_array($this->cookies)) {
677 $this->cookies = (array)$this->cookies;
680 reset($this->cookies);
681 if (count($this->cookies) > 0) {
682 $cookie_headers .=
'Cookie: ';
683 foreach ($this->cookies as $cookieKey => $cookieVal) {
684 $cookie_headers .= $cookieKey .
'=' . urlencode($cookieVal) .
'; ';
// substr(..., 0, -2) drops the trailing "; " separator.
686 $headers .= substr($cookie_headers, 0, -2) .
"\r\n";
// Caller-supplied headers are appended as "name: value" without further processing.
689 if (!empty($this->rawheaders)) {
690 if (!is_array($this->rawheaders)) {
691 $this->rawheaders = (array)$this->rawheaders;
693 foreach ($this->rawheaders as $headerKey => $headerVal) {
694 $headers .= $headerKey .
': ' . $headerVal .
"\r\n";
// Multipart bodies also announce their boundary in the Content-type header.
697 if (!empty($content_type)) {
698 $headers .=
"Content-type: $content_type";
699 if (
'multipart/form-data' === $content_type) {
700 $headers .=
'; boundary=' . $this->_mime_boundary;
705 $headers .=
'Content-length: ' . strlen($body) .
"\r\n";
// HTTP Basic credentials for the target server.
707 if (!empty($this->
user) || !empty($this->pass)) {
708 $headers .=
'Authorization: Basic ' . base64_encode($this->
user .
':' . $this->pass) .
"\r\n";
// HTTP Basic credentials for the proxy.
712 if (!empty($this->proxy_user)) {
713 $headers .=
'Proxy-Authorization: ' .
'Basic ' . base64_encode($this->proxy_user .
':' . $this->proxy_pass) .
"\r\n";
720 if ($this->read_timeout > 0) {
721 stream_set_timeout($fp, $this->read_timeout);
723 $this->timed_out =
false;
725 fwrite($fp, $headers . $body, strlen($headers . $body));
// Reset per-response state before reading the reply.
727 $this->_redirectaddr =
false;
728 unset($this->headers);
// Read response header lines until the blank line that ends the header section.
733 while ($currentHeader = fgets($fp, $this->_maxlinelen)) {
// A status of -100 marks a read timeout.
734 if ($this->read_timeout > 0 && $this->_check_timeout($fp)) {
735 $this->status = -100;
739 if (
"\r\n" == $currentHeader) {
// Location:/URI: headers announce a redirect.
744 if (preg_match(
'/^(Location:|URI:)/i', $currentHeader)) {
746 preg_match(
'/^(Location:|URI:)[ ]+(.*)/i', rtrim($currentHeader), $matches);
// A target without "://" is relative and is rebuilt against the current host.
748 if (!preg_match(
"|\:\/\/|", $matches[2])) {
// NOTE(review): ":" and the port are appended unconditionally - presumably yields
// "host:" when no port has been set; confirm.
750 $this->_redirectaddr = $URI_PARTS[
'scheme'] .
'://' . $this->host .
':' . $this->port;
// Ensure exactly one slash between the server root and the relative target.
752 if (!preg_match(
'|^/|', $matches[2])) {
753 $this->_redirectaddr .=
'/' . $matches[2];
755 $this->_redirectaddr .= $matches[2];
758 $this->_redirectaddr = $matches[2];
// Status line: keep the numeric code in status and the whole line in response_code.
762 if (preg_match(
'|^HTTP/|', $currentHeader)) {
763 if (preg_match(
"|^HTTP/[^\s]*\s(.*?)\s|", $currentHeader, $status)) {
764 $this->status = $status[1];
766 $this->response_code = $currentHeader;
774 if (preg_match(
"/Content-Encoding: gzip/i", $currentHeader)) {
778 $this->headers[] = $currentHeader;
783 $_data = fread($fp, $this->maxlength);
// The first 10 bytes are skipped before inflating - presumably the fixed-size gzip header.
793 $results = substr($results, 10);
794 $results = gzinflate($results);
797 if ($this->read_timeout > 0 && $this->_check_timeout($fp)) {
798 $this->status = -100;
// A meta refresh tag in the body is treated like a redirect.
804 if (preg_match(
"'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i", $results, $match)) {
805 $this->_redirectaddr = $this->_expandlinks($match[1], $URI);
// Frame sources are queued for fetch()/submit() while the frame budget allows; the
// frameset document itself is appended to the results array.
809 if (($this->_framedepth < $this->maxframes) && preg_match_all(
"'<frame\s+.*src[\s]*=[\'\"]?([^\'\">]+)'i", $results, $match)) {
810 $this->results[] = $results;
811 foreach ($match[1] as $xValue) {
812 $this->_frameurls[] = $this->_expandlinks($xValue, $URI_PARTS[
'scheme'] .
'://' . $this->host);
// Once results is an array (frames were seen), further documents are appended to it.
815 elseif (is_array($this->results)) {
816 $this->results[] = $results;
820 $this->results = $results;
/**
 * Copy cookies from the stored response headers into $this->cookies.
 *
 * Every header line starting with "set-cookie:" (case-insensitive) contributes
 * one name => value pair; the value is url-decoded and cut at the first ';',
 * so cookie attributes are ignored.
 */
831 public function setcookies()
833 foreach ($this->headers as $x => $xValue) {
834 if (preg_match(
'/^set-cookie:[\s]+([^=]+)=([^;]+)/i', $this->headers[$x], $match)) {
835 $this->cookies[$match[1]] = urldecode($match[2]);
/**
 * Check whether the last read on the stream timed out.
 *
 * Only looks at the stream when a read timeout is configured; when the stream
 * metadata reports 'timed_out', $this->timed_out is set to true.
 *
 * @param resource $fp Stream to inspect.
 *
 * @return mixed Used as a boolean by _httprequest().
 */
848 public function _check_timeout($fp)
850 if ($this->read_timeout > 0) {
851 $fp_status = stream_get_meta_data($fp);
852 if ($fp_status[
'timed_out']) {
853 $this->timed_out =
true;
// NOTE(review): statements of the connection routine (presumably _connect) - confirm.
// Picks the endpoint (proxy or target host), prepares an SSL context for HTTPS,
// opens the socket with stream_socket_client() and records an error on failure.

// Proxy mode needs both a proxy host and a proxy port.
868 if (!empty($this->proxy_host) && !empty($this->proxy_port)) {
869 $this->_isproxy =
true;
871 $host = $this->proxy_host;
872 $port = $this->proxy_port;
874 if (
'https' == $this->scheme) {
875 trigger_error(
'HTTPS connections over proxy are currently not supported', E_USER_ERROR);
887 if (
'https' == $this->scheme) {
// Verification options are set explicitly only when a CA file or CA path is configured.
// NOTE(review): 'CN_match' is a legacy SSL context option; current PHP versions use
// 'peer_name' - confirm against the supported PHP versions.
890 if (isset($this->cafile) || isset($this->capath)) {
891 $context_opts[
'ssl'] = [
892 'verify_peer' =>
true,
893 'CN_match' => $this->host,
894 'disable_compression' =>
true,
897 if (isset($this->cafile)) {
898 $context_opts[
'ssl'][
'cafile'] = $this->cafile;
900 if (isset($this->capath)) {
901 $context_opts[
'ssl'][
'capath'] = $this->capath;
// The ssl:// transport prefix makes the stream negotiate TLS.
905 $host =
'ssl://' . $host;
908 $context = stream_context_create($context_opts);
// Plain HTTP connections get an explicit tcp:// transport prefix.
910 if (PHP_VERSION_ID > 50000) {
911 if($this->scheme ==
'http'){
912 $host =
"tcp://" . $host;
918 $fp = stream_socket_client(
923 STREAM_CLIENT_CONNECT,
// On failure the socket error number becomes the status and a message is stored in error.
943 $this->status = $errno;
946 $this->error =
'socket creation failed (-3)';
949 $this->error =
'dns lookup failure (-4)';
952 $this->error =
'connection refused or timed out (-5)';
955 $this->error =
'connection failed (' . $errno .
')';
/**
 * Close the stream that was used for a request.
 *
 * @param resource $fp Stream handle to close.
 *
 * @return bool The value reported by fclose().
 */
public function _disconnect($fp)
{
    $closed = fclose($fp);

    return $closed;
}
// NOTE(review): statements of the POST body builder (presumably _prepare_post_body) - confirm.
// Builds $postdata from $formvars and $formfiles according to $this->_submit_type.

// Scalars are wrapped so both inputs can be iterated uniformly.
982 $formvars = (array)$formvars;
983 $formfiles = (array)$formfiles;
986 if (0 == count($formvars) && 0 == count($formfiles)) {
990 switch ($this->_submit_type) {
// URL-encoded body: key=value pairs joined with '&'; array or object values are
// emitted as key[subkey]=value.
991 case 'application/x-www-form-urlencoded':
993 foreach ($formvars as $key => $val) {
994 if (is_array($val) || is_object($val)) {
995 foreach ($val as $cur_key => $cur_val) {
996 $postdata .= urlencode($key) .
"[$cur_key]=". urlencode($cur_val).
"&";
1000 $postdata .= urlencode($key) .
'=' . urlencode($val) .
'&';
// Drop the trailing '&'.
1003 $postdata = substr($postdata, 0, strlen($postdata) - 1);
// Multipart body: one part per field value and per file, each introduced by the boundary.
1006 case 'multipart/form-data':
1007 $this->_mime_boundary =
'Snoopy' . md5(uniqid(microtime(),
true));
1010 foreach ($formvars as $key => $val) {
1011 if (is_array($val) || is_object($val)) {
1012 foreach ($val as $cur_val) {
1013 $postdata .=
'--' . $this->_mime_boundary .
"\r\n";
// NOTE(review): "\[" and "\]" are not escape sequences in a double-quoted string, so
// the backslashes are emitted and the field name becomes key\[\] - confirm whether
// key[] was intended.
1014 $postdata .=
"Content-Disposition: form-data; name=\"$key\[\]\"\r\n\r\n";
1015 $postdata .=
"$cur_val\r\n";
1018 $postdata .=
'--' . $this->_mime_boundary .
"\r\n";
1019 $postdata .=
"Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
1020 $postdata .=
"$val\r\n";
// Each form field may map to one file name or a list of file names.
1025 foreach ($formfiles as $field_name => $file_names) {
1026 $file_names = (array)$file_names;
1027 foreach ($file_names as $file_name) {
1028 if (!is_readable($file_name)) {
// NOTE(review): filesize() is 0 for an empty file and fread() rejects a zero length on
// current PHP versions - confirm how empty uploads should behave.
1032 $fp = fopen($file_name,
'r');
1033 $file_content = fread($fp, filesize($file_name));
// Only the base name of the file is sent to the server.
1035 $base_name = basename($file_name);
1037 $postdata .=
'--' . $this->_mime_boundary .
"\r\n";
1038 $postdata .=
"Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
1039 $postdata .=
"$file_content\r\n";
// Closing boundary.
1042 $postdata .=
'--' . $this->_mime_boundary .
"--\r\n";
// Presumably the fallback for other submit types: the first form variable is used as the raw body.
1048 $postdata = $formvars[0];
/**
 * Return the content stored by the most recent request.
 *
 * @return mixed Current value of $this->results: a single document, or an
 *               array of documents when several were stored.
 */
public function getResults()
{
    $collected = $this->results;

    return $collected;
}