The Cache: Technology Expert's Forum
 
*
Welcome, Guest. Please login or register. September 18, 2019, 12:35:37 PM

Login with username, password and session length


Pages: [1]
  Print  
Author Topic: Perk's WebRequest Class  (Read 20972 times)
perkiset
Olde World Hacker
Administrator
Lifer
*****
Offline Offline

Posts: 10096



View Profile
« on: April 21, 2007, 03:16:58 PM »

This is the class that I use for performing web requests from systems that do not have anything other than php 4.3. Note that I recently added a couple little lines that are 5.0 only, but if you try to use the class it'll bomb fast and let you know. The new lines are not critical but do help out a bit.

Pop a thread in the discussion area if you wanna talk about usage.

Code:
<?php
class WebRequest {
var $URL;
var $Host;
var $Port;
var $responseCode;
var $responseMessage;
var $_method;
var $_lastResponse;
var $LastSent;
var $_GetArray = array();
var $_PostArray = array();
var $endOnBody false;
var $timeout 15;
var $succeedOnTimeout false;

var $cookieList;
var $headerList;

function WebRequest() {
$this->Reset();
$this->cookieList = array();
$this->headerList = array();
}

function AddGetParam($key$value) {
$count $this->_GetArray['__count'];
$count++;
$this->_GetArray[$count]['key'] = $key;
$this->_GetArray[$count]['value'] = $value;
$this->_GetArray['__count'] = $count;
}

function AddPostParam($key$value) {
$count $this->_PostArray['__count'];
$count++;
$this->_PostArray[$count]['key'] = $key;
$this->_PostArray[$count]['value'] = $value;
$this->_PostArray['__count'] = $count;
}

function Content() {
$tempStr str_replace(chr(13), ''$this->_lastResponse);
$tempArr explode(chr(10), $tempStr);
$chunked false;
while (true) {
$thisLine trim(strtolower($tempArr[0]));
if ($thisLine <= ' ') { break; }
if (strpos($thisLine'-length') > 0) {
$ptr strpos($thisLine':');
$contentLength trim(substr($thisLine$ptr 11024));
}

if (strpos($thisLine'encoding') > 0) {
// If the encoding is chunked then I have to gather the content differently...
$chunked = (strpos($thisLine'chunked') > 0);
}

array_shift($tempArr);
}

array_shift($tempArr);
$tempStr trim(implode(chr(10), $tempArr));

if ($chunked) {
// Bummer! NN2collect the data in chunks into a new array and send it back...
// (also, there wont be a "length" value yet)
$out '';
$ptr strpos($tempStrchr(10));
$thisLen hexdec(trim(substr($tempStr0$ptr)));
while ($thisLen 0) {
$tempStr trim(substr($tempStr$ptr 1strlen($tempStr)));
$out .= substr($tempStr0$thisLen);
$tempStr trim(substr($tempStr$thisLen 1strlen($tempStr)));
$ptr strpos($tempStrchr(10));
$thisLen hexdec(trim(substr($tempStr0$ptr)));
}

return $out;
} else return substr($tempStr0$contentLength);
}

function Cookies() { return $this->cookieList; }

function __Dispatch() {
if ($this->URL <= ' ') { return 'Error: WebRequest requires the URL property to be set'; }
if ($this->Host <= ' ') { return 'Error: WebRequest requires the Host property to be set'; }


// Build the final URL...
$finalURL $this->URL;
$getCount $this->_GetArray['__count'];
if ($getCount >= 0) {
$sepStr '?';
$getStr '';
for ($i=0$i<=$getCount$i++) {
$key trim($this->_GetArray[$i]['key']);
if ($key <= ' ') { continue; }
$value trim(urlencode($this->_GetArray[$i]['value']));
$getStr .= "$sepStr$key=$value";
$sepStr '&';
}

if (substr($finalURL, -11) == '/') { $finalURL substr($finalURLstrlen($finalURL) - 1); }
$finalURL .= $getStr;
}


// Build the content portion of the request...
$postStr 'No Content';
$postCount $this->_PostArray['__count'];
if ($this->_method == 'POST') {
$sepStr '';
$postStr '';
for ($i=0$i<=$postCount$i++) {
$key trim($this->_PostArray[$i]['key']);
if ($key <= ' ') { continue; }
$value trim(urlencode($this->_PostArray[$i]['value']));
$postStr .= "$sepStr$key=$value";
$sepStr '&';
}
}

$requestLen strlen($postStr);

$cookieStr '';
$start true;
foreach($this->cookieList as $name=>$value)
{
if (!$start) { $cookieStr .= '; '; }
$cookieStr .= "$name=$value";
$start false;
}

//      Build the actual HTTP request...
if ($this->_method == 'GET') { $type='text/html'; }
else { $type="application/x-www-form-urlencoded"; }

$header "{$this->_method} $finalURL HTTP/1.1\r\n";
$header .= "Host: {$this->Host}\r\n";
$header .= "User-Agent: Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en) AppleWebKit/417.9 (KHTML, like Gecko) Safari/417.8\r\n";
$header .= "Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5\r\n";
$header .= "Accept-Language: en-us,en;q=0.5\r\n";
$header .= "Accept-Encoding: \r\n";
$header .= "Accept-Charset: ISO-8859-1,utf-8:q=0.7,*;q=0.7\r\n";
if ($cookieStr) { $header .= "Cookie: $cookieStr\r\n"; }
$header .= "Connection: close\r\n\r\n";
$header .= "Content-Length: $requestLen\r\n";
$header .= "Content-Type: $type\r\n";
$header .= "$postStr\r\n";

$this->LastSent $header;

// Dispatch it!
$errno 0;
$errstr '';
$socket = @fsockopen($this->Host$this->Port$errno$errstr);
$response '';
$endOnStr = ($this->endOnBody) ? '</body' '</html';
if ($socket) {
fputs($sockettrim($header));
$checkedHeader false;
while (!feof($socket)) 

   
stream_set_timeout($socket$this->timeout);
$thisBlock fread($socket65535);

$response .= $thisBlock
$testResp .= strtolower($thisBlock);

$info stream_get_meta_data($socket);
if ($info['timed_out'])
{
if (($this->succeedOnTimeout) && (strpos($testResp'</head') > 0)) {
// Probably an incomplete page and the caller of this function
// is ready for it...
break;
} else {
$this->responseCode = -1;
$this->responseMessage 'Timed Out';
$this->_lastResponse '';
return false;
}
}

if (strlen($response) == 0)
{
// If I am here then it means that the socket was closed on me
// without me receiving anything - otherwise, a complete failure. 

$this->responseCode = -2;
$this->responseMessage 'Bad Request';
$this->_lastResponse '';
return false;
}


// Important: Check to see if there is a redirect or a problem...
// If so, store that and return the redirect page.
if (!$checkedHeader)
{
$testResp str_replace(chr(13), ''$testResp);
$stop strpos($testRespchr(10) . chr(10));
if ($stop 0)
{
if (strpos(substr($testResp0$stop), '301 error') > 0)
{
$ptr strpos($testResp'location:');
$response substr($response$ptrstrlen($response));
$ptr strpos($responsechr(10));
$response trim(substr($response0$ptr));
$this->responseCode 301;
$this->responseMessage $response;
$this->_lastResponse '';
return false;
}

if (strpos(substr($testResp0$stop), '404 error') > 0) {
$ptr strpos($testResp'location:');
$response 'Page not found';
$this->responseCode 404;
$this->responseMessage $response;
$this->_lastResponse '';
return false;
}

$checkedHeader true;
}
}

if (strpos($testResp$endOnStr) > 0) { break; }

}

fclose($socket);

} else { $response "Error $errno$errstr"; }

$this->responseCode 200;
$this->_lastResponse $response;

// Time to do a little work on the returned packet...
$this->headerList = array(); // clear it out
$this->cookieList = array(); // Clear it out as well...
$lines explode(chr(10), $response);
foreach($lines as $line)
{
if ($line <= ' ') { break; }
if (strpos($line':') === false) { continue; }
preg_match('/^([^:]*)\: (.*)$/'$line$matches);
$name $matches[1];
$value $matches[2];
$this->headerList[$name] = $value;
if (strtolower($name) == 'set-cookie')
{
preg_match('/([^=]*)\=([^;]*)/'$value$matches);
$name $matches[1];
$value $matches[2];
$this->cookieList[$name] = $value;
}
}

return $response;
}

function Get() {
$this->_method 'GET';
return $this->__Dispatch();
}

function GetCookie($cookieName) { return $this->cookieList[$cookieName]; }
function GetHeader($headerName) { return $this->headerList[$headerName]; }
function Headers() { return $this->headerList; }

function Post() {
$this->_method 'POST';
return $this->__Dispatch();
}

function Reset() {
$this->URL '';
$this->Host '';
$this->Port 80;
$this->_method 'GET';
$this->_GetArray['__count'] = -1;
$this->_PostArray['__count'] = -1;
$this->_lastResponse '';
}

function SetCookie($cookieName$cookieValue) { $this->cookieList[$cookieName] = $cookieValue; }

}

?>

Logged

It is now believed, that after having lived in one compound with 3 wives and never leaving the house for 5 years, Bin Laden called the U.S. Navy Seals himself.
thedarkness
Lifer
*****
Offline Offline

Posts: 585



View Profile
« Reply #1 on: April 21, 2007, 04:35:42 PM »

Rockin' perk
Logged

"I want to be the guy my dog thinks I am."
 - Unknown
perkiset
Olde World Hacker
Administrator
Lifer
*****
Offline Offline

Posts: 10096



View Profile
« Reply #2 on: April 23, 2007, 04:58:22 PM »

Thanks TD - I got a couple PMs, so here's a bit of usage:

Code:
$req = new WebRequest();
$req->Host = 'www.mysite.com';
$req->URL = '/aDir/aFile.html';
$req->Get();
echo $req->Content();

$req->AddPostParam('aName', 'aValue');
$req->Post();
echo $req->Content();
« Last Edit: May 07, 2007, 04:28:12 PM by perkiset » Logged

It is now believed, that after having lived in one compound with 3 wives and never leaving the house for 5 years, Bin Laden called the U.S. Navy Seals himself.
vsloathe
vim ftw!
Global Moderator
Lifer
*****
Offline Offline

Posts: 1669



View Profile
« Reply #3 on: November 08, 2007, 03:31:59 PM »

This class is t3h s3x0r. Perk has hooked me. If you can modify this so it can handle multipart form submission/file uploads, I'll never use cURL again.
Logged

hai
nutballs
Administrator
Lifer
*****
Offline Offline

Posts: 5627


Back in my day we had 9 planets


View Profile
« Reply #4 on: November 08, 2007, 07:32:43 PM »

coincidentally i was about to write one. now i dont need to.

btw, i have noticed alot of people write their functions/classes to break apart the host and URL.
Why do it like:
$prot='http://'
$host='www.something.com/somepage.htm';
$url='/somepage.htm';

why not do it just as:
$biglongurl='http://www.something.com/somepage.htm';

i have not been able to figure out a reason for not doing it as one big url


oh and another question. you do this.
if ($this->URL <= ' ')

what is the advantage over:
if ($this->URL == '')
« Last Edit: November 08, 2007, 07:50:50 PM by nutballs » Logged

I could eat a bowl of Alphabet Soup and shit a better argument than that.
thedarkness
Lifer
*****
Offline Offline

Posts: 585



View Profile
« Reply #5 on: November 09, 2007, 06:07:42 AM »


oh and another question. you do this.
if ($this->URL <= ' ')

what is the advantage over:
if ($this->URL == '')

Personally, I would lean towards;

if( $this->URL === '' )

Cheers,
td
Logged

"I want to be the guy my dog thinks I am."
 - Unknown
perkiset
Olde World Hacker
Administrator
Lifer
*****
Offline Offline

Posts: 10096



View Profile
« Reply #6 on: November 10, 2007, 10:50:59 AM »

This class is t3h s3x0r. Perk has hooked me. If you can modify this so it can handle multipart form submission/file uploads, I'll never use cURL again.
You are too kind Praise Actually, I originally started writing this for a host I was working on that had several of the HTTP mechanisms shut down (no PEAR or cURL either) but they had not shut down socket level connections.

Let's look at the multipart thang and I'll add it. That's a nice new chunk that should be added.


btw, i have noticed alot of people write their functions/classes to break apart the host and URL.
Why do it like:
$prot='http://'
$host='www.something.com/somepage.htm';
$url='/somepage.htm';

why not do it just as:
$biglongurl='http://www.something.com/somepage.htm';
For me it was forgetfulness. What I noticed over time of using the (then-evolving) class was that I'd forget whether I needed to add http: or whatever to the front... and since I'm doing construction on the end with get params and "perfecting" the url in any case, it just made more sense to get JUST the host and finish it up myself. I'd do stoopid stuff like passing a url of '/anotherPage.html' because I'd be thinking from the wrong perspective and then spend 2 hours trying to figure out WTF...  ROFLMAO


oh and another question. you do this.
if ($this->URL <= ' ')

what is the advantage over:
if ($this->URL == '')
Old habit, not much else. Less-than-or-equal-to a space is just in general more succesful for me than "equals a space" or equals nothing. And TD I'd not personal go towards exactly-equals-a-space (==='') or such because if I did something stoopid like $this->URL = false then the evaluation would not catch it. So the <= ' ' in general is the most broad capture of my stupidity and has for me for a long time.
Logged

It is now believed, that after having lived in one compound with 3 wives and never leaving the house for 5 years, Bin Laden called the U.S. Navy Seals himself.
nutballs
Administrator
Lifer
*****
Offline Offline

Posts: 5627


Back in my day we had 9 planets


View Profile
« Reply #7 on: November 11, 2007, 09:50:54 PM »

i got a problem using this class, and its when I tested it on my blogs.

When i grab the content from any of my blogs I get back just under 4Kb of data. I am guessing this is a packet issue, that you are looking for something and assuming its done when you see it. but im not figuring it out.

These are wordpress blogs btw.

run this code against your class:
Code:
$host='www.braindonkey.com';
$url='/2007/11/10/the-road-record-cheated-out-of-my-millions/';
$req = new WebRequest();
$req->Host = $host;
$req->URL = $url;
$req->Get();
echo $req->Content();

and to add another, that I get nothing back from...
Code:
$host='blogs.pcworld.com';
$url='/staffblog/archives/005885.html';
$req = new WebRequest();
$req->Host = $host;
$req->URL = $url;
$req->Get();
echo $req->Content();
« Last Edit: November 11, 2007, 10:06:35 PM by nutballs » Logged

I could eat a bowl of Alphabet Soup and shit a better argument than that.
perkiset
Olde World Hacker
Administrator
Lifer
*****
Offline Offline

Posts: 10096



View Profile
« Reply #8 on: November 12, 2007, 10:38:29 AM »

Leave it to NutBalls.  Sarcasm

Sarcasm aside, nice finds. What I did was to put echos of all (current) activity during the course of the pull to see what the heck is going on. Interestingly, these two URLs offer two completely different problems.

The second was easier to ascertain the problem - it is with the Content() function and not the dispatch itself. The entire thing is coming down, but there's not content-length header, so the Content portion was innacurately reporting the length of the content as well as the content string itself. Additionally, they do not report the totla length of the content after the \r\n\r\n like a chunked packet. Here's the header:

Code:
HTTP/1.1 200 OK^M
Date: Mon, 12 Nov 2007 16:42:32 GMT^M
Server: Apache/1.3.27 (Unix)^M
Connection: close^M
Content-Type: text/html^M
Vary: Accept-Encoding^M
^M

Note that they do have the correct amount of CR/LFs, but not length. The problem was not actually with the retrieval of the content, but the figuring out how much is there - it's the Content() function not the dispatch. To patch it, I've added 2 lines:

      if (!$contentLength)
         $contentLength = strlen($this->_lastResponse) - strpos($this->_lastResponse, "\r\r");

... so the Content function now should look like this:

Code:
function Content() {
$tempStr = str_replace(chr(13), '', $this->_lastResponse);
$tempArr = explode(chr(10), $tempStr);
$chunked = false;
while (true)
{
$thisLine = trim(strtolower($tempArr[0]));
if ($thisLine <= ' ') { break; }
if (strpos($thisLine, '-length') > 0) {
$ptr = strpos($thisLine, ':');
$contentLength = trim(substr($thisLine, $ptr + 1, 1024));
}

if (strpos($thisLine, 'encoding') > 0) {
// If the encoding is chunked then I have to gather the content differently...
$chunked = (strpos($thisLine, 'chunked') > 0);
}

array_shift($tempArr);
}

array_shift($tempArr);
$tempStr = trim(implode(chr(10), $tempArr));

if (!$contentLength)
$contentLength = strlen($this->_lastResponse) - strpos($this->_lastResponse, "\r\r");

if ($chunked) {
// Bummer! NN2collect the data in chunks into a new array and send it back...
// (also, there wont be a "length" value yet)
$out = '';
$ptr = strpos($tempStr, chr(10));
$thisLen = hexdec(trim(substr($tempStr, 0, $ptr)));
while ($thisLen > 0) {
$tempStr = trim(substr($tempStr, $ptr + 1, strlen($tempStr)));
$out .= substr($tempStr, 0, $thisLen);
$tempStr = trim(substr($tempStr, $thisLen + 1, strlen($tempStr)));
$ptr = strpos($tempStr, chr(10));
$thisLen = hexdec(trim(substr($tempStr, 0, $ptr)));
}

return $out;
} else return substr($tempStr, 0, $contentLength);
}

The second one is more difficult. What's going on with braindonkey is more complicated. That server is chunking the response at 4K, which complicates retrieval and is make the PHP function feof() report that the send is over. In looking at my previous page collections, I had assumed that I was handling chunked pages just fine, but in this case I am not. It is also not reporting the length in the header (so it would have had the same issue as the pcblog), however it is reporting the length at the head of the body (as it should for a chunked page). Here is that header:

Code:
HTTP/1.1 200 OK^M
Set-Cookie: X-Mapping-caklakng=FFA80B05317C972222F38C43FC9D6DE6; path=/^M
Date: Mon, 12 Nov 2007 17:32:37 GMT^M
Transfer-Encoding: chunked^M
Connection: close^M
X-Pingback: http://www.braindonkey.com/xmlrpc.php^M
Server: Apache/1.3.34 (Debian) PHP/5.2.0-8+etch5~pu1^M
X-Powered-By: PHP/5.2.0-8+etch5~pu1^M
Content-Type: text/html; charset=UTF-8^M
Keep-Alive: timeout=15, max=49^M
^M
31a0^M

My class is requesting packets of 64K at a time and is confused that it's chunking at 4K. I am working right now on a patch for this and will report when I get all set. At that point I'll report the entire set of code - as well I will add an easy "debug mode" property so that we can all watch whats going on more easily.

/p
Logged

It is now believed, that after having lived in one compound with 3 wives and never leaving the house for 5 years, Bin Laden called the U.S. Navy Seals himself.
perkiset
Olde World Hacker
Administrator
Lifer
*****
Offline Offline

Posts: 10096



View Profile
« Reply #9 on: November 12, 2007, 10:40:33 AM »

Actually, in thinking further there are several things that I'd like to modify about the class, so I am going to do a quick rewrite of a couple more things and report that here, since I'm going to tear it up anyway.

More as I know it,
/p
Logged

It is now believed, that after having lived in one compound with 3 wives and never leaving the house for 5 years, Bin Laden called the U.S. Navy Seals himself.
nutballs
Administrator
Lifer
*****
Offline Offline

Posts: 5627


Back in my day we had 9 planets


View Profile
« Reply #10 on: November 12, 2007, 11:55:34 AM »

I figured it was the packet sizes. I didnt actually check it, but since I understood in the code that you were working at the packet level, and that it seemed to just cut off from wordpress blogs, it must have been packet size. I also wondered if it was a buffer flushing issue.

Seems you know what it is though, so cool, i look forward to the updated version Smiley
Logged

I could eat a bowl of Alphabet Soup and shit a better argument than that.
perkiset
Olde World Hacker
Administrator
Lifer
*****
Offline Offline

Posts: 10096



View Profile
« Reply #11 on: November 12, 2007, 12:30:13 PM »

Actually I am knee deep in it at the moment and am *very* pleased thus far... I wrote the old one with considerably less regex and packet experience... this new one is going to be FAR more robust and cool.

Course, there is the You'llBeMyBetaTester thang but we'll probably get through that pretty quickly...
Logged

It is now believed, that after having lived in one compound with 3 wives and never leaving the house for 5 years, Bin Laden called the U.S. Navy Seals himself.
perkiset
Olde World Hacker
Administrator
Lifer
*****
Offline Offline

Posts: 10096



View Profile
« Reply #12 on: November 12, 2007, 03:46:13 PM »

This thread is officially closed out now, because discussion of the class has moved to the "Perk's NEW WebRequest Class" thread.
Logged

It is now believed, that after having lived in one compound with 3 wives and never leaving the house for 5 years, Bin Laden called the U.S. Navy Seals himself.
Pages: [1]
  Print  
 
Jump to:  

Perkiset's Place Home   Best of The Cache   phpMyIDE: MySQL Stored Procedures, Functions & Triggers
Politics @ Perkiset's   Pinkhat's Perspective   
cache
mart
coder
programmers
ajax
php
javascript
Powered by MySQL Powered by PHP Powered by SMF 1.1.2 | SMF © 2006-2007, Simple Machines LLC
Seo4Smf v0.2 © Webmaster's Talks


Valid XHTML 1.0! Valid CSS!