Politik

Hey guys, I have the following wordpress script that is not working. It will scrape the serps for the wordpress blogs and print the links, it will even put the links in the database. However, when I try to scrape the links themselves for the comment code and the postpage, it doesn't send any info to the database. Any ideas for a noob coder? Thanks.


<?

php

 
error_reporting(E_ALL);

include ("snoopy.class.

php

 ");


function MultiScrape ($fp, $kw){
 
  //SNOOPY!
$snoopy = new Snoopy;

//Loop thru Google
for ($g = 0; $g <=0; $g++) {

$googlequery = "http://www.google.com/search?q=";
  $search = $googlequery.$fp;

$googletarget = $search."+".$kw."&hl=en&start=".($g*10)."&sa=N&filter=0";
 
 
  // Verify that there are not illegal characters in the urls
    $googletarget = html_entity_decode($googletarget);

sleep(rand(10, 30));
    //hit google
$googlepage = $snoopy->fetch($googletarget);
$googlepage = $snoopy->results;
 
  //parse out the links we want
preg_match_all('/<a href="([^"]+)" class=l>(.*?)</a>/', $googlepage, $googleoutput);

for ($a=0; $a<(count($googleoutput[1])); $a++){
$googlelinks [] = $googleoutput[1][$a]." ";
  }
}



// Loop thru Yahoo
for ($y = 0; $y <=0; $y++) {
 
$yahootarget  = "http://api.search.yahoo.com/WebSearchService/V1/webSearch?appid=MYAPPID&query=".$fp."+".$kw."&adult_ok=1&results=100&output=xml&start=".($y*100+1);

// Verify that there are not illegal characters in the urls
    $yahootarget = html_entity_decode($yahootarget);

sleep(rand(5, 15));
    //hit yahoo
      $yahoopage = $snoopy->fetch($yahootarget);
$yahoopage = $snoopy->results;


//create an array to hold our links
$yahoolinks = array();


    //parse out the links we want
preg_match_all('/summary><url>(.+?)</url>/i', $yahoopage, $yahoo_output);

for ($b=0; $b<(count($yahoo_output[1])); $b++){
$yahoolinks [] = $yahoo_output[1][$b]." ";

}
}



// Loop thru MSN
for ($m = 0; $m <=0; $m++) {

$msntarget = "http://search.msn.com/results.

asp

 x?q=%22by+wordpress%22+".$kw."&count=50&format=xml&first=".($m*50+1);

// Verify that there are not illegal characters in the urls
    $msntarget = html_entity_decode($msntarget);

sleep(rand(15, 30));
    //hit yahoo
      $msnpage = $snoopy->fetch($msntarget);
$msnpage = $snoopy->results;

//create an array to hold our links

$msnlinks = array();

    //parse out the links we want
preg_match_all('/<url>(.+?)</url>/i', $msnpage, $msnoutput);

for ($c=0; $c<(count($msnoutput[1])); $c++){
$msnlinks [] = $msnoutput[1][$c]." ";

}
}


$links = (array_unique(array_merge($googlelinks,$yahoolinks,$msnlinks)));


foreach ($links as $link){
 
   
  $ch = curl_init("$link");
curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.8.0.4) Gecko/20060508 Firefox/1.5.0.4');
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_REFERER, $link);
$page = curl_exec($ch);
curl_close($ch);
 
eregi('<form action="(.*)" method="post" id="commentform">', $page, $postpage);
eregi('<input type="hidden" name="comment_post_ID" value="(.[^"]*)" />', $page, $comment);

$kw = urldecode(trim($kw));
$kw  = str_replace( ' ', '', $kw );


// Connect to Database
$con=mysql_connect ("localhost", "username", "password"Applause
or die
('Cannot connect to the database because:' . mysql_error());

// Create Database
mysql_query("CREATE DATABASE WordPressBlogs",$con);

// Create table in my_db database
mysql_select_db("WordPressBlogs", $con);
mysql_query("CREATE TABLE {$kw}
(
ID int NOT NULL AUTO_INCREMENT,
PRIMARY KEY(ID),
URL varchar(255)
PostPage varchar(255)
CommentID varchar(255)
)");

// Insert Info into Database
mysql_query("INSERT INTO {$kw} (URL)
VALUES ('$link')");
mysql_query("INSERT INTO {$kw} (PostPage)
VALUES ('$postpage[1]')");
mysql_query("INSERT INTO {$kw} (CommentID)
VALUES ('$comment[1]')");
}

mysql_close($con);

}

// Identify the search term
  $footprint = "%22powered+by+wordpress%22+OR+leave+a+reply+powered+by+wordpress+%2B%7E";
     
  $keyword = 'whatever keyword';
$keyword = urlencode(trim($keyword));

//Call Function
multiscrape($footprint,$keyword);




?>

perkiset

Hey Politik -

I keep pressing Mark Unread and coming back to this to see if a moment of inspiration will grab me, but I just don't have the time to debug it for you... I apologize.

I can probably help push you in the right direction if you simply add more syptoms...
add error_reporting(E_ALL) and look at all the messages...
get the mysql_error from every mysql function and see what comes back...

bring back some more symptoms and perhaps the answer will become manifest.

/p

Politik

Sounds good. Thanks Perk


Perkiset's Place Home   Politics @ Perkiset's