The Cache: Technology Expert's Forum
 
*
Welcome, Guest. Please login or register. September 17, 2019, 12:07:22 AM

Login with username, password and session length


Pages: [1]
  Print  
Author Topic: Any ideas on what wrong with my script  (Read 2531 times)
Politik
Rookie
**
Offline Offline

Posts: 11


View Profile
« on: September 12, 2007, 09:25:30 AM »

Hey guys, I have the following wordpress script that is not working. It will scrape the serps for the wordpress blogs and print the links, it will even put the links in the database. However, when I try to scrape the links themselves for the comment code and the postpage, it doesn't send any info to the database. Any ideas for a noob coder? Thanks.


Code:
<?php
error_reporting
(E_ALL);

include (
"snoopy.class.php");


function MultiScrape ($fp$kw){
  
   //SNOOPY!
$snoopy = new Snoopy;

//Loop thru Google
for ($g 0$g <=0$g++) {

$googlequery "http://www.google.com/search?q=";
  
$search $googlequery.$fp;

$googletarget $search."+".$kw."&hl=en&start=".($g*10)."&sa=N&filter=0";
  
  
  // Verify that there are not illegal characters in the urls
    
$googletarget html_entity_decode($googletarget);

 sleep(rand(1030));
    
//hit google
$googlepage $snoopy->fetch($googletarget);
$googlepage $snoopy->results;
  
   //parse out the links we want
preg_match_all('/<a href="([^"]+)" class=l>(.*?)<\/a>/'$googlepage$googleoutput);

for ($a=0$a<(count($googleoutput[1])); $a++){
$googlelinks [] = $googleoutput[1][$a]."\n";
   }
}



// Loop thru Yahoo
for ($y 0$y <=0$y++) {
  
$yahootarget  "http://api.search.yahoo.com/WebSearchService/V1/webSearch?appid=MYAPPID&query=".$fp."+".$kw."&adult_ok=1&results=100&output=xml&start=".($y*100+1);

// Verify that there are not illegal characters in the urls
    
$yahootarget html_entity_decode($yahootarget);

sleep(rand(515));
    
//hit yahoo
       
$yahoopage $snoopy->fetch($yahootarget);
$yahoopage $snoopy->results;


//create an array to hold our links
$yahoolinks = array();


    
//parse out the links we want
preg_match_all('/summary\>\<url\>(.+?)\<\/url\>/i'$yahoopage$yahoo_output);

for ($b=0$b<(count($yahoo_output[1])); $b++){
$yahoolinks [] = $yahoo_output[1][$b]."\n";

}
}



// Loop thru MSN
for ($m 0$m <=0$m++) {

$msntarget "http://search.msn.com/results.aspx?q=%22by+wordpress%22+".$kw."&count=50&format=xml&first=".($m*50+1);

// Verify that there are not illegal characters in the urls
    
$msntarget html_entity_decode($msntarget);

sleep(rand(1530));
    
//hit yahoo
       
$msnpage $snoopy->fetch($msntarget);
$msnpage $snoopy->results;

//create an array to hold our links

$msnlinks = array();

    
//parse out the links we want
preg_match_all('/\<url\>(.+?)\<\/url\>/i'$msnpage$msnoutput);

for ($c=0$c<(count($msnoutput[1])); $c++){
$msnlinks [] = $msnoutput[1][$c]."\n";

}
}


$links = (array_unique(array_merge($googlelinks,$yahoolinks,$msnlinks)));


foreach (
$links as $link){
  
    
  
$ch curl_init("$link");
curl_setopt($chCURLOPT_USERAGENT'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.8.0.4) Gecko/20060508 Firefox/1.5.0.4');
curl_setopt($chCURLOPT_RETURNTRANSFER1);
curl_setopt($chCURLOPT_REFERER$link);
$page curl_exec($ch);
curl_close($ch);
  
 
eregi('<form action="(.*)" method="post" id="commentform">'$page$postpage);
eregi('<input type="hidden" name="comment_post_ID" value="(.[^"]*)" />'$page$comment);

$kw  = urldecode(trim($kw));
$kw  str_replace' '''$kw );


// Connect to Database
$con=mysql_connect ("localhost""username""password"
or die
(
'Cannot connect to the database because:' mysql_error());

// Create Database
mysql_query("CREATE DATABASE WordPressBlogs",$con);

// Create table in my_db database
mysql_select_db("WordPressBlogs"$con);
mysql_query("CREATE TABLE {$kw}
(
ID int NOT NULL AUTO_INCREMENT, 
PRIMARY KEY(ID),
URL varchar(255)
PostPage varchar(255)
CommentID varchar(255)
)"
);

// Insert Info into Database
mysql_query("INSERT INTO {$kw} (URL) 
VALUES ('
$link')");
mysql_query("INSERT INTO {$kw} (PostPage) 
VALUES ('
$postpage[1]')");
mysql_query("INSERT INTO {$kw} (CommentID) 
VALUES ('
$comment[1]')");
}

mysql_close($con);

}

// Identify the search term
  
$footprint "%22powered+by+wordpress%22+OR+leave+a+reply+powered+by+wordpress+%2B%7E";
       

  
$keyword 'whatever keyword';
$keyword urlencode(trim($keyword));

//Call Function
multiscrape($footprint,$keyword);




?>
Logged

No links in signatures please
perkiset
Olde World Hacker
Administrator
Lifer
*****
Offline Offline

Posts: 10096



View Profile
« Reply #1 on: September 15, 2007, 11:06:55 AM »

Hey Politik -

I keep pressing Mark Unread and coming back to this to see if a moment of inspiration will grab me, but I just don't have the time to debug it for you... I apologize.

I can probably help push you in the right direction if you simply add more syptoms...
add error_reporting(E_ALL) and look at all the messages...
get the mysql_error from every mysql function and see what comes back...

bring back some more symptoms and perhaps the answer will become manifest.

/p
Logged

It is now believed, that after having lived in one compound with 3 wives and never leaving the house for 5 years, Bin Laden called the U.S. Navy Seals himself.
Politik
Rookie
**
Offline Offline

Posts: 11


View Profile
« Reply #2 on: September 17, 2007, 05:21:25 AM »

Sounds good. Thanks Perk
Logged

No links in signatures please
Pages: [1]
  Print  
 
Jump to:  

Perkiset's Place Home   Best of The Cache   phpMyIDE: MySQL Stored Procedures, Functions & Triggers
Politics @ Perkiset's   Pinkhat's Perspective   
cache
mart
coder
programmers
ajax
php
javascript
Powered by MySQL Powered by PHP Powered by SMF 1.1.2 | SMF © 2006-2007, Simple Machines LLC
Seo4Smf v0.2 © Webmaster's Talks


Valid XHTML 1.0! Valid CSS!