[FONT-SIZE=3]熟悉下正则表达式的运用
[CODE=php]
/*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*/
header('Content-Type:text/html;charset=utf-8');
date_default_timezone_set('PRC');
require './Snoopy.class.php';

/*
 * Form handler: when the form is submitted with a "url" field, collect the
 * links found on that page, keep those whose address contains "qiuzhitie",
 * skip the first four of them, then fetch each remaining page and print its
 * address, title, phone number and detail text (at most 10 entries).
 */
if (isset($_POST['submit']) && isset($_POST['url'])) {
    $url = $_POST['url'];

    // Validate the URL. A field posted as "url[]" arrives as an array, so the
    // type is checked before the value is handed to preg_match().
    if (!is_string($url) || !preg_match('/^https?:\/\/[a-zA-Z0-9\-\.]+/i', $url)) {
        echo "\n执行失败!URL不合法!\n";
        exit();
    }

    // Pass 1: gather the links of the listing page.
    $snoopy = new Snoopy;
    $snoopy->fetchlinks($url);

    // results is only iterated when it really is an array, so a failed fetch
    // yields an empty list instead of a foreach warning.
    $links = is_array($snoopy->results) ? $snoopy->results : array();

    $myurl = array();
    foreach ($links as $link) {
        // strpos() returns 0 for a match at the very start of the string, so
        // the result must be compared against false, not tested for truthiness.
        if (is_string($link) && strpos($link, 'qiuzhitie') !== false) {
            $myurl[] = $link;
        }
    }

    // Drop the first four matching links (same effect as unsetting keys 0..3).
    $myurl = array_slice($myurl, 4);

    // Pass 2: use a fresh Snoopy instance for the detail pages.
    $snoopy = null;
    $snoopy = new Snoopy;

    $escapeFlags = ENT_QUOTES | ENT_SUBSTITUTE;
    $shown = 0;
    foreach ($myurl as $pageUrl) {
        $snoopy->fetch($pageUrl);
        $page = is_string($snoopy->results) ? $snoopy->results : '';

        // Each field falls back to an empty string when its pattern does not
        // match, instead of reading an undefined $matches[0] (or a stale value
        // left over from the previous page).
        // NOTE(review): the lookbehind of the title pattern is a bare newline;
        // it looks as if an opening tag (probably <h1>) was lost from the
        // pattern -- confirm against the target page markup before changing it.
        $title = preg_match('/(?<=\n).*(?=<\/h1>)/', $page, $matches) === 1 ? $matches[0] : '';
        $phone = preg_match('/(?<="tel STYLE1">).*(?=<\/strong>)/', $page, $matches) === 1 ? $matches[0] : '';
        $detail = preg_match('/(?<=p" >)\s*.*\s*(?=<\/p>)/', $page, $matches) === 1 ? trim($matches[0]) : '';

        // Everything printed here comes from a remote page, so it is escaped
        // before being written into the HTML response.
        echo '地址:', htmlspecialchars($pageUrl, $escapeFlags, 'UTF-8'), "\n";
        echo '标题:', htmlspecialchars($title, $escapeFlags, 'UTF-8'), "\n";
        echo '电话:', htmlspecialchars($phone, $escapeFlags, 'UTF-8'), "\n";
        echo '详细信息:', htmlspecialchars($detail, $escapeFlags, 'UTF-8'), "\n";
        echo "\n";

        // Demo limit: stop the whole script after the tenth entry.
        $shown++;
        if ($shown == 10) {
            exit('在此只采集10条做演示');
        }
    }
}
?>
请输入待抓取页面url
测试地址:http://bj.h2h.cn/f2660gp1.html
[/CODE][/FONT-SIZE]