<?php
// Remove PHP's execution time limit: a crawl can run for a long time.
set_time_limit(0);

// Base URL of the site to crawl.
$site = "http://stackoverflow.com";

// Declared here but not read by any code visible in this script.
$checksum = array();

// Result list of collected URLs; slot 0 always holds the base URL.
$rsx = array($site);
/**
 * Fetch a page and extract the href of every anchor tag it contains.
 *
 * Only double-quoted href attributes are recognised. The anchor's content is
 * not inspected, so links that wrap an image or other markup are returned too.
 *
 * @param string $site URL (or any path file_get_contents() can read) of the page.
 * @return array|false Raw href values in document order, or FALSE when the
 *                     page cannot be read, is empty, or contains no links.
 */
function crawl($site){
    $markup = file_get_contents($site);

    // file_get_contents() yields FALSE on failure; treat it like an empty page
    // and always answer with FALSE rather than falling off the end (NULL).
    if ($markup === false || $markup === '') {
        return FALSE;
    }

    // Match one opening <a ...> tag at a time. [^>]* cannot run past the end
    // of the tag, so several links on the same line are captured separately,
    // and nothing is required between the closing quote and '>'. The optional
    // group forces whitespace right before "href" so attributes such as
    // data-href are not mistaken for it.
    preg_match_all('/<a\s(?:[^>]*?\s)?href\s*=\s*"([^"]+)"[^>]*>/i', $markup, $links);

    return !empty($links[1]) ? $links[1] : FALSE;
}
/**
 * Turn a raw href into an absolute URL on $site, or reject it.
 *
 * Relative links are resolved against the site root, because the page the
 * link was found on is not known to this function.
 *
 * @param string $link Raw href value taken from an anchor tag.
 * @param string $site Base URL of the crawled site, e.g. "http://example.com".
 * @return string|false Absolute URL when the link belongs to $site; FALSE for
 *                      external links, non-HTTP schemes (mailto:, javascript:,
 *                      ...), protocol-relative links, and empty or
 *                      fragment-only links.
 */
function checklink($link,$site)
{
    // Avoid a doubled slash when the base URL is written with a trailing one.
    $base = rtrim($site, '/');

    // Empty and fragment-only hrefs point back at the page they came from.
    if ($link === '' || $link[0] === '#') return FALSE;

    // Protocol-relative links ("//host/path") name a host explicitly; without
    // parsing them they cannot be shown to be internal, so they are skipped.
    if (strncmp($link, '//', 2) === 0) return FALSE;

    // A leading "scheme:" marks an absolute URL. Testing only the START of
    // the link keeps relative links that merely carry a URL in their query
    // string from being mistaken for absolute ones.
    if (preg_match('/^[a-z][a-z0-9+.\-]*:/i', $link))
    {
        // Internal only if the link begins with the base URL ...
        if (strncmp($link, $base, strlen($base)) !== 0) return FALSE;

        // ... and the base is followed by a URL delimiter (or nothing), so
        // that "http://site.com.evil.test" does not pass for "http://site.com".
        $rest = (string) substr($link, strlen($base));
        if ($rest === '' || strpos('/?#', $rest[0]) !== false) return $link;
        return FALSE;
    }

    // Relative link: join with exactly one slash between base and path.
    return $base . '/' . ltrim($link, '/');
}
/**
 * Recursively collect the internal links reachable from a page.
 *
 * The page is fetched with crawl(), each href is normalised with checklink()
 * against the base URL stored in $rsx[0], new URLs are appended to $rsx, and
 * every internal link is then crawled one level deeper.
 *
 * @param string $sitel   URL of the page to crawl.
 * @param int    $n       Current recursion depth; crawling stops at depth 3.
 * @param array  $rsx     Result list, modified in place. $rsx[0] must hold the
 *                        site's base URL; each newly found internal URL is
 *                        appended once.
 * @param array  $crawled Bookkeeping shared across the recursion: maps a URL
 *                        to the shallowest depth it has been crawled at.
 *                        Callers normally omit it.
 * @return bool|null TRUE when the depth limit stops the call, otherwise nothing.
 */
function re($sitel,$n,&$rsx,&$crawled=array())
{
    // Recursion depth limit; >= also stops a call that starts beyond it.
    if ($n >= 3) return True;

    // A page already crawled at this depth or shallower has contributed every
    // URL this call could add, so fetching it again would be wasted work.
    if (isset($crawled[$sitel]) && $crawled[$sitel] <= $n) return;
    $crawled[$sitel] = $n;

    $links = crawl($sitel);
    if ($links == FALSE) return;

    // Normalise every link once and keep the internal ones for both passes.
    $internal = array();
    foreach ($links as $link)
    {
        $link = checklink($link,$rsx[0]);
        if ($link === FALSE) continue;
        $internal[] = $link;
    }

    // First pass: record every URL of this page before descending, so the
    // order of discovery stays "whole page first, then its children".
    foreach ($internal as $link)
    {
        if (!in_array($link,$rsx,true))
        {
            array_push($rsx,$link);
        }
    }

    // Second pass: descend into each internal link.
    foreach ($internal as $link)
    {
        re($link,$n+1,$rsx,$crawled);
    }
}
// Start crawling at the base URL with depth 0; re() takes $rsx by reference
// and appends the URLs it finds.
re($site, 0, $rsx);

// Print the collected list.
var_dump($rsx);
?>
Phuongnamsoft Crawler 1.0
Vo Uu | 09:28 | 0
nhận xét
Related posts:
If you enjoyed this article, just click here, or subscribe to receive more great content just like it.
Nhãn:
PHP
Đăng ký:
Đăng Nhận xét (Atom)

0 nhận xét:
Đăng nhận xét