Featured Content Slider

Home » Phuongnamsoft Crawler 1.0

Phuongnamsoft Crawler 1.0

Vo Uu | 09:28 | 0 nhận xét
<?php
// Remove PHP's execution time limit for this script.
set_time_limit(0);

// Root URL the crawl starts from.
$site = "http://stackoverflow.com";

// Result list of URLs, seeded with the root URL at index 0.
$rsx = array($site);

// Declared here but not read anywhere in the visible script.
$checksum = array();
/**
 * Fetch a document and return the href values of its <a> tags.
 *
 * The href may be wrapped in double or single quotes and may appear anywhere
 * among the tag's attributes. Only real <a> tags are considered (not <abbr>,
 * <area>, ...), and empty href values are skipped.
 *
 * @param string $site URL (or any path readable by file_get_contents()) to load.
 * @return array|bool List of raw href strings in document order, or FALSE when
 *                    the document could not be read, is empty, or has no links.
 */
function crawl($site){
    $markup = file_get_contents($site);

    // file_get_contents() returns FALSE on failure; an empty body has no links.
    if ($markup === FALSE || $markup === '') {
        return FALSE;
    }

    // <a\b        : the tag name must be exactly "a".
    // [^>]*?\s    : lazily skip other attributes, never leaving the current tag.
    // (?|...|...) : branch reset, so either quoting style lands in group 1.
    $found = preg_match_all(
        '/<a\b[^>]*?\shref\s*=\s*(?|"([^"]+)"|\'([^\']+)\')/i',
        $markup,
        $links
    );

    return ($found && !empty($links[1])) ? $links[1] : FALSE;
}

/**
 * Normalise a raw href against the site root and keep it only if it is internal.
 *
 * - Absolute http(s) URLs are returned unchanged when they begin with $site
 *   (compared as a prefix, followed by end of string, "/", "?" or "#");
 *   any other absolute http(s) URL yields FALSE.
 * - Protocol-relative URLs ("//host/path") take the scheme of $site first.
 * - Empty hrefs, in-page anchors ("#...") and other schemes (mailto:,
 *   javascript:, tel:, ...) yield FALSE.
 * - Everything else is treated as a path and joined to $site with exactly one
 *   "/" in between. The page the link was found on is not known here, so
 *   relative paths are resolved against the site root.
 *
 * @param string $link Raw href value taken from a page.
 * @param string $site Site root, e.g. "http://stackoverflow.com".
 * @return string|bool Absolute internal URL, or FALSE if the link must be skipped.
 */
function checklink($link,$site)
{
    $link = trim($link);

    // Nothing to follow: empty href or a fragment of the current page.
    if ($link === '' || $link[0] === '#') {
        return FALSE;
    }

    $base = rtrim($site, '/');

    // Protocol-relative URL: reuse the scheme of the site root.
    if (strncmp($link, '//', 2) === 0) {
        $colon = strpos($site, ':');
        $scheme = ($colon === FALSE) ? 'http' : substr($site, 0, $colon);
        $link = $scheme . ':' . $link;
    }

    if (preg_match('#^https?://#i', $link)) {
        $len = strlen($base);

        // Must start with the site root, not merely contain it somewhere.
        if (strncasecmp($link, $base, $len) !== 0) {
            return FALSE;
        }

        // Host boundary check so "http://site.com.evil.test" is not accepted.
        $next = (string) substr($link, $len, 1);
        if ($next === '' || strpos('/?#', $next) !== FALSE) {
            return $link;
        }

        return FALSE;
    }

    // Any other explicit scheme cannot be crawled over HTTP.
    if (preg_match('#^[a-z][a-z0-9+.\-]*:#i', $link)) {
        return FALSE;
    }

    return $base . '/' . ltrim($link, '/');
}


/**
 * Recursively crawl $sitel and collect every internal link into $rsx.
 *
 * Each page is fetched with crawl(); every href is normalised with
 * checklink() against $rsx[0] (the site root). Links not yet present in $rsx
 * are appended, then each internal link of the page is crawled one level
 * deeper until $maxDepth is reached.
 *
 * A page already fetched at the same or a shallower depth is not fetched
 * again: its links are already recorded, and a deeper visit can only explore
 * a subset of what the shallower visit explores.
 *
 * @param string $sitel     URL of the page to fetch at this step.
 * @param int    $n         Current recursion depth (0 for the start page).
 * @param array  $rsx       Result list, by reference. $rsx[0] is the site root;
 *                          it is seeded with $sitel when missing.
 * @param int    $maxDepth  Depth at which recursion stops (default 3, the
 *                          limit that used to be hard-coded).
 * @param array  $crawledAt Internal bookkeeping (URL => shallowest depth at
 *                          which it was fetched). Callers normally omit it.
 * @return bool|null TRUE when the depth limit stopped this call, NULL otherwise.
 */
function re($sitel,$n,&$rsx,$maxDepth=3,&$crawledAt=array())
{
    // Recursion depth limit.
    if ($n >= $maxDepth) {
        return True;
    }

    // Make sure the site root used by checklink() exists.
    if (!isset($rsx[0])) {
        $rsx[0] = $sitel;
    }

    // Skip pages already fetched at this depth or closer to the root.
    if (isset($crawledAt[$sitel]) && $crawledAt[$sitel] <= $n) {
        return;
    }
    $crawledAt[$sitel] = $n;

    $links = crawl($sitel);
    if (!is_array($links) || !$links) {
        return;
    }

    // Normalise each href once; keys de-duplicate links within this page.
    $internal = array();
    foreach ($links as $link) {
        $link = checklink($link, $rsx[0]);
        if (!is_string($link) || $link === '') {
            continue;
        }

        $internal[$link] = true;

        if (!in_array($link, $rsx, true)) {
            $rsx[] = $link;
        }
    }

    // Record all links of this page first, then descend (same order as before).
    foreach (array_keys($internal) as $link) {
        re((string) $link, $n + 1, $rsx, $maxDepth, $crawledAt);
    }
}
// Start the crawl at the root URL with depth 0; re() fills $rsx by reference.
re(
    $site,
    0,
    $rsx
);

// Dump the collected list of URLs.
var_dump($rsx);
?>
Share this article :

0 nhận xét:

Đăng nhận xét

Recent Post

Test Footer 1

 
Support : Creating Website | Johny Template | Mas Template
Copyright © 2011. The UG - All Rights Reserved
Template Modify by Creating Website
Proudly powered by Blogger