المساعد الشخصي الرقمي

مشاهدة النسخة كاملة : لأول مرة بالوطن العربي : عناكب بحث !



admin
11-01-2013, بتوقيت غرينيتش 03:43 PM
http://www.dzbatna.com/images/icons/iconrote.gif لأول مرة بالوطن العربي : عناكب بحث ! (http://www.dzbatna.com/t1468156/)



بسم الله الرحمن الرحيم ..
اليوم اقدم لكم هدية غالية لجميع الأعضاء ..
مع إني كنت سأستخدمها في محركي الخاص بس ماتغلى عليكم ..
اولاً الفانكشنز (الدوال الخاصة) :
رمز PHP:

<code style="white-space:nowrap"> <code> <?php
/**
 * Download a URL through a raw socket using an HTTP/1.0 GET request.
 *
 * The global $user_agent is sent as the User-Agent header. A connection
 * error message is printed and handed to printConnectErrorReport().
 *
 * @param string $url Absolute http:// or https:// URL.
 * @return array 'state' is "ok", "timeout", "NOHOST" or "Cannot send request";
 *               'file' (the response with the header block stripped) is set
 *               whenever the request could be sent.
 */
function getFileContents($url) {
    global $user_agent;

    $contents = array();
    $urlparts = parse_url($url);
    $host = isset($urlparts['host']) ? $urlparts['host'] : '';
    $scheme = isset($urlparts['scheme']) ? $urlparts['scheme'] : '';

    // "http://host" has no path component; ask for the root instead of
    // sending a request line with an empty target.
    $path = (isset($urlparts['path']) && $urlparts['path'] != "") ? $urlparts['path'] : '/';
    if (isset($urlparts['query']) && $urlparts['query'] != "") {
        $path .= "?".$urlparts['query'];
    }

    if (isset($urlparts['port'])) {
        $port = (int) $urlparts['port'];
    } else if ($scheme == "https") {
        $port = 443;
    } else {
        // Also the fallback for a missing or unexpected scheme.
        $port = 80;
    }
    $portq = ($port == 80) ? "" : ":$port";

    $all = "*/*";
    $request = "GET $path HTTP/1.0\r\nHost: $host$portq\r\nAccept: $all\r\nUser-Agent: $user_agent\r\n\r\n";

    $fsocket_timeout = 30;
    $target = (substr($url, 0, 5) == "https") ? "ssl://".$host : $host;

    $errno = 0;
    $errstr = "";
    // The warning is suppressed because the failure is reported through $errstr below.
    $fp = @fsockopen($target, $port, $errno, $errstr, $fsocket_timeout);

    print $errstr;
    if (!$fp) {
        $contents['state'] = "NOHOST";
        printConnectErrorReport($errstr);
        return $contents;
    }

    if (!fputs($fp, $request)) {
        fclose($fp);
        $contents['state'] = "Cannot send request";
        return $contents;
    }

    $data = '';
    stream_set_timeout($fp, $fsocket_timeout);
    do {
        $chunk = fgets($fp, 8192);
        if ($chunk !== false) {
            $data .= $chunk;
        }
        // Read the status after the read so a timeout is seen immediately
        // rather than after one more blocking fgets().
        $status = stream_get_meta_data($fp);
    } while (!feof($fp) && !$status['timed_out']);
    fclose($fp);

    $contents['state'] = $status['timed_out'] ? "timeout" : "ok";

    // The body starts after the blank line that ends the header block.
    $header_end = strpos($data, "\r\n\r\n");
    $contents['file'] = ($header_end === false) ? '' : substr($data, $header_end + 4);

    return $contents;
}

/**
 * Check if a file is available and in a readable form.
 *
 * Sends an HTTP/1.1 HEAD request over a raw socket and classifies the reply
 * from its status line and Content-Type header. PDF/DOC/XLS/PPT types are
 * only accepted when the matching global $index_* flag equals 1.
 *
 * @param string $url Absolute http:// or https:// URL.
 * @return array 'state' is "ok", "NOHOST", "Unreachable: http <code>",
 *               "Relocation: http <code>", "Not text or html" or
 *               "Timed out (no reply from server)". 'content' (text, pdf,
 *               doc, xls, ppt) accompanies "ok"; 'path' accompanies a
 *               relocation; 'date' is set when a Last-Modified header was
 *               read before the Content-Type header.
 */
function url_status($url) {
    global $user_agent, $index_pdf, $index_doc, $index_xls, $index_ppt;

    $status = array();
    $urlparts = parse_url($url);
    $host = isset($urlparts['host']) ? $urlparts['host'] : '';
    $scheme = isset($urlparts['scheme']) ? $urlparts['scheme'] : '';
    $path = (isset($urlparts['path']) && $urlparts['path'] != "") ? $urlparts['path'] : '/';
    if (isset($urlparts['query'])) {
        $path .= "?".$urlparts['query'];
    }

    if (isset($urlparts['port'])) {
        $port = (int) $urlparts['port'];
    } else if ($scheme == "https") {
        $port = 443;
    } else {
        $port = 80;
    }
    $portq = ($port == 80) ? "" : ":$port";

    $all = "*/*"; //just to prevent "comment effect" in get accept
    $request = "HEAD $path HTTP/1.1\r\nHost: $host$portq\r\nAccept: $all\r\nUser-Agent: $user_agent\r\n\r\n";

    $target = (substr($url, 0, 5) == "https") ? "ssl://".$host : $host;

    $fsocket_timeout = 30;
    $errno = 0;
    $errstr = "";
    $fp = fsockopen($target, $port, $errno, $errstr, $fsocket_timeout);
    print $errstr;
    if (!$fp) {
        // Return here: there is no socket to close.
        $status['state'] = "NOHOST";
        return $status;
    }

    stream_set_timeout($fp, $fsocket_timeout);
    fputs($fp, $request);
    $answer = fgets($fp, 4096);

    $httpcode = 0;      // first digit of the status code
    $full_httpcode = 0; // complete three-digit status code
    $regs = array();
    // '@' delimiters so the slash in "HTTP/1.x" needs no escaping.
    if (preg_match('@HTTP/[0-9.]+ (([0-9])[0-9]{2})@', (string) $answer, $regs)) {
        $httpcode = $regs[2];
        $full_httpcode = $regs[1];

        if ($httpcode <> 2 && $httpcode <> 3) {
            $status['state'] = "Unreachable: http $full_httpcode";
            fclose($fp);
            return $status;
        }
    }

    $content = '';
    while ($answer) {
        $answer = fgets($fp, 4096);
        if ($answer === false || rtrim($answer, "\r\n") === '') {
            // Read failure or the blank line ending the header block; do not
            // keep waiting on a kept-alive connection.
            break;
        }

        if (preg_match("/Location: *([^\n\r ]+)/", $answer, $regs) && $httpcode == 3 && $full_httpcode != 302) {
            $status['path'] = $regs[1];
            $status['state'] = "Relocation: http $full_httpcode";
            fclose($fp);
            return $status;
        }

        if (preg_match("/Last-Modified: *([a-z0-9,: ]+)/i", $answer, $regs)) {
            $status['date'] = $regs[1];
        }

        if (preg_match("/Content-Type:/i", $answer)) {
            $content = $answer;
            break;
        }
    }

    $socket_status = stream_get_meta_data($fp);
    fclose($fp);

    if (preg_match("/Content-Type: *([a-z\/.-]*)/i", $content, $regs)) {
        $type = $regs[1];
        if ($type == 'text/html' || $type == 'text/' || $type == 'text/plain') {
            $status['content'] = 'text';
            $status['state'] = 'ok';
        } else if ($type == 'application/pdf' && $index_pdf == 1) {
            $status['content'] = 'pdf';
            $status['state'] = 'ok';
        } else if (($type == 'application/msword' || $type == 'application/vnd.ms-word') && $index_doc == 1) {
            $status['content'] = 'doc';
            $status['state'] = 'ok';
        } else if (($type == 'application/excel' || $type == 'application/vnd.ms-excel') && $index_xls == 1) {
            $status['content'] = 'xls';
            $status['state'] = 'ok';
        } else if (($type == 'application/mspowerpoint' || $type == 'application/vnd.ms-powerpoint') && $index_ppt == 1) {
            $status['content'] = 'ppt';
            $status['state'] = 'ok';
        } else {
            $status['state'] = "Not text or html";
        }
    } else if ($socket_status['timed_out'] == 1) {
        $status['state'] = "Timed out (no reply from server)";
    } else {
        $status['state'] = "Not text or html";
    }

    return $status;
}

/**
 * Read robots.txt file in the server, to find any disallowed files/folders.
 *
 * Only rule groups whose User-agent is "*" or equals the global $user_agent
 * are honoured.
 *
 * @param string $url Any URL on the host to inspect.
 * @return array|null List of Disallow values that apply to this crawler;
 *                    an empty array when robots.txt is not reported "ok" by
 *                    url_status(); null as soon as an applicable group has
 *                    an empty "Disallow:" line.
 */
function check_robot_txt($url) {
    global $user_agent;

    $urlparts = parse_url($url);
    $url = 'http://'.$urlparts['host']."/robots.txt";

    $omit = array();
    $url_status = url_status($url);
    if ($url_status['state'] != "ok") {
        return $omit;
    }

    $robot = file($url);
    if (!$robot) {
        // Fall back to the raw-socket fetch when file() returned nothing.
        $contents = getFileContents($url);
        $robot = explode("\n", $contents['file']);
    }

    $regs = array();
    $check = 0; // 1 while the current User-agent group applies to us
    foreach ($robot as $line) {
        // Field names are matched case-insensitively ("User-agent", "Disallow").
        if (preg_match("/^user-agent: *([^#]+) */i", $line, $regs)) {
            $this_agent = trim($regs[1]);
            $check = ($this_agent == '*' || $this_agent == $user_agent) ? 1 : 0;
        }

        if ($check == 1 && preg_match("/disallow: *([^#]+)/i", $line, $regs)) {
            // Strip blanks and line endings (including the CR of CRLF files).
            $disallow_str = preg_replace("/[\r\n ]+/", "", $regs[1]);
            if ($disallow_str == "") {
                return null;
            }
            $omit[] = $disallow_str;
        }
    }

    return $omit;
}

/**
 * Remove the file part from an url (to build an url from an url and given
 * relative path).
 *
 * Query string and fragment are dropped; a port is kept unless it is 80.
 *
 * @param string $url Absolute URL.
 * @return string scheme://host[:port]/directory/ of the given URL.
 */
function remove_file_from_url($url) {
    $url_parts = parse_url($url);
    $scheme = isset($url_parts['scheme']) ? $url_parts['scheme'] : '';
    $host = isset($url_parts['host']) ? $url_parts['host'] : '';
    $path = isset($url_parts['path']) ? $url_parts['path'] : '';
    $port = isset($url_parts['port']) ? $url_parts['port'] : '';

    // Cut everything after the last slash. Done with a fixed pattern so file
    // names containing regex metacharacters ("(", "+", ...) are handled too.
    $path = preg_replace('/[^\/]+$/', '', $path);

    $portq = ($port == 80 || $port == "") ? "" : ":".$port;

    return $scheme."://".$host.$portq.$path;
}

/**
 * Extract links from html.
 *
 * Looks at href attributes (skipping those followed by rel=nofollow), frame
 * src attributes, window.location assignments, meta refresh targets and
 * window.open() calls. Every distinct candidate is passed through
 * url_purify(); candidates for which it returns '' are dropped.
 *
 * @param string $file             HTML source of the page.
 * @param string $url              URL of the page.
 * @param mixed  $can_leave_domain Passed through to url_purify().
 * @param string $base             <base href> of the page; replaces $url when not empty.
 * @return array List of purified URLs.
 */
function get_links($file, $url, $can_leave_domain, $base) {
    // The base URL comes from either the meta tag or the current URL.
    if (!empty($base)) {
        $url = $base;
    }

    // Shared tail of the non-href patterns: optional quote, URL, optional fragment.
    $target = "[[:blank:]]*[\'\"]?(([[a-z]{3,5}:\/\/(([.a-zA-Z0-9-])+(:[0-9]+)*))*([+:%\/?=&;\\\(\),._ a-zA-Z0-9-]*))(#[.a-zA-Z0-9-]*)?[\'\" ]?/i";

    // Each entry: pattern, index of the group holding the URL, index of the
    // group that is only present for rel=nofollow (false when not applicable).
    // In the non-href patterns group 1 is the attribute/keyword prefix and
    // group 2 is the URL.
    $patterns = array(
        array("/href\s*=\s*[\'\"]?([+:%\/\?~=&;\\\(\),._a-zA-Z0-9-]*)(#[.a-zA-Z0-9-]*)?[\'\" ]?(\s*rel\s*=\s*[\'\"]?(nofollow)[\'\"]?)?/i", 1, 4),
        array("/(frame[^>]*src[[:blank:]]*)=".$target, 2, false),
        array("/(window[.]location)[[:blank:]]*=".$target, 2, false),
        array("/(http-equiv=['\"]refresh['\"] *content=['\"][0-9]+;url)[[:blank:]]*=".$target, 2, false),
        array("/(window[.]open[[:blank:]]*[(])".$target, 2, false),
    );

    $links = array();
    $checked_urls = array();

    foreach ($patterns as $spec) {
        $pattern = $spec[0];
        $url_group = $spec[1];
        $nofollow_group = $spec[2];

        $regs = array();
        preg_match_all($pattern, $file, $regs, PREG_SET_ORDER);
        foreach ($regs as $val) {
            $candidate = $val[$url_group];
            if (isset($checked_urls[$candidate])) {
                continue;
            }
            if ($nofollow_group !== false && isset($val[$nofollow_group])) {
                // nofollow link: skip without marking it, so a later plain
                // link to the same target is still followed.
                continue;
            }
            $a = url_purify($candidate, $url, $can_leave_domain);
            if ($a != '') {
                $links[] = $a;
            }
            $checked_urls[$candidate] = 1;
        }
    }

    return $links;
}

/**
 * Function to build a unique word array from the text of a webpage, together
 * with the count of each word.
 *
 * Words are stemmed first when the global $stem_words is 1. A word is kept
 * when it is at least $min_word_length bytes long, contains a letter (or a
 * letter/digit when $index_numbers is 1) after remove_accents(), and is not
 * flagged in the global $common list. Counts are capped at $word_upper_bound.
 *
 * @param array $arr List of words.
 * @return array List of entries, each array(1 => word, 2 => count), in
 *               sorted word order.
 */
function unique_array($arr) {
    global $min_word_length;
    global $common;
    global $word_upper_bound;
    global $index_numbers, $stem_words;

    if ($stem_words == 1) {
        $stemmed = array();
        foreach ($arr as $val) {
            $stemmed[] = stem($val);
        }
        $arr = $stemmed;
    }
    sort($arr);

    $pattern = ($index_numbers == 1) ? "/[a-z0-9]+/" : "/[a-z]+/";

    $newarr = array();
    $regs = array();
    $total = count($arr);
    $start = 0;

    while ($start < $total) {
        // Measure the run of equal neighbours starting at $start.
        $element = $arr[$start];
        $end = $start + 1;
        while ($end < $total && $arr[$end] == $element) {
            $end++;
        }
        $run = $end - $start;
        $start = $end;

        //check if word is long enough, contains alphabetic characters and is not a common word
        $is_common = isset($common[$element]) && $common[$element] == 1;
        if (strlen($element) < $min_word_length || !preg_match($pattern, remove_accents($element)) || $is_common) {
            // The run length of a rejected word is discarded here, so it
            // cannot leak into the count of the next word.
            continue;
        }

        // Cap the count, but never report less than one occurrence.
        $counter = ($run > $word_upper_bound) ? max(1, (int) $word_upper_bound) : $run;

        // Strip one leading and one trailing hyphen or escaped apostrophe.
        if (preg_match("/^(-|\\\')(.*)/", $element, $regs)) {
            $element = $regs[2];
        }
        if (preg_match("/(.*)(\\\'|-)$/", $element, $regs)) {
            $element = $regs[1];
        }

        $newarr[] = array(1 => $element, 2 => $counter);
    }

    return $newarr;
}

/**
 * Checks if url is legal, relative to the main url.
 *
 * Rejects links to other hosts (unless $can_leave_domain is 1), links whose
 * extension is listed in the global $ext, links ending in a backslash,
 * queries flagged in $apache_indexes, mailto:/javascript:/news: links and
 * any scheme other than http/https. Accepted links are made absolute against
 * $parent_url and have "../" and "./" segments collapsed.
 *
 * NOTE: the global $mainurl is overwritten with its directory form
 * (remove_file_from_url) when $can_leave_domain is not 1.
 *
 * @param string $url              Link as found in the page.
 * @param string $parent_url       URL of the page containing the link.
 * @param mixed  $can_leave_domain 1 to accept links to other hosts.
 * @return string Absolute URL, or '' when the link must not be followed.
 */
function url_purify($url, $parent_url, $can_leave_domain) {
    global $ext, $mainurl, $apache_indexes, $strip_sessids;

    $urlparts = parse_url($url);
    if ($urlparts === false) {
        // Seriously malformed URL: nothing sensible can be built from it.
        return '';
    }

    $main_url_parts = parse_url($mainurl);
    $link_host = isset($urlparts['host']) ? $urlparts['host'] : '';
    $main_host = isset($main_url_parts['host']) ? $main_url_parts['host'] : '';
    if ($link_host != "" && $link_host != $main_host && $can_leave_domain != 1) {
        return '';
    }

    foreach ((array) $ext as $excl) {
        if (preg_match("/\.$excl$/i", $url)) {
            return '';
        }
    }

    if (substr($url, -1) == '\\') {
        return '';
    }

    if (isset($urlparts['query']) && !empty($apache_indexes[$urlparts['query']])) {
        return '';
    }

    if (preg_match("/[\/]?mailto:|[\/]?javascript:|[\/]?news:/i", $url)) {
        return '';
    }

    //only http and https links are followed
    $scheme = isset($urlparts['scheme']) ? $urlparts['scheme'] : "";
    if (!($scheme == 'http' || $scheme == '' || $scheme == 'https')) {
        return '';
    }

    //parent url might be used to build an url from relative path
    $parent_url = remove_file_from_url($parent_url);
    $parent_url_parts = parse_url($parent_url);

    if (substr($url, 0, 1) == '/') {
        // Root-relative link: keep the parent's scheme, host and port.
        $parent_port = isset($parent_url_parts['port']) ? $parent_url_parts['port'] : '';
        $parent_portq = ($parent_port == 80 || $parent_port == "") ? "" : ":".$parent_port;
        $url = $parent_url_parts['scheme']."://".$parent_url_parts['host'].$parent_portq.$url;
    } else if (!isset($urlparts['scheme'])) {
        $url = $parent_url.$url;
    }

    $url_parts = parse_url($url);
    $urlpath = isset($url_parts['path']) ? $url_parts['path'] : '';

    //remove relative path instructions like ../ etc
    $regs = array();
    while (preg_match("/[^\/]*\/[.]{2}\//", $urlpath, $regs)) {
        $urlpath = str_replace($regs[0], "", $urlpath);
    }
    $urlpath = preg_replace("/\/+/", "/", $urlpath);
    $urlpath = preg_replace("/[^\/]*\/[.]{2}/", "", $urlpath);
    $urlpath = str_replace("./", "", $urlpath);

    $query = isset($url_parts['query']) ? "?".$url_parts['query'] : "";

    // Use the port of the link itself, not the port of the start URL.
    $port = isset($url_parts['port']) ? $url_parts['port'] : '';
    $portq = ($port == 80 || $port == "") ? "" : ":".$port;

    $final_scheme = isset($url_parts['scheme']) ? $url_parts['scheme'] : '';
    $final_host = isset($url_parts['host']) ? $url_parts['host'] : '';
    $url = $final_scheme."://".$final_host.$portq.$urlpath.$query;

    //if we index sub-domains
    if ($can_leave_domain == 1) {
        return $url;
    }

    $mainurl = remove_file_from_url($mainurl);

    if ($strip_sessids == 1) {
        $url = remove_sessid($url);
    }

    //only urls in staying in the starting domain/directory are followed
    $url = convert_url($url);
    if (strpos($url, $mainurl) === false) {
        return '';
    }
    return $url;
}

/**
 * Store the keywords of one page and link them to the page.
 *
 * Unknown words are inserted into the <prefix>keywords table (ids are cached
 * in the global $all_keywords). The link rows are written to one of the
 * sixteen <prefix>link_keyword0..f tables, chosen by the first hex digit of
 * md5(word). Words longer than 30 bytes are skipped. MySQL errors are echoed.
 *
 * NOTE(review): $word is interpolated into the SQL as-is; this presumably
 * relies on the caller having escaped it already — confirm.
 *
 * @param array $wordarray List of array(1 => word, 2 => weight) entries.
 * @param mixed $link_id   Id of the page in the links table.
 * @param mixed $domain    Value written to the "domain" column.
 * @return void
 */
function save_keywords($wordarray, $link_id, $domain) {
    global $mysql_table_prefix, $all_keywords;

    $inserts = array(); // hex digit => ",(...),(...)" value list
    foreach ($wordarray as $entry) {
        $word = $entry[1];
        $weight = $entry[2];
        if (strlen($word) > 30) {
            continue;
        }

        $wordmd5 = substr(md5($word), 0, 1);
        $keyword_id = isset($all_keywords[$word]) ? $all_keywords[$word] : "";
        if ($keyword_id == "") {
            mysql_query("insert into ".$mysql_table_prefix."keywords (keyword) values ('$word')");
            if (mysql_errno() == 1062) {
                // 1062 = duplicate key: the word exists already, look its id up.
                $result = mysql_query("select keyword_ID from ".$mysql_table_prefix."keywords where keyword='$word'");
                echo mysql_error();
                $row = mysql_fetch_row($result);
                $keyword_id = $row[0];
            } else {
                $keyword_id = mysql_insert_id();
                echo mysql_error();
            }
            // Remember the id so the next page containing this word skips the round trip.
            $all_keywords[$word] = $keyword_id;
        }

        if (!isset($inserts[$wordmd5])) {
            $inserts[$wordmd5] = "";
        }
        $inserts[$wordmd5] .= ",($link_id, $keyword_id, $weight, $domain)";
    }

    for ($i = 0; $i <= 15; $i++) {
        $char = dechex($i);
        if (!isset($inserts[$char])) {
            continue;
        }
        // Drop the leading comma of the accumulated value list.
        $values = substr($inserts[$char], 1);
        if ($values != "") {
            $query = "insert into ".$mysql_table_prefix."link_keyword$char (link_id, keyword_id, weight, domain) values $values";
            mysql_query($query);
            echo mysql_error();
        }
    }
}

/**
 * Extract meta information from the <head> section of an HTML document.
 *
 * @param string $file HTML source.
 * @return array 'description' and 'keywords' (both passed through
 *               addslashes(); commas/blanks in keywords collapsed to single
 *               spaces), 'nofollow' and 'noindex' (1 when the robots meta
 *               tag lists the directive, else 0) and 'base' (<base href>).
 *               All entries hold their empty defaults when the document has
 *               no head section.
 */
function get_head_data($file) {
    $data = array(
        'description' => '',
        'keywords' => '',
        'nofollow' => 0,
        'noindex' => 0,
        'base' => '',
    );

    $regs = array();
    if (!preg_match("@<head[^>]*>(.*?)<\/head>@si", $file, $regs) || $regs[1] == "") {
        return $data;
    }
    $headdata = $regs[1];

    // Content of the three <meta name=...> tags of interest.
    $meta = array('robots' => '', 'description' => '', 'keywords' => '');
    $res = array();
    foreach (array_keys($meta) as $name) {
        if (preg_match("/<meta +name *=[\"']?".$name."[\"']? *content=[\"']?([^<>'\"]+)[\"']?/i", $headdata, $res)) {
            $meta[$name] = $res[1];
        }
    }

    // e.g. <base href="http://www.consil.co.uk/index.php" />
    $base = "";
    if (preg_match("/<base +href *= *[\"']?([^<>'\"]+)[\"']?/i", $headdata, $res)) {
        $base = $res[1];
    }

    $keywords = preg_replace("/[, ]+/", " ", $meta['keywords']);

    $nofollow = 0;
    $noindex = 0;
    foreach (explode(",", strtolower($meta['robots'])) as $directive) {
        $directive = trim($directive);
        if ($directive == "noindex") {
            $noindex = 1;
        }
        if ($directive == "nofollow") {
            $nofollow = 1;
        }
    }

    $data['description'] = addslashes($meta['description']);
    $data['keywords'] = addslashes($keywords);
    $data['nofollow'] = $nofollow;
    $data['noindex'] = $noindex;
    $data['base'] = $base;

    return $data;
}

/**
 * Callback for clean_file(): turn a hexadecimal character reference
 * (match group 1 = hex digits) into the byte chr() yields for that value.
 */
function clean_file_hex_entity($match) {
    return chr((int) hexdec($match[1]));
}

/**
 * Callback for clean_file(): turn a decimal character reference
 * (match group 1 = digits) into the byte chr() yields for that value.
 */
function clean_file_dec_entity($match) {
    return chr((int) $match[1]);
}

/**
 * Reduce a fetched document to indexable plain text plus its meta data.
 *
 * Removes <link rel>, sphider_noindex sections, comments, scripts and
 * styles, extracts the title, strips the remaining tags, decodes character
 * references and the entities listed in the global $entities, lower-cases
 * the text and replaces punctuation with blanks.
 *
 * NOTE(review): the punctuation pattern is applied byte-wise (no "u"
 * modifier), so the bytes of "£" and "´" are also removed from other
 * multi-byte UTF-8 characters — confirm the intended input encoding.
 *
 * @param string $file Raw document.
 * @param string $url  URL of the document.
 * @param string $type Content type as reported by url_status(); for 'pdf'
 *                     and 'doc' without a <title>, the first words of the
 *                     text serve as title.
 * @return array 'fulltext', 'content', 'title' (all addslashes()-escaped),
 *               'description', 'keywords', 'nofollow', 'noindex', 'base'
 *               (from get_head_data()), 'host' and 'path'.
 */
function clean_file($file, $url, $type) {
    global $entities, $index_host, $index_meta_keywords;

    $urlparts = parse_url($url);
    $host = isset($urlparts['host']) ? $urlparts['host'] : '';
    //remove filename from path
    $path = preg_replace('/([^\/]+)$/i', "", isset($urlparts['path']) ? $urlparts['path'] : '');

    $file = preg_replace("/<link rel[^<>]*>/i", " ", $file);
    $file = preg_replace("@<!--sphider_noindex-->.*?<!--\/sphider_noindex-->@si", " ", $file);
    $file = preg_replace("@<!--.*?-->@si", " ", $file);
    $file = preg_replace("@<script[^>]*?>.*?</script>@si", " ", $file);
    $headdata = get_head_data($file);

    $title = "";
    $regs = array();
    if (preg_match("@<title *>(.*?)<\/title*>@si", $file, $regs)) {
        $title = trim($regs[1]);
        $file = str_replace($regs[0], "", $file);
    } else if ($type == 'pdf' || $type == 'doc') { //the title of a non-html file is its first few words
        $title = substr($file, 0, strrpos(substr($file, 0, 40), " "));
    }

    $file = preg_replace("@<style[^>]*>.*?<\/style>@si", " ", $file);

    //create spaces between tags, so that removing tags doesnt concatenate strings
    $file = preg_replace("/<[\w ]+>/", "\\0 ", $file);
    $file = preg_replace("/<\/[\w ]+>/", "\\0 ", $file);
    $file = strip_tags($file);
    $file = preg_replace("/&nbsp;/", " ", $file);

    $fulltext = $file;
    $file .= " ".$title;
    if ($index_host == 1) {
        $file = $file." ".$host." ".$path;
    }
    if ($index_meta_keywords == 1) {
        $file = $file." ".$headdata['keywords'];
    }

    //replace codes with ascii chars
    // preg_replace_callback() replaces the /e modifier, which PHP 7 removed.
    $file = preg_replace_callback('~&#x([0-9a-f]+);~i', 'clean_file_hex_entity', $file);
    $file = preg_replace_callback('~&#([0-9]+);~', 'clean_file_dec_entity', $file);
    $file = strtolower($file);

    // $entities maps an entity pattern to its replacement text.
    foreach ((array) $entities as $entity => $replacement) {
        $file = preg_replace("/".$entity."/i", $replacement, $file);
    }
    $file = preg_replace("/&[a-z]{1,6};/", " ", $file);
    $file = preg_replace("/[\*\^\+\?\\\.\[\]\^\$\|\{\)\(\}~!\"\/@#£$%&=`´;><:,]+/", " ", $file);
    $file = preg_replace("/\s+/", " ", $file);

    $data = array();
    $data['fulltext'] = addslashes($fulltext);
    $data['content'] = addslashes($file);
    $data['title'] = addslashes($title);
    $data['description'] = $headdata['description'];
    $data['keywords'] = $headdata['keywords'];
    $data['host'] = $host;
    $data['path'] = $path;
    $data['nofollow'] = $headdata['nofollow'];
    $data['noindex'] = $headdata['noindex'];
    $data['base'] = $headdata['base'];

    return $data;
}

/**
 * Helper for calc_weights(): split a text into words via unique_array() and
 * return them as a lookup set (word => true).
 */
function calc_weights_word_set($text) {
    $set = array();
    $words = unique_array(explode(" ", preg_replace("/[^[:alnum:]-]+/i", " ", strtolower($text))));
    foreach ($words as $entry) {
        $set[$entry[1]] = true;
    }
    return $set;
}

/**
 * Replace the raw count of every word with its final weight.
 *
 * A word gets bonuses (see calc_weight()) when it also occurs in the title,
 * in the host name or path (only if the global $index_host is 1) or in the
 * meta keywords (only if $index_meta_keywords is 1).
 *
 * @param array  $wordarray List of array(1 => word, 2 => count) entries.
 * @param string $title     Page title.
 * @param string $host      Host name of the page.
 * @param string $path      Path of the page.
 * @param string $keywords  Meta keywords of the page.
 * @return array $wordarray with element 2 of each entry replaced by the
 *               integer weight.
 */
function calc_weights($wordarray, $title, $host, $path, $keywords) {
    global $index_host, $index_meta_keywords;

    $host_words = calc_weights_word_set($host);
    $path_words = calc_weights_word_set($path);
    $title_words = calc_weights_word_set($title);
    $keyword_words = calc_weights_word_set($keywords);
    $path_depth = countSubstrs($path, "/");

    foreach ($wordarray as $wid => $entry) {
        $word = $entry[1];

        $word_in_path = 0;
        $word_in_domain = 0;
        if ($index_host == 1) {
            $word_in_path = isset($path_words[$word]) ? 1 : 0;
            $word_in_domain = isset($host_words[$word]) ? 1 : 0;
        }

        $meta_keyword = 0;
        if ($index_meta_keywords == 1) {
            $meta_keyword = isset($keyword_words[$word]) ? 1 : 0;
        }

        $word_in_title = isset($title_words[$word]) ? 1 : 0;

        $wordarray[$wid][2] = (int) (calc_weight($entry[2], $word_in_title, $word_in_domain, $word_in_path, $path_depth, $meta_keyword));
    }

    return $wordarray;
}

/**
 * Tell whether a page with the given checksum is already stored.
 *
 * Queries the <prefix>links table for a row whose md5sum column equals
 * $md5sum; any MySQL error text is echoed.
 *
 * @param string $md5sum Checksum to look for.
 * @return bool true when at least one row matches.
 */
function isDuplicateMD5($md5sum) {
    global $mysql_table_prefix;

    $lookup = "select link_id from ".$mysql_table_prefix."links where md5sum='$md5sum'";
    $matches = mysql_query($lookup);
    echo mysql_error();

    return mysql_num_rows($matches) > 0;
}

/**
 * Helper for check_include(): tell whether $link matches any rule.
 *
 * A rule starting with "*" is a complete PCRE pattern (delimiters included)
 * following the asterisk; any other rule is a plain substring. Blank rules
 * are ignored.
 *
 * @param string $link  URL to test.
 * @param array  $rules List of rule strings.
 * @return bool
 */
function check_include_matches($link, $rules) {
    foreach ($rules as $rule) {
        $rule = trim($rule);
        if ($rule == "") {
            continue;
        }
        if (substr($rule, 0, 1) == '*') {
            if (preg_match(substr($rule, 1), $link)) {
                return true;
            }
        } else if (strpos($link, $rule) !== false) {
            return true;
        }
    }
    return false;
}

/**
 * Decide whether a link passes the "must include" / "must not include" rules.
 *
 * @param string $link    URL to test.
 * @param string $inc     Newline-separated rules; when not empty the link
 *                        has to match at least one of them.
 * @param string $not_inc Newline-separated rules; a link matching any of
 *                        them is rejected.
 * @return bool true when the link may be indexed.
 */
function check_include($link, $inc, $not_inc) {
    // Exclusion rules win over inclusion rules.
    if ($not_inc != "" && check_include_matches($link, explode("\n", $not_inc))) {
        return false;
    }
    if ($inc != "") {
        return check_include_matches($link, explode("\n", $inc));
    }
    return true;
}

/**
 * Count down the "visible" value of a stored page and delete the page once
 * that value has reached zero.
 *
 * Looks the URL up in <prefix>links. A positive "visible" value is
 * decremented; otherwise the link row and its rows in all sixteen
 * <prefix>link_keyword0..f tables are deleted and a 'pageRemoved' report is
 * printed. Nothing happens for an unknown URL. MySQL errors are echoed.
 *
 * NOTE(review): $url is interpolated into the SQL as-is; presumably the
 * caller escapes it — confirm.
 *
 * @param string $url URL of the page.
 * @return void
 */
function check_for_removal($url) {
    global $mysql_table_prefix;
    global $command_line;

    $result = mysql_query("select link_id, visible from ".$mysql_table_prefix."links"." where url='$url'");
    echo mysql_error();
    if (mysql_num_rows($result) <= 0) {
        return;
    }

    $row = mysql_fetch_row($result);
    $link_id = $row[0];
    $visible = $row[1];

    if ($visible > 0) {
        $visible--;
        mysql_query("update ".$mysql_table_prefix."links set visible=$visible where link_id=$link_id");
        echo mysql_error();
        return;
    }

    mysql_query("delete from ".$mysql_table_prefix."links where link_id=$link_id");
    echo mysql_error();
    for ($i = 0; $i <= 15; $i++) {
        $char = dechex($i);
        mysql_query("delete from ".$mysql_table_prefix."link_keyword$char where link_id=$link_id");
        echo mysql_error();
    }
    printStandardReport('pageRemoved', $command_line);
}

/**
 * Normalise a URL taken from HTML: "&amp;" becomes "&" and every blank
 * becomes "%20".
 *
 * @param string $url
 * @return string
 */
function convert_url($url) {
    // Array form applies the pairs in order: entity first, then blanks.
    return str_replace(array("&amp;", " "), array("&", "%20"), $url);
}

/**
 * Convert a binary document to plain text with an external tool.
 *
 * The contents are written to "<tmp_dir>/tmp_file", the converter configured
 * for the type ($pdftotext_path, $catdoc_path, $xls2csv_path or
 * $catppt_path) is run on it, and the temporary file is removed again. The
 * script dies when the temporary file cannot be opened or written.
 *
 * @param string $contents    Raw document bytes.
 * @param string $source_type 'pdf', 'doc', 'xls' or 'ppt'.
 * @return string Output lines of the converter joined by blanks; '' for any
 *                other type or when the tool printed nothing.
 */
function extract_text($contents, $source_type) {
    global $tmp_dir, $pdftotext_path, $catdoc_path, $xls2csv_path, $catppt_path;

    $temp_file = "tmp_file";
    $filename = $tmp_dir."/".$temp_file;
    if (!$handle = fopen($filename, 'w')) {
        die ("Cannot open file $filename");
    }

    if (fwrite($handle, $contents) === FALSE) {
        die ("Cannot write to file $filename");
    }
    fclose($handle);

    // Quote the file name so blanks or shell metacharacters in $tmp_dir
    // cannot change the command.
    $quoted = escapeshellarg($filename);
    $command = "";
    if ($source_type == 'pdf') {
        $command = $pdftotext_path." ".$quoted." -";
    } else if ($source_type == 'doc') {
        $command = $catdoc_path." ".$quoted;
    } else if ($source_type == 'xls') {
        $command = $xls2csv_path." ".$quoted;
    } else if ($source_type == 'ppt') {
        $command = $catppt_path." ".$quoted;
    }

    $result = array();
    if ($command != "") {
        exec($command, $result);
    }

    unlink($filename);
    return implode(' ', $result);
}

/**
 * Calculate the weight of a word on a page.
 *
 * The occurrence count plus the configured bonuses (globals $title_weight,
 * $domain_weight, $path_weight, $meta_weight, each multiplied by its flag)
 * is scaled by 10 and divided by (0.8 + 0.2 * $path_depth).
 *
 * @param int|float $words_in_page  Occurrences of the word in the page.
 * @param int       $word_in_title  Multiplier for $title_weight.
 * @param int       $word_in_domain Multiplier for $domain_weight.
 * @param int       $word_in_path   Multiplier for $path_weight.
 * @param int|float $path_depth     Depth value used in the divisor.
 * @param int       $meta_keyword   Multiplier for $meta_weight.
 * @return int|float The unrounded weight.
 */
function calc_weight ($words_in_page, $word_in_title, $word_in_domain, $word_in_path, $path_depth, $meta_keyword) {
    global $title_weight, $domain_weight, $path_weight,$meta_weight;

    $score = $words_in_page + $word_in_title * $title_weight;
    $score = $score + $word_in_domain * $domain_weight;
    $score = $score + $word_in_path * $path_weight;
    $score = $score + $meta_keyword * $meta_weight;

    $depth_divisor = 0.8 + 0.2 * $path_depth;

    return $score * 10 / $depth_divisor;
}

/**
 * Strip a trailing session-id parameter from a URL.
 *
 * Only a PHPSESSID, JSESSIONID, ASPSESSIONID or sid parameter with an
 * alphanumeric value that ends the URL is removed, together with the "?" or
 * "&" in front of it.
 *
 * @param string $url
 * @return string
 */
function remove_sessid($url) {
    return preg_replace("/(\?|&)(PHPSESSID|JSESSIONID|ASPSESSIONID|sid)=[0-9a-zA-Z]+$/", "", $url);
}
?> </code> </code>

ملف كبير هع ..
ماقدر اشرح طريقته كله بس بحاول
ع العموم ملف الدوال + ملف التجربة بالمرفقات + ملف الرسائل عند وجود عطل ..
يرجى ملاحظة وجود جداول .. عدلها او اتركها وانشأ جدول مثلها ..
أي استفسار موجود ..
سلام عليكم








ألعاب الأندرويد مجانا و حصريا (http://www.apotox.info/forum)


الملفات المرفقة
http://www.traidnt.net/vb/images/attach/zip.gif files.zip (http://www.traidnt.net/vb/attachments/453970d1265808597-files.zip) (12.7 كيلوبايت, عدد مرات المشاهدة 69 مرة)


https://fbcdn-sphotos-d-a.akamaihd.net/hphotos-ak-ash4/482113_236967293114455_1193518507_n.png (http://www.dzbatna.com)
©المشاركات المنشورة تعبر عن وجهة نظر صاحبها فقط، ولا تُعبّر بأي شكل من الأشكال عن وجهة نظر إدارة المنتدى (http://www.dzbatna.com)©

استعمل مربع البحث في الاسفل لمزيد من المواضيع


سريع للبحث عن مواضيع في المنتدى