100) {
break;
}
$hkey = "";
$hvalue = "";
$v = 0;
for ($i = 0; $i < strlen($line); $i++) {
if ($v == 1) {
$hvalue .= $line[$i];
}
if ($line[$i] == ":") {
$v = 1;
}
if ($v == 0) {
$hkey .= $line[$i];
}
}
$hkey = trim($hkey);
if ($hkey != "") {
$m_httphead[strtolower($hkey)] = trim($hvalue);
}
}
//分析返回记录
if (preg_match("/^3/", $m_httphead["http-state"])) {
if (isset($m_httphead["location"]) && $JumpCount < 3) {
$JumpCount++;
DownImageKeep($gurl, $rfurl, $filename, $gcookie, $JumpCount);
} else {
return FALSE;
}
}
if (!preg_match("/^2/", $m_httphead["http-state"])) {
return FALSE;
}
if (!isset($m_httphead)) {
return FALSE;
}
$contentLength = $m_httphead['content-length'];
//保存文件
$fp = fopen($filename, "w") or die("写入文件:{$filename} 失败");
$i = 0;
$okdata = "";
$starttime = time();
while (!feof($m_fp)) {
$okdata .= fgetc($m_fp);
$i++;
//超时结束
if (time() - $starttime > $maxtime) {
break;
}
//到达指定大小结束
if ($i >= $contentLength) {
break;
}
}
if ($okdata != "") {
fwrite($fp, $okdata);
}
fclose($fp);
if ($okdata == "") {
@unlink($filename);
fclose($m_fp);
return FALSE;
}
fclose($m_fp);
return TRUE;
}
/**
 * Fetch the cookie header line returned by a page.
 *
 * Sends a plain HTTP/1.1 GET to port 80 of the URL's host and scans the
 * response headers for the first line that starts with "cookie"
 * (case-insensitive). The result is stored in the global $gcookie and the
 * requested URL in the global $lastRfurl, so a repeated call for the same URL
 * returns the cached value without a new request.
 *
 * The script is terminated with die() when the connection cannot be opened.
 *
 * NOTE(review): servers normally send cookies as "Set-Cookie: ...", which the
 * /^cookie/i test below does not match - confirm against the callers before
 * changing the pattern.
 *
 * @access public
 * @param  string $gurl URL of the page to request
 * @return string the matching header line, or '' when none was found
 */
function RefurlCookie($gurl)
{
    global $gcookie, $lastRfurl;
    $gurl = trim($gurl);

    // Serve the cached cookie when the same URL is requested again.
    if (!empty($gcookie) && $lastRfurl == $gurl) {
        return $gcookie;
    } else {
        $lastRfurl = $gurl;
    }
    if ($gurl == '') {
        return '';
    }

    $urlinfos = GetHostInfo($gurl);
    $ghost = $urlinfos['host'];
    $gquery = $urlinfos['query'];

    $sessionQuery = "GET $gquery HTTP/1.1\r\n";
    $sessionQuery .= "Host: $ghost\r\n";
    $sessionQuery .= "Accept: */*\r\n";
    $sessionQuery .= "User-Agent: Mozilla/4.0 (compatible; MSIE 5.00; Windows 98)\r\n";
    $sessionQuery .= "Connection: Close\r\n\r\n";

    $errno = "";
    $errstr = "";
    $m_fp = fsockopen($ghost, 80, $errno, $errstr, 10) or die($ghost.'
');
    fwrite($m_fp, $sessionQuery);

    // Read the response headers: stop at the blank line that ends them, or
    // after 100 lines so a misbehaving server cannot keep us reading forever.
    $lnum = 0;
    $gcookie = "";
    while (!feof($m_fp)) {
        $line = trim(fgets($m_fp, 256));
        $lnum++;
        if ($line == "" || $lnum > 100) {
            break;
        }
        if (preg_match("/^cookie/i", $line)) {
            $gcookie = $line;
            break;
        }
    }
    fclose($m_fp);
    return $gcookie;
}
/**
 * Split a URL into its host part and its request path (path plus query).
 *
 * A leading "http://" is removed; everything before the first "/" is the
 * host and everything from that "/" onwards is the query. A URL without any
 * path yields the root path "/".
 *
 * @access public
 * @param  string $gurl URL to split
 * @return array  array('host' => string, 'query' => string)
 */
function GetHostInfo($gurl)
{
    $gurl = preg_replace("/^http:\/\//i", "", trim($gurl));
    $garr = array();

    $slash = strpos($gurl, '/');
    if ($slash === false) {
        // No path at all: request the site root instead of "/<host>".
        $garr['host'] = $gurl;
        $garr['query'] = '/';
    } else {
        $garr['host'] = substr($gurl, 0, $slash);
        $garr['query'] = substr($gurl, $slash);
    }
    return $garr;
}
/**
 * Convert the <img> tags found in an HTML fragment into DEDE album markup.
 *
 * Every image source (quoted or unquoted) becomes one {dede:img} tag, numbered
 * from 1 in document order. The result is prefixed with a {dede:pagestyle}
 * tag built from the globals $cfg_album_width / $cfg_ddimg_width, which are
 * set to 800 / 150 when empty. $body itself is not modified.
 *
 * @access public
 * @param  string $body article HTML (taken by reference, read only)
 * @return string the generated DEDE markup
 */
function TurnImageTag(&$body)
{
    global $cfg_album_width, $cfg_ddimg_width;
    if (empty($cfg_album_width)) {
        $cfg_album_width = 800;
    }
    if (empty($cfg_ddimg_width)) {
        $cfg_ddimg_width = 150;
    }

    // No thumbnail name is available here, so ddimg is emitted empty.
    $litpicname = '';
    $ttx = '';

    // Group 2 holds a quoted src value, group 3 an unquoted one.
    $patten = "/<\\s*img\\s.*?src\\s*=\\s*([\"\\'])?(?(1)(.*?)\\1|([^\\s\\>\"\\']+))/isx";
    preg_match_all($patten, $body, $images);
    foreach ($images[2] as $key => $value) {
        // Compare against '' rather than truthiness so a src of "0" survives.
        $src = ($value !== '') ? $value : $images[3][$key];
        $ttx .= "{dede:img ddimg='$litpicname' text='图 ".($key + 1)."'}".$src."{/dede:img}"."\r\n";
    }

    $ttx = "\r\n{dede:pagestyle maxwidth='{$cfg_album_width}' ddmaxwidth='{$cfg_ddimg_width}' row='3' col='3' value='2'/}\r\n{dede:comments}图集类型会采集时生成此配置是正常的,不过如果后面没有跟着img标记则表示规则无效{/dede:comments}\r\n".$ttx;
    return $ttx;
}
/**
 * Convert the <a href="..."> links found in an HTML fragment into DEDE link tags.
 *
 * Each anchor becomes one {dede:link} line. The anchor text is used as the
 * link name (single quotes replaced by back-ticks); when that text contains
 * markup or is longer than 40 bytes, the generic name "服务器<N>" is used
 * instead, N being the 1-based position of the link. $body is not modified.
 *
 * @access public
 * @param  string $body article HTML (taken by reference, read only)
 * @return string the generated DEDE markup, '' when no link was found
 */
function TurnLinkTag(&$body)
{
    $ttx = '';
    $handid = '服务器';

    // Group 1: href value, group 2: remaining attributes, group 3: anchor text.
    $pattern = "/<a\s+href=['\"](.+?)['\"]([^>]*?)>(.+?)<\/a>/is";
    if (!preg_match_all($pattern, $body, $match)) {
        return $ttx;
    }

    foreach ($match[1] as $i => $href) {
        $servername = str_replace("'", "`", $match[3][$i]);
        // Fall back to a numbered name when the text is markup or too long.
        if (preg_match("/[<>]/", $servername) || strlen($servername) > 40) {
            $servername = $handid.($i + 1);
        }
        $ttx .= "{dede:link text='$servername'} {$href} {/dede:link}\r\n";
    }
    return $ttx;
}
/**
 * Strip XML CDATA section markers from a string.
 *
 * Removes every "<![CDATA[" opener and "]]>" terminator and leaves the
 * wrapped content in place.
 *
 * @access public
 * @param  string $str text that may contain CDATA wrappers
 * @return string the text without the CDATA markers
 */
function RpCdata($str)
{
    return str_replace(array('<![CDATA[', ']]>'), '', $str);
}
/**
 * Download an RSS feed and extract the link, title and image of each item.
 *
 * The feed is fetched through DedeHttpDown. Its declared encoding (or, when
 * none is declared, $cfg_soft_lang) is compared with the site charset
 * $cfg_soft_lang and the text is converted with the project's utf82gb /
 * big52gb / gb2utf8 helpers where they differ.
 *
 * @access public
 * @param  string $rssurl URL of the RSS feed
 * @return array|string list of array('link' =>, 'title' =>, 'image' =>) rows
 *                      indexed by item position; '' if link extraction
 *                      produced no result set
 */
function GetRssLinks($rssurl)
{
    global $cfg_soft_lang;
    $dhd = new DedeHttpDown();
    $dhd->OpenUrl($rssurl);
    $rsshtml = $dhd->GetHtml();

    // Detect the encoding declared by the feed.
    preg_match("/encoding=[\"']([^\"']*)[\"']/is", $rsshtml, $infos);
    if (isset($infos[1])) {
        $pcode = strtolower(trim($infos[1]));
    } else {
        $pcode = strtolower($cfg_soft_lang);
    }

    // Convert the feed to the site charset.
    if ($cfg_soft_lang == 'gb2312') {
        if ($pcode == 'utf-8') {
            $rsshtml = utf82gb($rsshtml);
        } else if ($pcode == 'big5') {
            $rsshtml = big52gb($rsshtml);
        }
    } else if ($cfg_soft_lang == 'utf-8') {
        if ($pcode == 'gbk' || $pcode == 'gb2312') {
            $rsshtml = gb2utf8($rsshtml);
        } else if ($pcode == 'big5') {
            $rsshtml = gb2utf8(big52gb($rsshtml));
        }
    }

    // Only elements that follow an <item opener are collected, so channel-level
    // <title>/<link>/<description> are skipped. Group 2 is the element content.
    $rsarr = array();
    preg_match_all("/<item(.*)<title>(.*)<\/title>/isU", $rsshtml, $titles);
    preg_match_all("/<item(.*)<link>(.*)<\/link>/isU", $rsshtml, $links);
    preg_match_all("/<item(.*)<description>(.*)<\/description>/isU", $rsshtml, $descriptions);
    if (!isset($links[2])) {
        return '';
    }

    foreach ($links[2] as $k => $v) {
        $link = RpCdata($v);
        $rsarr[$k]['link'] = $link;
        if (isset($titles[2][$k])) {
            $rsarr[$k]['title'] = RpCdata($titles[2][$k]);
        } else {
            // No title for this item: use the last path segment of its link.
            $rsarr[$k]['title'] = preg_replace("/^(.*)\//i", "", $link);
        }
        if (isset($descriptions[2][$k])) {
            $rsarr[$k]['image'] = GetddImgFromRss($descriptions[2][$k], $rssurl);
        } else {
            $rsarr[$k]['image'] = '';
        }
    }
    return $rsarr;
}
/**
 * Extract the first image URL from an RSS item description.
 *
 * The src of the first <img> tag is taken, stripped of quotes, cleaned of
 * repeated slashes and completed against $refurl with FillUrl().
 *
 * @access public
 * @param  string $descriptions item description (HTML)
 * @param  string $refurl       URL the description came from, used to
 *                              complete relative image paths
 * @return string absolute image URL, or '' when no image was found
 */
function GetddImgFromRss($descriptions, $refurl)
{
    if ($descriptions == '') {
        return '';
    }

    // Group 2 is the src value; it may still carry its opening quote.
    $pattern = "/<img(.*)src=[\"']{0,}(.*)[\"']{0,}[> \r\n\t]{1,}/isU";
    if (!preg_match($pattern, $descriptions, $img) || !isset($img[2])) {
        return '';
    }

    $src = preg_replace("/[\"']/", '', $img[2]);
    // Collapse repeated slashes, but keep the "//" that follows the scheme so
    // absolute URLs are still recognised by FillUrl().
    $src = preg_replace("/(?<!:)\/{2,}/", '/', $src);
    return FillUrl($refurl, $src);
}
/**
 * Complete a (possibly relative) URL against the URL of the page it came from.
 *
 * - Absolute http:// or https:// URLs are kept (repeated slashes collapsed).
 * - URLs starting with "/" are resolved against the referer's host.
 * - Everything else is appended to the referer's directory. "../" segments
 *   are not resolved; they are left in the result.
 * - "." / "./" / ".." style values of two characters or fewer yield ''.
 * A "#fragment" is removed from $surl first. The scheme of the result is the
 * scheme of $surl when it is absolute, otherwise that of $refurl, defaulting
 * to http.
 *
 * @access public
 * @param  string $refurl URL of the referring page
 * @param  string $surl   URL to complete
 * @return string completed URL; when $surl is empty, the referer's base
 *                directory WITHOUT scheme (e.g. "www.example.com/news")
 */
function FillUrl($refurl, $surl)
{
    $refurl = trim($refurl);
    $surl = trim($surl);
    $schemePattern = "/^(https?):\/\//i";

    $urls = @parse_url($refurl);
    if (!is_array($urls)) {
        $urls = array();
    }
    $scheme = (isset($urls['scheme']) && strtolower($urls['scheme']) == 'https') ? 'https' : 'http';
    $basehost = isset($urls['host']) ? $urls['host'] : '';
    if (isset($urls['port']) && $urls['port'] != 80) {
        $basehost .= ':'.$urls['port'];
    }

    // The directory is rebuilt from the raw URL instead of parse_url()'s path,
    // which goes wrong for URLs such as http://xxxx/nnn/aaa?fdsafd.
    $basepath = $basehost;
    $paths = explode('/', preg_replace($schemePattern, '', $refurl));
    $n = count($paths);
    for ($i = 1; $i < ($n - 1); $i++) {
        if (strpos($paths[$i], '?') === false) {
            $basepath .= '/'.$paths[$i];
        }
    }
    // The last segment counts as a directory only when it looks like neither a
    // file nor a query. Segment 0 is the host itself and is never appended.
    if ($n > 1 && !preg_match("/[\?\.]/", $paths[$n - 1])) {
        $basepath .= '/'.$paths[$n - 1];
    }

    if ($surl == '') {
        return $basepath;
    }

    $pos = strpos($surl, "#");
    if ($pos > 0) {
        $surl = substr($surl, 0, $pos);
    }

    if (preg_match($schemePattern, $surl, $m)) {
        // Already absolute.
        $scheme = strtolower($m[1]);
        $okurl = $surl;
    } else if ($surl[0] == '/') {
        // Relative to the site root.
        $okurl = $basehost.$surl;
    } else if ($surl[0] == '.') {
        if (strlen($surl) <= 2) {
            return '';
        } else if ($surl[1] == '/') {
            $okurl = $basepath.substr($surl, 1);
        } else {
            $okurl = $basepath.'/'.$surl;
        }
    } else {
        $okurl = $basepath.'/'.$surl;
    }

    $okurl = preg_replace($schemePattern, '', $okurl);
    return $scheme.'://'.preg_replace("/\/{1,}/", '/', $okurl);
}
/**
 * Build the list of list-page URLs described by a collection rule.
 *
 * URLs come from two sources, in this order:
 *  1. $handurl - manually entered URLs, one per line (only http:// lines).
 *  2. $regxurl - a URL pattern. "(*)" is replaced by a running page number
 *     (zero padding of the start value is kept) and "(#)" by a per-column
 *     value taken from $batchrule.
 *
 * Batch rules have the form
 *   [(#)=>value; (*)=>1-20; typeid=>id or name; addurl=>url1|url2]
 * with one bracketed rule per column. A rule needs at least three ";"
 * separated fields. A non-numeric typeid is looked up by name in the arctype
 * table. Fields without "=>" and ranges that are not numeric are ignored.
 *
 * When the global $islisten equals 1 only the first URL of each source is
 * produced. $islisten is normalised to 0 when empty.
 *
 * @access public
 * @param  string $regxurl   URL pattern, may contain (*) and (#)
 * @param  string $handurl   manually entered URLs, newline separated
 * @param  string $startid   first page number for (*)
 * @param  string $endid     last page number for (*)
 * @param  string $addv      page number increment (values <= 0 become 1)
 * @param  string $usemore   0 = single pattern, otherwise use $batchrule
 * @param  string $batchrule multi-column batch rules
 * @return array  list of array(0 => url, 1 => typeid)
 */
function GetUrlFromListRule($regxurl = '', $handurl = '', $startid = 0, $endid = 0, $addv = 1, $usemore = 0, $batchrule = '')
{
    global $dsql, $islisten;
    $lists = array();
    $n = 0;
    $islisten = (empty($islisten) ? 0 : $islisten);

    // 1. Manually entered URLs.
    if ($handurl != '') {
        foreach (explode("\n", $handurl) as $manualUrl) {
            $manualUrl = trim($manualUrl);
            if (!preg_match("/^http:\/\//i", $manualUrl)) {
                continue;
            }
            $lists[$n][0] = $manualUrl;
            $lists[$n][1] = 0;
            $n++;
            if ($islisten == 1) {
                break;
            }
        }
    }

    if ($regxurl == '') {
        return $lists;
    }

    // 2a. Pattern without (*) or (#): a single fixed URL.
    if (!preg_match("/\(\*\)/i", $regxurl) && !preg_match("/\(#\)/", $regxurl)) {
        $lists[$n][0] = $regxurl;
        $lists[$n][1] = 0;
        return $lists;
    }

    if ($addv <= 0) {
        $addv = 1;
    }

    // 2b. Single pattern: expand (*) over startid..endid.
    if ($usemore == 0) {
        while ($startid <= $endid) {
            $lists[$n][0] = str_replace("(*)", sprintf('%0'.strlen($startid).'d', $startid), $regxurl);
            $lists[$n][1] = 0;
            $startid = sprintf('%0'.strlen($startid).'d', $startid + $addv);
            $n++;
            if ($n > 2000 || $islisten == 1) {
                break;
            }
        }
        return $lists;
    }

    // 2c. Multi-column batch rules.
    $ruleBlocks = explode(']', trim($batchrule));
    foreach ($ruleBlocks as $ruleBlock) {
        $ruleBlock = preg_replace("/^\[|\]$/", '', trim($ruleBlock));
        $fields = explode(';', $ruleBlock);
        if (count($fields) < 3) {
            continue;
        }

        $brtag = '';
        $startid = 0;
        $endid = 0;
        $typeid = 0;
        $addurls = array();
        foreach ($fields as $field) {
            $pair = explode('=>', trim($field));
            $k = trim($pair[0]);
            // A field without "=>" is treated as having an empty value.
            $v = isset($pair[1]) ? $pair[1] : '';
            if ($k === '(#)') {
                $brtag = trim($v);
            } else if ($k === 'typeid') {
                $typeid = trim($v);
            } else if ($k === 'addurl') {
                $addurls = explode('|', trim($v));
            } else if ($k === '(*)') {
                $range = explode('-', preg_replace("/[ \r\n\t]/", '', trim($v)));
                $startid = $range[0];
                $endid = isset($range[1]) ? $range[1] : '';
            }
        }

        // A non-numeric typeid is a column name: resolve it to its id.
        // NOTE(review): $typeid is interpolated into the SQL unescaped; confirm
        // whether the caller has already escaped $batchrule before adding
        // escaping here.
        if (preg_match('/[^0-9]/', $typeid)) {
            $arr = $dsql->GetOne("SELECT id FROM `#@__arctype` WHERE typename LIKE '$typeid' ");
            if (is_array($arr)) {
                $typeid = $arr['id'];
            } else {
                $typeid = 0;
            }
        }

        // Additional URLs take priority.
        $mjj = 0;
        foreach ($addurls as $addurl) {
            $addurl = trim($addurl);
            if ($addurl == '') {
                continue;
            }
            $lists[$n][0] = $addurl;
            $lists[$n][1] = $typeid;
            $n++;
            $mjj++;
            if ($islisten == 1) {
                break;
            }
        }

        // Expand the pattern unless listen mode already got a manual URL.
        // The (#) value may itself contain (*). Non-numeric ranges are skipped
        // because they cannot be incremented.
        if (($islisten != 1 || $mjj == 0) && is_numeric($startid) && is_numeric($endid)) {
            while ($startid <= $endid) {
                $lists[$n][0] = str_replace("(#)", $brtag, $regxurl);
                $lists[$n][0] = str_replace("(*)", sprintf('%0'.strlen($startid).'d', $startid), $lists[$n][0]);
                $lists[$n][1] = $typeid;
                $startid = sprintf('%0'.strlen($startid).'d', $startid + $addv);
                $n++;
                if ($islisten == 1) {
                    break;
                }
                if ($n > 20000) {
                    break;
                }
            }
        }
    }
    return $lists;
}