Files
TwoNav/system/get_page_info.php
2023-07-20 14:03:00 +08:00

108 lines
4.8 KiB
PHP
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
/**
 * Extract basic site metadata from the HTML of a fetched page.
 *
 * The markup is first normalised to UTF-8: the charset announced in
 * $curl_info['content_type'] is preferred, then a <meta> charset declaration
 * found in the markup, and finally GBK is assumed when the bytes are still
 * not valid UTF-8. Title, keywords and description are then read from the
 * normalised markup.
 *
 * @param string $output      Raw HTML of the page.
 * @param string $friend_link Optional text to look for in the page; when it
 *                            occurs, 'friend_link_status' is set to 1.
 * @param array  $curl_info   Optional transfer info; only the 'content_type'
 *                            entry is read (presumably from curl_getinfo() --
 *                            confirm against the caller).
 *
 * @return array Keys: 'site_title', 'site_description', 'site_keywords'
 *               (strings), 'friend_link_status' (0 or 1) and 'site_home_size'
 *               (byte length of the UTF-8 normalised markup).
 */
function get_page_info($output, $friend_link = '', $curl_info=array()) {
    $page_info = array(
        'site_title'         => '', // page title
        'site_description'   => '', // meta description
        'site_keywords'      => '', // meta keywords
        'friend_link_status' => 0,  // friend-link detection result
        'site_home_size'     => 0,  // byte length of the markup
    );
    if (empty($output)) return $page_info;

    // --- Detect the declared charset ------------------------------------
    // Only the bare encoding token is captured, so trailing parameters
    // ("; boundary=x"), quotes and upper/lower-case spellings cannot leak
    // into the encoding name handed to mbstring.
    $charset = '';
    if (isset($curl_info['content_type']) && is_string($curl_info['content_type'])
        && preg_match('/\bcharset\s*=\s*["\']?\s*([\w.:\-]+)/i', $curl_info['content_type'], $m)) {
        $charset = $m[1];
    }
    // Covers both <meta charset="gbk"> and
    // <meta http-equiv="Content-Type" content="text/html; charset=gbk">.
    if ($charset === ''
        && preg_match('/<meta\b[^>]*?\bcharset\s*=\s*["\']?\s*([\w.:\-]+)/i', $output, $m)) {
        $charset = $m[1];
    }

    // --- Normalise to UTF-8 ---------------------------------------------
    if (!in_array(strtolower($charset), array('', 'utf-8', 'utf8'), true)) {
        // A page may announce an encoding mbstring does not know. PHP 8
        // raises ValueError for that, older versions return false; either
        // way keep the original bytes instead of losing the page.
        try {
            $converted = mb_convert_encoding($output, 'UTF-8', $charset);
        } catch (\ValueError $e) {
            $converted = false;
        }
        if (is_string($converted)) {
            $output = $converted;
        }
    }
    // Still not valid UTF-8 (missing or wrong declaration): assume GBK.
    if (!mb_check_encoding($output, 'UTF-8')) {
        $output = mb_convert_encoding($output, 'UTF-8', 'GBK');
    }
    $page_info['site_home_size'] = strlen($output);

    // --- Title ------------------------------------------------------------
    // [^>]* tolerates attributes on the tag, e.g. <title data-x="1">.
    if (preg_match('/<title\b[^>]*>(.*?)<\/title>/si', $output, $m) && !empty($m[1])) {
        $page_info['site_title'] = $m[1];
    }

    // --- Meta tags --------------------------------------------------------
    $meta_array = array(
        'description' => '',
        'keywords'    => '',
    );
    // Quote-aware tag matcher: a '>' inside a quoted attribute value does not
    // end the tag. Possessive quantifiers keep matching linear on large pages.
    $tag_pattern  = '/<meta\b((?:[^>"\']++|"[^"]*+"|\'[^\']*+\')*+)>/i';
    // attr="v", attr='v' or attr=v; the branch-reset group (?|...) puts the
    // value in capture group 2 whichever quoting style was used.
    $attr_pattern = '/([^\s"\'<>\/=]+)\s*=\s*(?|"([^"]*)"|\'([^\']*)\'|([^\s"\'>]+))/';
    if (preg_match_all($tag_pattern, $output, $meta_tags, PREG_SET_ORDER)) {
        foreach ($meta_tags as $meta_tag) {
            $attrs = array();
            if (preg_match_all($attr_pattern, $meta_tag[1], $pairs, PREG_SET_ORDER)) {
                foreach ($pairs as $pair) {
                    $attr_name = strtolower($pair[1]);
                    // The first occurrence of a duplicated attribute wins.
                    if (!isset($attrs[$attr_name])) {
                        $attrs[$attr_name] = isset($pair[2]) ? $pair[2] : '';
                    }
                }
            }
            if (!isset($attrs['content'])) continue;
            // Index the content under whichever identifying attribute(s) the
            // tag carries, independent of attribute order or extra attributes
            // such as itemprop/property.
            foreach (array('name', 'http-equiv', 'scheme') as $key_attr) {
                if (isset($attrs[$key_attr])) {
                    $meta_array[strtolower($attrs[$key_attr])] = $attrs['content'];
                }
            }
        }
    }

    // --- Fallback: PHP's own meta parser ----------------------------------
    // get_meta_tags() only reads from a file/URL, so the markup is written
    // to a temporary file first.
    if (empty($meta_array['keywords']) || empty($meta_array['description'])) {
        $tempFile = DIR . "/data/temp/" . md5(uniqid() . Get_Rand_Str(8)) . ".html";
        $tags = false;
        if (file_put_contents($tempFile, $output) !== false) {
            $tags = get_meta_tags($tempFile);
        }
        // Remove the temporary file even when the write only partly succeeded.
        if (is_file($tempFile)) {
            unlink($tempFile);
        }
        if (is_array($tags)) {
            if (empty($meta_array['keywords']) && !empty($tags['keywords'])) {
                $meta_array['keywords'] = $tags['keywords'];
            }
            if (empty($meta_array['description']) && !empty($tags['description'])) {
                $meta_array['description'] = $tags['description'];
            }
        }
    }
    $page_info['site_keywords'] = $meta_array['keywords'];
    $page_info['site_description'] = $meta_array['description'];

    // --- Friend-link detection ---------------------------------------------
    if (!empty($friend_link) && strpos($output, (string) $friend_link) !== false) {
        $page_info['friend_link_status'] = 1;
    }
    return $page_info;
}