繁体中文
设为首页
加入收藏
当前位置:网络编程首页 >> php >> 基于PHP网页快照类

基于PHP网页快照类

2007-12-29 09:46:20  作者:  来源:互联网  浏览次数:67  文字大小:【】【】【
简介:<?PHP//====================================================// FileName: snap.class.php// Summary: 网页快照类// Author: mi ...
关键字:网页

<?PHP
//====================================================
//                FileName:        snap.class.php
//                Summary:        网页快照类
//                Author:                millken(迷路林肯)
//                LastModified:2007-06-29
//                copyright (c)2007 millken@gmail.com
//====================================================
/**
 * Web page snapshot cache.
 *
 * Downloads an http(s) page, rewrites the links found in <img>, <link> and
 * <a> tags to absolute URLs, strips <script> elements and stores the result
 * in a file next to this class. While the stored copy is younger than the
 * requested time-to-live it is served instead of contacting the remote host.
 *
 * Typical use:
 *     $snap = new snap();
 *     $snap->fetch('http://example.com/page.html', 60);
 *     echo $snap->contents;
 */
class snap{
    /** Directory (with trailing slash) in which snapshot files are stored. */
    public $dir;
    /** HTML-formatted trace of what the object did; meant for debugging. */
    public $log;
    /** Body of the most recently fetched (or cached) page. */
    public $contents;
    /** Full path of the cache file for the current URL. */
    public $filename;
    /** "scheme://host[:port]/" of the current URL. */
    public $host;
    /** Cache file name: md5 of the current URL. */
    public $name;
    /** Modification time (Unix timestamp) of the cache file, 0 when absent. */
    public $data_ts;
    /** Maximum cache age in seconds. */
    public $ttl;
    /** URL passed to the last successful fetch() validation. */
    public $url;
    /** Age of the cache file in seconds, computed by file_get_content(). */
    public $ts;

    /**
     * Initialise the log and the cache directory.
     *
     * A real __construct() is required: since PHP 8 a method named after the
     * class is no longer treated as a constructor.
     */
    function __construct(){
        $this->log = "New snap() object instantiated.<br />\n";
        $this->dir = dirname(__FILE__)."/";
    }

    /**
     * Backward-compatible alias of the old PHP 4 style constructor, kept for
     * code that calls $obj->snap() explicitly. Declared after __construct()
     * so that __construct() stays the constructor on every PHP version.
     */
    function snap(){
        $this->__construct();
    }

    /**
     * Load the page at $url into $this->contents, using the cache if fresh.
     *
     * Only absolute http:// and https:// URLs are accepted: the value usually
     * comes from the request, and anything else (plain paths, file://,
     * php:// ...) would let a visitor read local files through the fetcher.
     *
     * @param string $url Absolute http(s) URL of the page.
     * @param int    $ttl Maximum age of a cached copy, in seconds.
     * @return bool true when contents were loaded, false otherwise.
     */
    function fetch($url="",$ttl=10){
        $this->log .= "--------------------------------<br />fetch() called<br />\n";
        // Validate before touching the value: it may be empty or not a string.
        if (!is_string($url) || $url == "") {
            $this->log .= "OOPS: You need to pass a URL!<br />";
            return false;
        }
        // The log is HTML, so the untrusted URL has to be escaped.
        $this->log .= "url: ".htmlspecialchars($url, ENT_QUOTES)."<br />\n";

        $parts  = parse_url($url);
        $scheme = (is_array($parts) && isset($parts['scheme'])) ? strtolower($parts['scheme']) : "";
        if (($scheme != "http" && $scheme != "https") || empty($parts['host'])) {
            $this->log .= "OOPS: Only absolute http(s) URLs can be fetched!<br />";
            return false;
        }

        $authority = $parts['host'];
        if (isset($parts['port'])) {
            $authority .= ":".$parts['port'];
        }
        $this->host = $parts['scheme']."://".$authority."/";

        $this->ttl = $ttl;
        $this->url = $url;
        $this->name = md5($this->url);
        $this->filename = $this->dir.$this->name;
        $this->log .= "Filename: ".$this->filename."<br />";
        $this->getFile_ts();
        return $this->file_get_content();
    }

    /**
     * Fill $this->contents from the cache file when it is fresh, otherwise
     * from the remote URL (and then refresh the cache).
     *
     * A failed download leaves the existing cache file untouched.
     *
     * @return bool true when contents were loaded, false when the remote
     *              page could not be retrieved.
     */
    function file_get_content(){
        $this->ts = time() - $this->data_ts;
        if ($this->data_ts != 0 && $this->ts <= $this->ttl) {
            $this->log .= "cache hasn't expired<br />";
            $cached = @file_get_contents($this->filename);
            if ($cached !== false) {
                $this->contents = $cached;
                return true;
            }
            // Unreadable cache file: fall through and download again.
            $this->log .= "Could not read ".$this->filename."<br />";
        } else {
            $this->log .= "cache is missing or has expired<br />";
        }

        $remote = @file_get_contents($this->url);
        if ($remote === false) {
            $this->log .= "Could not fetch ".htmlspecialchars($this->url, ENT_QUOTES)."<br />";
            $this->contents = "";
            return false;
        }
        $this->contents = $remote;
        $this->saveToCache();
        return true;
    }

    /**
     * Post-process $this->contents (absolute links, scripts removed) and
     * write it to the cache file.
     *
     * The processing happens before the file is opened so that the caller
     * gets the same contents whether or not the cache is writable.
     *
     * @return bool true when the cache file was written.
     */
    function saveToCache(){
        $this->log .= "saveToCache() called<br />";

        // Resolve links against the page URL; fall back to the host root.
        $base = $this->url ? $this->url : $this->host;
        $this->contents = $this->formaturl($this->contents,$base);
        $stripped = preg_replace("'<script[^>]*?>.*?</script>'si","",$this->contents);
        if ($stripped !== null) {
            // preg_replace() returns null on a PCRE error; keep the text then.
            $this->contents = $stripped;
        } else {
            $this->log .= "Could not strip scripts<br />";
        }

        //create file pointer
        if (!$fp=@fopen($this->filename,"w")) {
            $this->log .= "Could not open ".$this->filename."<br />";
            return false;
        }
        //write to file; 0 bytes written is fine for an empty page
        if (@fwrite($fp,$this->contents) === false) {
            $this->log .= "Could not write to ".$this->filename."<br />";
            fclose($fp);
            return false;
        }
        //close file pointer
        fclose($fp);
        return true;
    }

    /**
     * Store the modification time of the cache file in $this->data_ts
     * (0 when the file does not exist).
     *
     * @return bool true when the cache file exists.
     */
    function getFile_ts(){
        $this->log .= "getFile_ts() called<br />";
        // The file may have been rewritten earlier in this request.
        clearstatcache();
        if (!file_exists($this->filename)) {
            $this->data_ts = 0;
            $this->log .= $this->filename." does not exist<br />";
            return false;
        }
        $this->data_ts = filemtime($this->filename);
        return true;
    }

    /**
     * Rewrite the links of every <img>, <link> and <a> tag in an HTML
     * document to absolute URLs.
     *
     * @param string $l1 HTML document.
     * @param string $l2 Base URL the relative links are resolved against.
     * @return string The document with rewritten tags.
     */
    function formaturl($l1,$l2){
        $pattern = "/(<img[^>]+src=\"([^\"]+)\"[^>]*>)|(<link[^>]+href=\"([^\"]+)\"[^>]*>)|(<a[^>]+href=\"([^\"]+)\"[^>]*>)|(<img[^>]+src='([^']+)'[^>]*>)|(<a[^>]+href='([^']+)'[^>]*>)|(<link[^>]+href='([^']+)'[^>]*>)/i";
        if (preg_match_all($pattern,$l1,$regs)) {
            // Identical tags only need to be rewritten once.
            foreach (array_unique($regs[0]) as $tag) {
                $l1 = str_replace($tag,$this->lIIIIl($tag,$l2),$l1);
            }
        }
        return $l1;
    }

    /**
     * Make the href/src value of a single tag absolute.
     *
     * The tag is returned unchanged when no value can be found, when the
     * base URL has no host, when the value already carries a scheme
     * (http:, mailto:, javascript:, data: ...) or when it is a same-page
     * "#anchor". A fragment after a path is dropped, and the rewritten
     * value is always emitted in double quotes.
     *
     * @param string $l1 One HTML tag, e.g. <a href="../x.html">.
     * @param string $l2 Base URL the value is resolved against.
     * @return string The tag with an absolute href/src.
     */
    function lIIIIl($l1,$l2){
        // Group 3 is the raw attribute value, quotes included. The offset is
        // captured so that exactly this occurrence is replaced later on.
        if (!preg_match("/(.*)(href|src)\=(.+?)( |\/\>|\>).*/i",$l1,$regs,PREG_OFFSET_CAPTURE)) {
            return $l1;
        }
        $I2 = $regs[3][0];
        $offset = $regs[3][1];
        $I1 = str_replace(array(chr(34),chr(39)),"",$I2);

        $url_parsed = parse_url($l2);
        if (!is_array($url_parsed) || empty($url_parsed["host"])) {
            return $l1;
        }
        $scheme = isset($url_parsed["scheme"]) ? $url_parsed["scheme"] : "";
        $l3 = ($scheme != "" ? $scheme."://" : "").$url_parsed["host"];
        if (isset($url_parsed["port"])) {
            $l3 .= ":".$url_parsed["port"];
        }

        // Directory of the base path without trailing slash:
        // "/a/b.html" -> "/a", "/a/b/" -> "/a/b", "/" -> "".
        $path = isset($url_parsed["path"]) ? $url_parsed["path"] : "/";
        $slash = strrpos($path,"/");
        $path = ($slash === false) ? "" : substr($path,0,$slash);

        $pos = strpos($I1,"#");
        if ($pos === 0) {
            return $l1; // same-page anchor, valid inside the snapshot as is
        }
        if ($pos !== false) {
            $I1 = substr($I1,0,$pos);
        }
        if ($I1 == "") {
            return $l1;
        }

        // Determine the kind of reference
        if (preg_match('/^[a-z][a-z0-9+.-]*:/i',$I1)) {
            return $l1; // already has a scheme
        } elseif (substr($I1,0,2) == "//") {
            // protocol-relative: only the scheme is missing
            $I1 = ($scheme != "" ? $scheme.":" : "").$I1;
        } elseif ($I1[0] == "/") {
            // root-relative path
            $I1 = $l3.$I1;
        } elseif (substr($I1,0,3) == "../") {
            // climb one directory per leading "../", never above the root
            while (substr($I1,0,3) == "../") {
                $I1 = substr($I1,3);
                $up = strrpos($path,"/");
                $path = ($up === false) ? "" : substr($path,0,$up);
            }
            $I1 = $l3.$path."/".$I1;
        } elseif (substr($I1,0,2) == "./") {
            $I1 = $l3.$path.substr($I1,1);
        } else {
            $I1 = $l3.$path."/".$I1;
        }
        return substr_replace($l1,"\"".$I1."\"",$offset,strlen($I2));
    }
}
?>

test.php

<?php
// Demo endpoint: prints the snapshot of the page given in the "url" query
// parameter, e.g. test.php?url=http://example.com/
require_once(dirname(__FILE__).'/snap.class.php');

// The parameter is untrusted: it may be missing, an array, or point at a
// local file / stream wrapper. Accept absolute http(s) URLs only.
$url = (isset($_GET['url']) && is_string($_GET['url'])) ? trim($_GET['url']) : '';
if ($url == '' || !preg_match('#^https?://#i', $url)) {
    header('HTTP/1.1 400 Bad Request');
    echo 'Please pass an absolute http(s) address in the "url" parameter.';
    exit;
}

$h = new snap();
if ($h->fetch($url) === false) {
    header('HTTP/1.1 502 Bad Gateway');
    echo 'Could not fetch the requested page.';
    exit;
}
// Uncomment to print the object's debug trace before the page.
//echo $h->log;
echo $h->contents;
?>

责任编辑:探索者


相关文章
 

最新文章

更多

· php教程:mysql的常用语句
· PHP教程:cookie和数组的...
· PHP创建windows服务并自...
· PHP技巧教程:setcookie...
· 可以在读者复制网页内容...
· 高亮显示php代码
· PHP加速器 eaccelerator...
· ffmpeg+mencoder环境搭建...
· ffmpeg的编译大全
· Ffmpeg使用语法

推荐文章

更多

· php教程:mysql的常用语句
· PHP教程:cookie和数组的...
· PHP创建windows服务并自...
· PHP技巧教程:setcookie...
· 可以在读者复制网页内容...
· 高亮显示php代码
· PHP加速器 eaccelerator...
· ffmpeg+mencoder环境搭建...
· ffmpeg的编译大全
· Ffmpeg使用语法

热点文章

更多