一个网页抓取的类支持get+post+cookie存储

以前做个贴吧发帖机,然后设定的的是发一贴需登录一次,最后账号被禁用了。最后就改了下程序,登录一次保存cookie信息,下次再发帖就不用登陆了。

[php]
<?php
class httpconnector {
private $curl;
private $cookie;
private $kv;
function __construct(){
$this->kv = new SaeKV();
$this->kv->init();
if($data=$this->kv->get("cookie"))
$this->cookie=$data;

}
public function get($url) {
$this->curl = curl_init();
curl_setopt($this->curl, CURLOPT_URL, $url);
curl_setopt($this->curl, CURLOPT_HEADER, 1);
curl_setopt($this->curl, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)");
curl_setopt($this->curl, CURLOPT_COOKIE, $this->cookie);
curl_setopt($this->curl, CURLOPT_RETURNTRANSFER, 1);
$data = curl_exec($this->curl);
curl_close($this->curl);
preg_match_all("/Set-Cookie:(.*?);/", $data, $match, PREG_SET_ORDER);
foreach ($match as $r) {
if ($this->cookie != ”) {
$this->cookie = $this->cookie . ‘;’;
}
if (isset($r[1])) {
$this->cookie .= trim(str_replace("rn", "", $r[1]));
}
}
$this->kv->set("cookie",$this->cookie);
return $data;

}
public function post($url, $params) {
$this->curl = curl_init();
curl_setopt($this->curl, CURLOPT_URL, $url);
curl_setopt($this->curl, CURLOPT_HEADER, 1);
curl_setopt($this->curl, CURLOPT_COOKIE, $this->cookie);
curl_setopt($this->curl, CURLOPT_POST, 1);
curl_setopt($this->curl, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)");
curl_setopt($this->curl, CURLOPT_POSTFIELDS, $params);
curl_setopt($this->curl, CURLOPT_RETURNTRANSFER, 1);
$data = curl_exec($this->curl);
curl_close($this->curl);
preg_match_all("/Set-Cookie:(.*?);/", $data, $match, PREG_SET_ORDER);
foreach ($match as $r) {
if ($this->cookie != ”) {
$this->cookie = $this->cookie . ‘;’;
}
if (isset($r[1])) {
$this->cookie .= trim(str_replace("rn", "", $r[1]));
}
}
$this->kv->set("cookie",$this->cookie);
return $data;

}
}
?>
[/php]

Leave a Reply

Time limit is exhausted. Please reload CAPTCHA.

此站点使用Akismet来减少垃圾评论。了解我们如何处理您的评论数据