网站前端、后端开发书籍推荐

网站前端、后端开发书籍推荐

前端

《精通CSS+DIV网页样式与布局》(前沿科技)

《CSS设计彻底研究》(作者:温谦)

《jQuery基础教程》

《JavaScript高级程序设计》

后端(PHP方向)

 

《PHP与MYSQL WEB开发》

《PHP深度分析-101个核心技巧、窍门、和问题的解决方法》

学习 MVC 框架,如 Yii Framework、国内的 ThinkPHP 框架

剩下的就靠自己练习、体会了

SQL语句

[sql]
#\c 取消命令
#-A --skip-auto-rehash 关闭自动补全
#mysql --print-defaults
#my_print_defaults client
#mysql learn-database < learn.sql 执行命令
#source learn.sql 同上
#mysql test 使用test数据库
#mysql -e "select * from city limit 0,10" test 直接执行一条命令
#mysqldump test |mysql test1 备份数据
#mysql test < select.sql > q.txt 重定向
#mysql -t test < select.sql > q.txt 重定向(格式输出)
#mysql -H -e "select * from city limit 0,10" test > 1.html 重定向生成html
#mysql -X -e "select * from city limit 0,10" test > 1.xml 重定向生成XML
#mysql -ss -e "select * from city limit 0,10" test 忽略头部 --skip-column-names
#mysql> show full columns from city\G 垂直输出表的内容
#mysql --tee=tmp.out test 记录会话信息 \T开启 \t关闭
#select @max :=max(cityid) from city; 变量 max 使用的时候用@max 大小写不敏感 且为Mysql特有
#set @max=1; 设置max值为1
# set @ln=0;select @ln:=@ln+1 as ln ,cityid from city limit 0,20; 结果中输出行号
#show status like "uptime"; 已经运行时间
#status
#mysql test <<MYSQL_INPUT
/*输入的文档或数据 */
#MYSQL_INPUT

[/sql]

网页抓取中避免多次登录

网页抓取中要尽量避免多次登录,理由是:

1.多次登录浪费资源

2.登录次数过多会导致账号禁用或被封

所以我们要把cookie信息存起来,下次使用只有登录才能使用的功能时,直接装入cookie信息就行了。

但这样可能带来一个问题:有的网站对一次登录后可执行的操作次数有限制。这种情况下,写个脚本每隔特定时间清除cookie并重新登录就可以了。详细例子见上一篇文章《贴吧发帖机、盖楼机》。

百度贴吧发帖机、盖楼机程序(PHP版)

不说废话,翠花,上代码

 
[php]
<?php
/**
 * Minimal cURL-based HTTP client that carries cookies from one request to the
 * next and persists them to ./cookie.txt, so a login session can be reused by
 * later runs instead of logging in again.
 */
class httpconnector {
    /** cURL handle of the most recent request. */
    private $curl;

    /** Cookie header value: "name=value" pairs joined with ';'. */
    private $cookie = '';

    /** File the cookie string is loaded from and saved to. */
    private $cookie_file = './cookie.txt';

    /**
     * Loads a previously saved cookie string, if the cookie file exists.
     * A missing file (first run) simply leaves the cookie string empty.
     */
    function __construct() {
        if (is_readable($this->cookie_file)) {
            $saved = file_get_contents($this->cookie_file);
            if ($saved !== false) {
                $this->cookie = trim($saved);
            }
        }
    }

    /**
     * Downloads a page with GET.
     *
     * @param string $url
     * @return string|false response headers followed by the body, or false
     *                      when the transfer failed
     */
    public function get($url) {
        return $this->request($url, false, null);
    }

    /**
     * Downloads a page with POST.
     *
     * @param string $url
     * @param mixed  $params POST payload, passed to CURLOPT_POSTFIELDS as-is
     * @return string|false response headers followed by the body, or false
     *                      when the transfer failed
     */
    public function post($url, $params) {
        return $this->request($url, true, $params);
    }

    /**
     * Performs one request, folds any Set-Cookie headers of the response into
     * the cookie string and writes the cookie string back to the cookie file.
     */
    private function request($url, $is_post, $params) {
        $this->curl = curl_init();

        curl_setopt($this->curl, CURLOPT_URL, $url);
        // Include the response headers in the returned data: cookies are read
        // from them, and callers receive headers + body in one string.
        curl_setopt($this->curl, CURLOPT_HEADER, 1);
        curl_setopt($this->curl, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)");
        curl_setopt($this->curl, CURLOPT_COOKIE, $this->cookie);
        if ($is_post) {
            curl_setopt($this->curl, CURLOPT_POST, 1);
            curl_setopt($this->curl, CURLOPT_POSTFIELDS, $params);
        }
        // Return the response as a string instead of printing it.
        curl_setopt($this->curl, CURLOPT_RETURNTRANSFER, 1);

        $data = curl_exec($this->curl);
        $header_size = ($data === false) ? 0 : (int)curl_getinfo($this->curl, CURLINFO_HEADER_SIZE);
        curl_close($this->curl);

        if ($data !== false) {
            // Only the header section is scanned, so page text that happens to
            // contain "Set-Cookie:" is not mistaken for a cookie.
            $this->absorbCookies(substr($data, 0, $header_size));
        }
        file_put_contents($this->cookie_file, $this->cookie);

        return $data;
    }

    /**
     * Merges the Set-Cookie headers found in $headers into the cookie string.
     * Cookies are keyed by name so a newer value replaces the older one
     * instead of being appended after it.
     */
    private function absorbCookies($headers) {
        $jar = array();
        foreach (explode(';', $this->cookie) as $pair) {
            $this->putCookie($jar, $pair);
        }
        // Capture "name=value" up to the first ';' or the end of the header line.
        if (preg_match_all('/^Set-Cookie:\s*([^;\r\n]*)/mi', $headers, $found)) {
            foreach ($found[1] as $pair) {
                $this->putCookie($jar, $pair);
            }
        }
        $this->cookie = implode(';', $jar);
    }

    /**
     * Stores one "name=value" pair in $jar under its cookie name; blank pairs
     * are ignored.
     */
    private function putCookie(&$jar, $pair) {
        $pair = trim($pair);
        if ($pair === '') {
            return;
        }
        $eq = strpos($pair, '=');
        $name = ($eq === false) ? $pair : substr($pair, 0, $eq);
        $jar[$name] = $pair;
    }
}
/**
 * Scripted client for Baidu Tieba's WAP pages: login, post a thread, reply to
 * a thread, log out, and scrape a forum's thread list.
 *
 * Encoding notes (from the original author): pages and forum names are
 * fetched as GBK; replies and login use UTF-8.
 *
 * NOTE(review): user-supplied values (user name, password, forum name, title,
 * content) are concatenated into URLs and POST bodies without URL-encoding,
 * exactly as before; callers must pass values that are safe to embed.
 */
class tieba {
    /** HTTP client used for every request. */
    private $http;

    function __construct() {
        $this->http = new httpconnector;
    }

    /**
     * Logs in through the WAP passport page.
     *
     * @param string $username
     * @param string $pwd
     * @return int 0 = response contains "密码错误" (wrong password),
     *             1 = response contains "验证码" (captcha),
     *             2 = response contains "账户不存在" (account does not exist),
     *             3 = none of these markers was found
     */
    function login($username, $pwd) {
        $data = (string)$this->http->post('http://wappass.baidu.com/passport/', "login_username=".$username."&login_loginpass=".$pwd."&login=yes&aa=登录&can_input=0");

        if (strpos($data, "密码错误") !== false) {
            return 0;
        }
        if (strpos($data, "验证码") !== false) {
            return 1;
        }
        if (strpos($data, "账户不存在") !== false) {
            return 2;
        }
        return 3;
    }

    /**
     * Posts a new thread to a forum and returns the id of the first
     * non-sticky thread listed afterwards.
     *
     * @param string $tiebaname forum name
     * @param string $title     thread title
     * @param string $content   thread body
     * @return string thread id, or '' when the posting form or the thread id
     *                could not be found
     */
    function send($tiebaname, $title, $content) {
        $url = "http://wapp.baidu.com/f/?kw=".$tiebaname;
        $form = $this->parseForm($this->http->get($url));
        if ($form === null) {
            return '';
        }

        // The blank after "co=" is kept from the original request format.
        $postdata = $form['fields']."ti=".$title."&co= ".$content;
        $this->http->post($this->resolveAction($form['action']), $postdata);

        // Re-read the list and pick the first entry that is not marked with
        // <span class="light"> (sticky threads would otherwise be mistaken
        // for the new post).
        $items = explode('<div class="i">', (string)$this->http->get($url));
        $count = count($items);
        for ($i = 1; $i < $count; $i++) {
            if (strpos($items[$i], '<span class="light">') !== false) {
                continue;
            }
            $after = explode("m?kz=", $items[$i], 2);
            if (!isset($after[1])) {
                return '';
            }
            $id = explode("&", $after[1]);
            return $id[0];
        }

        return '';
    }

    /**
     * Follows the "退出" (log out) link of the WAP home page.
     *
     * @return string|false response of the logout request, or false when the
     *                      link could not be found or the request failed
     */
    function logout() {
        $url = "http://wapp.baidu.com";
        $page = explode("退出</a>", (string)$this->http->get($url));
        if (count($page) < 2) {
            // No logout link on the page.
            return false;
        }
        // The logout link is the last href before the "退出</a>" text.
        $hrefs = explode('href="', $page[0]);
        if (count($hrefs) < 2) {
            return false;
        }
        $target = explode('"', $hrefs[count($hrefs) - 1]);
        return $this->http->get($target[0]);
    }

    /**
     * Replies to a thread. Prints the thread page, the submit URL and the
     * server response, as the original did; returns nothing.
     *
     * @param string $tid     thread id
     * @param string $content reply text
     */
    function replay($tid, $content) {
        $url = "http://wapp.baidu.com/f/?kz=".$tid;

        $data = $this->http->get($url);
        echo $data;

        $form = $this->parseForm($data);
        if ($form === null) {
            return;
        }

        $postdata = $form['fields']."co=".$content;
        $url = $this->resolveAction($form['action']);
        echo $url;
        $data = $this->http->post($url, $postdata);
        echo $data;
    }

    /**
     * Scrapes the thread list of a forum from the desktop site.
     * The forum name is converted from UTF-8 to GBK for the request and the
     * scraped text is returned as received (GBK per the author's note).
     *
     * @param string $tiebaname forum name in UTF-8
     * @return array list of rows with keys clickcount, replaynum, title,
     *               author, tid, replay
     */
    function crul($tiebaname) {
        $tiebaname = iconv("UTF-8", "GBK", $tiebaname);
        if ($tiebaname === false) {
            return array();
        }
        $url = "http://tieba.baidu.com/f?kw=".$tiebaname;
        $content = (string)$this->http->get($url);
        $out = explode("</tr>", $content);
        // The first chunk and the last six are not processed; presumably they
        // are not thread rows (layout-dependent, not verifiable from here).
        $num = count($out) - 7;
        $result = array();
        for ($i = 1; $i < $num + 1; $i++) {
            $tli = explode("</td>", $out[$i]);
            if (count($tli) < 5) {
                // Not a full thread row; skip instead of reading missing cells.
                continue;
            }
            $pos1 = (int)strpos($tli[0], '<td nowrap>');
            $clickcount = substr($tli[0], $pos1 + 11);
            $replaynum = $this->getNum($tli[1]);
            $author = trim(strip_tags($tli[3]));
            $replay = $this->getChinese($tli[4]);
            $href = $tli[2];
            $pos1 = (int)strpos($href, '"_blank"');
            $pos2 = (int)strpos($href, '</a>');
            $title = substr($href, $pos1 + 9, $pos2 - $pos1 - 5);
            $title = strip_tags($title);
            $pos1 = (int)strpos($href, '/p');
            $tid = substr($href, $pos1 + 3, 10);
            if ($replay == "") {
                // No last-replier text: fall back to the author.
                $replay = $author;
            }
            $result[] = array("clickcount"=>$clickcount, "replaynum"=>$replaynum, "title"=>$title, "author"=>$author, "tid"=>$tid, "replay"=>$replay);
        }
        return $result;
    }

    /**
     * Keeps only the double-byte characters of a GBK string: every byte
     * above 0xA0 is taken together with the byte that follows it.
     *
     * @param string $string
     * @return string
     */
    function getChinese($string) {
        $string = (string)$string;
        $kept = '';
        $len = strlen($string);
        for ($i = 0; $i < $len; $i++) {
            if (ord($string[$i]) > 0xA0) {
                $kept .= substr($string, $i, 2);
                $i++; // skip the second byte of the pair
            }
        }
        return $kept;
    }

    /**
     * Like getChinese(), but also keeps ASCII digits.
     *
     * @param string $string
     * @return string
     */
    function getChineseNum($string) {
        $string = (string)$string;
        $kept = '';
        $len = strlen($string);
        for ($i = 0; $i < $len; $i++) {
            $byte = $string[$i];
            if (ord($byte) > 0xA0) {
                $kept .= substr($string, $i, 2);
                $i++; // skip the second byte of the pair
            } elseif (is_numeric($byte)) {
                $kept .= $byte;
            }
        }
        return $kept;
    }

    /**
     * Keeps only the ASCII digits of a string.
     *
     * @param string $string
     * @return string
     */
    function getNum($string) {
        return preg_replace('/[^0-9]/', '', (string)$string);
    }

    /**
     * Extracts the posting form that follows <div class="d h">: its action
     * attribute and the name=value pairs of its pre-filled inputs.
     *
     * @param string|false $html page as returned by the HTTP client
     * @return array|null array('action' => string, 'fields' => string ending
     *                    in '&' or empty), or null when the form marker is
     *                    missing
     */
    private function parseForm($html) {
        $parts = explode('<div class="d h">', (string)$html);
        if (count($parts) < 2) {
            return null;
        }
        $form = explode('</form>', $parts[1]);
        $form = $form[0];

        $action = $this->attribute($form, 'action');

        $inputs = explode("<input", $form);
        $count = count($inputs);
        $fields = "";
        // Chunk 0 is the markup before the first input; the first two inputs
        // are skipped as in the original code (reason not visible here).
        for ($i = 3; $i < $count; $i++) {
            $name = $this->attribute($inputs[$i], 'name');
            $value = $this->attribute($inputs[$i], 'value');
            $fields .= $name."=".$value."&";
            if ($name == 'sub1') {
                // 'sub1' is the last field collected (presumably the submit
                // button).
                break;
            }
        }

        return array('action' => $action, 'fields' => $fields);
    }

    /**
     * Returns the double-quoted value of the first $attr="..." in $markup,
     * or '' when the attribute is absent.
     */
    private function attribute($markup, $attr) {
        if (preg_match('/\b'.preg_quote($attr, '/').'\s*=\s*"([^"]*)"/i', $markup, $m)) {
            return $m[1];
        }
        return '';
    }

    /**
     * Turns a form action into an absolute URL on wapp.baidu.com; an action
     * that is already absolute is returned unchanged.
     */
    private function resolveAction($action) {
        if (preg_match('#^https?://#i', $action)) {
            return $action;
        }
        return "http://wapp.baidu.com/".ltrim($action, '/');
    }
}

?>
[/php]

一个网页抓取的类支持get+post+cookie存储

以前做过一个贴吧发帖机,当时设定的是每发一帖就登录一次,结果账号被禁用了。后来改了下程序:登录一次后保存cookie信息,下次再发帖就不用登录了。

[php]
<?php
/**
 * Minimal cURL-based HTTP client that carries cookies from one request to the
 * next and persists them in a SaeKV store under the key "cookie", so a login
 * session can be reused by later runs.
 */
class httpconnector {
    /** cURL handle of the most recent request. */
    private $curl;

    /** Cookie header value: "name=value" pairs joined with ';'. */
    private $cookie = '';

    /** SaeKV instance used to persist the cookie string. */
    private $kv;

    /**
     * Opens the KV store and loads a previously saved cookie string, if any.
     */
    function __construct() {
        $this->kv = new SaeKV();
        $this->kv->init();
        $saved = $this->kv->get("cookie");
        if ($saved) {
            $this->cookie = $saved;
        }
    }

    /**
     * Downloads a page with GET.
     *
     * @param string $url
     * @return string|false response headers followed by the body, or false
     *                      when the transfer failed
     */
    public function get($url) {
        return $this->request($url, false, null);
    }

    /**
     * Downloads a page with POST.
     *
     * @param string $url
     * @param mixed  $params POST payload, passed to CURLOPT_POSTFIELDS as-is
     * @return string|false response headers followed by the body, or false
     *                      when the transfer failed
     */
    public function post($url, $params) {
        return $this->request($url, true, $params);
    }

    /**
     * Performs one request, folds any Set-Cookie headers of the response into
     * the cookie string and stores the cookie string in the KV store.
     */
    private function request($url, $is_post, $params) {
        $this->curl = curl_init();
        curl_setopt($this->curl, CURLOPT_URL, $url);
        // Include the response headers in the returned data: cookies are read
        // from them, and callers receive headers + body in one string.
        curl_setopt($this->curl, CURLOPT_HEADER, 1);
        curl_setopt($this->curl, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)");
        curl_setopt($this->curl, CURLOPT_COOKIE, $this->cookie);
        if ($is_post) {
            curl_setopt($this->curl, CURLOPT_POST, 1);
            curl_setopt($this->curl, CURLOPT_POSTFIELDS, $params);
        }
        curl_setopt($this->curl, CURLOPT_RETURNTRANSFER, 1);

        $data = curl_exec($this->curl);
        $header_size = ($data === false) ? 0 : (int)curl_getinfo($this->curl, CURLINFO_HEADER_SIZE);
        curl_close($this->curl);

        if ($data !== false) {
            // Only the header section is scanned, so page text that happens to
            // contain "Set-Cookie:" is not mistaken for a cookie.
            $this->absorbCookies(substr($data, 0, $header_size));
        }
        $this->kv->set("cookie", $this->cookie);

        return $data;
    }

    /**
     * Merges the Set-Cookie headers found in $headers into the cookie string.
     * Cookies are keyed by name so a newer value replaces the older one
     * instead of being appended after it.
     */
    private function absorbCookies($headers) {
        $jar = array();
        foreach (explode(';', (string)$this->cookie) as $pair) {
            $this->putCookie($jar, $pair);
        }
        // Capture "name=value" up to the first ';' or the end of the header line.
        if (preg_match_all('/^Set-Cookie:\s*([^;\r\n]*)/mi', $headers, $found)) {
            foreach ($found[1] as $pair) {
                $this->putCookie($jar, $pair);
            }
        }
        $this->cookie = implode(';', $jar);
    }

    /**
     * Stores one "name=value" pair in $jar under its cookie name; blank pairs
     * are ignored.
     */
    private function putCookie(&$jar, $pair) {
        $pair = trim($pair);
        if ($pair === '') {
            return;
        }
        $eq = strpos($pair, '=');
        $name = ($eq === false) ? $pair : substr($pair, 0, $eq);
        $jar[$name] = $pair;
    }
}
?>
[/php]