网站前端、后端开发书籍推荐

网站前端、后端开发书籍推荐

前端

《精通CSS+DIV网页样式与布局》(前沿科技)

《CSS设计彻底研究》(作者:温谦)

《jquery基础教程》

《javascript高级程序设计》

后端(PHP方向)

 

《PHP与MYSQL WEB开发》

《PHP深度分析-101个核心技巧、窍门和问题的解决方法》

学习 MVC 框架,如 Yii Framework,或国内的 ThinkPHP 框架

剩下的就靠自己练习、体会了

SQL语句

[sql]
#\c 取消当前输入的命令
#-A --skip-auto-rehash 关闭自动补全(可加快启动)
#mysql --print-defaults
#my_print_defaults client
#mysql learn-database < learn.sql 执行命令
#source learn.sql 同上
#mysql test 使用test数据库
#mysql -e "select * from city limit 0,10" test 直接执行一条命令
#mysqldump test |mysql test1 备份数据
#mysql test < select.sql > q.txt 重定向
#mysql -t test < select.sql > q.txt 重定向(格式输出)
#mysql -H -e "select * from city limit 0,10" test > 1.html 重定向生成html
#mysql -X -e "select * from city limit 0,10" test > 1.xml 重定向生成XML
#mysql -ss -e "select * from city limit 0,10" test 忽略头部 --skip-column-names
#mysql> show full columns from city\G 垂直输出表的内容
#mysql --tee=tmp.out test 记录会话信息 \T 开启 \t 关闭
#select @max :=max(cityid) from city; 变量 max 使用的时候用@max 大小写不敏感 且为Mysql特有
#set @max=1; 设置max值为1
# set @ln=0;select @ln:=@ln+1 as ln ,cityid from city limit 0,20; 结果中输出行号
#show status like "uptime"; 已经运行时间
#status
#mysql test <<MYSQL_INPUT
/*输入的文档或数据 */
#MYSQL_INPUT

[/sql]

网页抓取中避免多次登录

网页抓取中要尽量避免多次登录,理由是:

1.多次登录浪费资源

2.登录次数过多会导致账号禁用或被封

所以我们要把cookie信息存起来,下次使用只有登录才能使用的功能时,直接装入cookie信息就行了。

但是这可能带来一个问题:有的网站对一次登录后可执行的操作次数有限制。这种情况下,写个脚本每隔一段时间清除cookie并重新登录就可以了。详细例子见上一篇文章:贴吧发帖机、盖楼机

百度贴吧发帖机、盖楼机程序(PHP版)

不说废话,翠花,上代码

 
[php]
<?php
class httpconnector
{
    /** File the cookie string is persisted in between runs (relative to the working directory). */
    const COOKIE_FILE = './cookie.txt';

    /** User-Agent header sent with every request. */
    const USER_AGENT = 'Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)';

    /** cURL handle of the most recent request. */
    private $curl;

    /** Cookie header value: "name=value" pairs joined by ';'. */
    private $cookie = '';

    /** True when a non-empty cookie string was loaded from COOKIE_FILE. */
    private $read_cookie = false;

    /**
     * Loads a previously saved cookie string, if any.
     *
     * A missing or unreadable cookie file is not an error: it is the normal
     * state on the first run, so the object simply starts without cookies.
     */
    public function __construct()
    {
        if (!is_readable(self::COOKIE_FILE)) {
            return;
        }

        // Read the whole file: a fixed-size fgets() would silently truncate
        // long cookie strings.
        $content = file_get_contents(self::COOKIE_FILE);
        if ($content === false) {
            return;
        }

        $content = trim($content);
        if ($content !== '') {
            $this->cookie = $content;
            $this->read_cookie = true;
        }
    }

    /**
     * Downloads a page with GET.
     *
     * @param string $url
     * @return string|false Response headers followed by the body, or false
     *                      when the transfer failed.
     */
    public function get($url)
    {
        return $this->request($url, false, null);
    }

    /**
     * Downloads a page with POST.
     *
     * @param string $url
     * @param mixed  $params POST payload, handed to CURLOPT_POSTFIELDS unchanged.
     * @return string|false Response headers followed by the body, or false
     *                      when the transfer failed.
     */
    public function post($url, $params)
    {
        return $this->request($url, true, $params);
    }

    /**
     * Performs one request, then records and persists the cookies it set.
     *
     * @param string $url
     * @param bool   $isPost True to send $params as a POST body.
     * @param mixed  $params POST payload (ignored for GET).
     * @return string|false
     */
    private function request($url, $isPost, $params)
    {
        $this->curl = curl_init();

        curl_setopt($this->curl, CURLOPT_URL, $url);
        // Keep the response headers in the returned string; Set-Cookie is read from them.
        curl_setopt($this->curl, CURLOPT_HEADER, 1);
        curl_setopt($this->curl, CURLOPT_USERAGENT, self::USER_AGENT);
        if ($this->cookie !== '') {
            curl_setopt($this->curl, CURLOPT_COOKIE, $this->cookie);
        }
        if ($isPost) {
            curl_setopt($this->curl, CURLOPT_POST, 1);
            curl_setopt($this->curl, CURLOPT_POSTFIELDS, $params);
        }
        // Return the response as a string instead of printing it.
        curl_setopt($this->curl, CURLOPT_RETURNTRANSFER, 1);

        $data = curl_exec($this->curl);
        // Must be queried before the handle is closed.
        $headerSize = ($data === false) ? 0 : (int) curl_getinfo($this->curl, CURLINFO_HEADER_SIZE);
        curl_close($this->curl);

        if ($data === false) {
            return false;
        }

        $this->rememberCookies(substr($data, 0, $headerSize));

        return $data;
    }

    /**
     * Merges the Set-Cookie values found in a header block into the cookie
     * string and writes the result to COOKIE_FILE.
     *
     * Only the leading "name=value" of each Set-Cookie header is kept;
     * attributes such as path or expires are dropped.
     *
     * @param string $headers Raw response header block.
     */
    private function rememberCookies($headers)
    {
        $fresh = array();
        // Header names are case-insensitive, and a cookie without attributes
        // has no trailing ';', so match up to ';' or the end of the line.
        if (preg_match_all('/^Set-Cookie:[ \t]*([^;\r\n]*)/mi', (string) $headers, $matches)) {
            $fresh = $matches[1];
        }

        $this->cookie = $this->mergeCookies($this->cookie, $fresh);

        file_put_contents(self::COOKIE_FILE, $this->cookie, LOCK_EX);
    }

    /**
     * Combines an existing cookie string with new pairs. A later pair with
     * the same name replaces the earlier one, so the string does not grow
     * with duplicates on every request.
     *
     * @param string   $cookieString Existing "a=b;c=d" string.
     * @param string[] $newPairs     New "name=value" pairs.
     * @return string
     */
    private function mergeCookies($cookieString, array $newPairs)
    {
        $jar = array();
        $pairs = array_merge(explode(';', (string) $cookieString), $newPairs);

        foreach ($pairs as $pair) {
            $pair = trim($pair);
            if ($pair === '') {
                continue;
            }
            $parts = explode('=', $pair, 2);
            $jar[trim($parts[0])] = $pair;
        }

        return implode(';', $jar);
    }
}
class tieba
{
    /*
     * Encoding notes from the original author: pages are fetched as GBK and
     * the forum name used for fetching is GBK too; replies and login use UTF-8.
     */

    /** Text that precedes the posting form on the WAP forum page. */
    const POST_FORM_MARKER = '<div class="d h">';

    /**
     * Text that precedes the reply form on the WAP thread page.
     * NOTE(review): the original literal was lost; this value is copied from
     * the posting form in send() as a guess - confirm against the real page.
     */
    const REPLY_FORM_MARKER = '<div class="d h">';

    /**
     * Text identifying a pinned thread in the WAP thread list.
     * NOTE(review): the original literal was lost. While this is empty no
     * entry is treated as pinned, which matches what the code did as written.
     */
    const STICKY_MARKER = '';

    /**
     * Text separating thread rows on the desktop forum page.
     * NOTE(review): the original literal was lost; '<tr>' is a guess based
     * on the rows being split on '</td>' afterwards - confirm.
     */
    const THREAD_ROW_DELIMITER = '<tr>';

    /**
     * Text ending the thread title inside a row's link cell.
     * NOTE(review): the original literal was lost; '</a>' is inferred from
     * the 4 extra characters the title substring takes before strip_tags().
     */
    const TITLE_END_MARKER = '</a>';

    /** HTTP client used for all requests. */
    private $http;

    public function __construct()
    {
        $this->http = new httpconnector;
    }

    /**
     * Posts the login form and classifies the response by the messages it contains.
     *
     * NOTE(review): $username and $pwd are inserted into the request body
     * without URL-encoding, as in the original; callers must pre-encode
     * values containing '&' or '='.
     *
     * @param string $username
     * @param string $pwd
     * @return int 0 if the page contains "密码错误", 1 if it contains "验证码",
     *             2 if it contains "账户不存在", 3 when none of these is present.
     */
    public function login($username, $pwd)
    {
        $data = (string) $this->http->post(
            'http://wappass.baidu.com/passport/',
            "login_username=" . $username . "&login_loginpass=" . $pwd . "&login=yes&aa=登录&can_input=0"
        );

        // "!== false" rather than "> 0": a match at offset 0 is still a match.
        if (strpos($data, "密码错误") !== false) {
            return 0;
        }
        if (strpos($data, "验证码") !== false) {
            return 1;
        }
        if (strpos($data, "账户不存在") !== false) {
            return 2;
        }
        return 3;
    }

    /**
     * Creates a new thread, then re-reads the thread list to find its id.
     *
     * @param string $tiebaname Forum name, appended to the URL unchanged.
     * @param string $title
     * @param string $content
     * @return string|false Id taken from the first non-pinned list entry, or
     *                      false when the form or the id could not be found.
     */
    public function send($tiebaname, $title, $content)
    {
        $url = "http://wapp.baidu.com/f/?kw=" . $tiebaname;

        $form = $this->parseForm($this->http->get($url), self::POST_FORM_MARKER);
        if ($form === null) {
            return false;
        }
        list($action, $fields) = $form;

        // The space after "co=" is part of the original request string and is kept.
        $postdata = $fields . "ti=" . $title . "&co= " . $content;
        $this->http->post("http://wapp.baidu.com/" . $action, $postdata);

        // Pinned threads sit above the new thread in the list and must be skipped.
        $entries = explode('<div class="i">', (string) $this->http->get($url));
        $total = count($entries);
        for ($i = 1; $i < $total; $i++) {
            if (self::STICKY_MARKER !== '' && strpos($entries[$i], self::STICKY_MARKER) !== false) {
                continue;
            }
            $afterKz = explode("m?kz=", $entries[$i]);
            if (!isset($afterKz[1])) {
                return false;
            }
            $idParts = explode("&", $afterKz[1]);
            return $idParts[0];
        }

        // The original returned the exploded array here; false is an explicit failure value.
        return false;
    }

    /**
     * Follows the "退出" (log out) link found on the WAP home page.
     *
     * @return string|false Response of the logout request, or false when no
     *                      logout link is present on the page.
     */
    public function logout()
    {
        $page = (string) $this->http->get("http://wapp.baidu.com");

        $beforeLink = explode("退出</a>", $page);
        if (count($beforeLink) < 2) {
            // Without this guard the last href on the page would be fetched instead.
            return false;
        }

        // The logout URL is the last href="..." before the link text.
        $hrefs = explode('href="', $beforeLink[0]);
        $last = explode('"', $hrefs[count($hrefs) - 1]);

        return $this->http->get($last[0]);
    }

    /**
     * Posts a reply to a thread. Echoes the fetched page, the post URL and
     * the response, as the original did.
     *
     * @param string $tid     Thread id, appended to the URL unchanged.
     * @param string $content
     * @return string|false Response of the reply request, or false when the
     *                      reply form could not be found.
     */
    public function replay($tid, $content)
    {
        $url = "http://wapp.baidu.com/f/?kz=" . $tid;

        $data = $this->http->get($url);
        echo $data;

        $form = $this->parseForm($data, self::REPLY_FORM_MARKER);
        if ($form === null) {
            return false;
        }
        list($action, $fields) = $form;

        $postdata = $fields . "co=" . $content;
        $url = "http://wapp.baidu.com/" . $action;
        echo $url;

        $data = $this->http->post($url, $postdata);
        echo $data;

        return $data;
    }

    /**
     * Fetches a forum's thread list from the desktop site.
     * The forum name is converted to GBK and the results are GBK as well.
     *
     * @param string $tiebaname Forum name in UTF-8.
     * @return array[] One entry per thread with the keys clickcount,
     *                 replaynum, title, author, tid and replay.
     */
    public function crul($tiebaname)
    {
        $tiebaname = iconv("UTF-8", "GBK", $tiebaname);
        $url = "http://tieba.baidu.com/f?kw=" . $tiebaname;
        $content = (string) $this->http->get($url);

        $out = explode(self::THREAD_ROW_DELIMITER, $content);
        // Piece 0 precedes the first row and the last 7 pieces are skipped, as in the original.
        $num = count($out) - 7;
        $result = array();

        for ($i = 1; $i < $num + 1; $i++) {
            $tli = explode("</td>", $out[$i]);
            if (count($tli) < 5) {
                // Not a full thread row; the original would have read undefined cells.
                continue;
            }

            $pos1 = (int) strpos($tli[0], '<td nowrap>');
            $clickcount = substr($tli[0], $pos1 + 11);
            $replaynum = $this->getNum($tli[1]);
            $author = trim(strip_tags($tli[3]));
            $replay = $this->getChinese($tli[4]);

            $href = $tli[2];
            $pos1 = (int) strpos($href, '"_blank"');
            $pos2 = (int) strpos($href, self::TITLE_END_MARKER);
            $title = strip_tags(substr($href, $pos1 + 9, $pos2 - $pos1 - 5));

            $pos1 = (int) strpos($href, '/p');
            $tid = substr($href, $pos1 + 3, 10);

            if ($replay == "") {
                $replay = $author;
            }

            $result[] = array(
                "clickcount" => $clickcount,
                "replaynum" => $replaynum,
                "title" => $title,
                "author" => $author,
                "tid" => $tid,
                "replay" => $replay,
            );
        }

        return $result;
    }

    /**
     * Keeps only the two-byte sequences whose first byte is above 0xA0.
     * Works on bytes, not characters.
     *
     * @param string $string
     * @return string
     */
    public function getChinese($string)
    {
        $string = (string) $string;
        $kept = '';
        $length = strlen($string);

        for ($i = 0; $i < $length; $i++) {
            if (ord($string[$i]) > 0xA0) {
                $kept .= substr($string, $i, 2);
                $i++; // the second byte belongs to the same sequence
            }
        }

        return $kept;
    }

    /**
     * Like getChinese(), but also keeps single-byte digits.
     *
     * @param string $string
     * @return string
     */
    public function getChineseNum($string)
    {
        $string = (string) $string;
        $kept = '';
        $length = strlen($string);

        for ($i = 0; $i < $length; $i++) {
            $byte = $string[$i];
            if (ord($byte) > 0xA0) {
                $kept .= substr($string, $i, 2);
                $i++;
            } elseif (is_numeric($byte)) {
                $kept .= $byte;
            }
        }

        return $kept;
    }

    /**
     * Keeps only the single-byte digits of a string.
     *
     * @param string $string
     * @return string
     */
    public function getNum($string)
    {
        $string = (string) $string;
        $kept = '';
        $length = strlen($string);

        for ($i = 0; $i < $length; $i++) {
            if (is_numeric($string[$i])) {
                $kept .= $string[$i];
            }
        }

        return $kept;
    }

    /**
     * Extracts the action URL and the leading form fields from the form that
     * follows $startMarker.
     *
     * The first two <input> elements are skipped and collection stops after
     * the field named "sub1", as the original loops did.
     *
     * @param string|false $page        Page returned by the HTTP client.
     * @param string       $startMarker Text that precedes the form.
     * @return array|null array(action, "name=value&..." string), or null when
     *                    the marker is not on the page.
     */
    private function parseForm($page, $startMarker)
    {
        $parts = explode($startMarker, (string) $page);
        if (!isset($parts[1])) {
            return null;
        }
        $formParts = explode('</form>', $parts[1]);
        $form = $formParts[0];

        // The original called strpos() with haystack and needle swapped, so it
        // always cut at a fixed offset of 15. Read the attribute directly and
        // keep the fixed offset only as a fallback.
        if (preg_match('/action="([^"]*)"/', $form, $m)) {
            // The action is appended to a base URL that already ends in '/'.
            $action = ltrim($m[1], '/');
        } else {
            $pieces = explode('"', (string) substr($form, 15));
            $action = $pieces[0];
        }

        $inputs = explode("<input", $form);
        $total = count($inputs);
        $fields = "";
        for ($i = 3; $i < $total; $i++) {
            $name = $this->attribute($inputs[$i], 'name');
            $value = $this->attribute($inputs[$i], 'value');
            $fields .= $name . "=" . $value . "&";
            if ($name == 'sub1') {
                break;
            }
        }

        return array($action, $fields);
    }

    /**
     * Returns the value of a double-quoted attribute in a tag fragment, or
     * '' when the attribute is absent.
     *
     * @param string $tag
     * @param string $name
     * @return string
     */
    private function attribute($tag, $name)
    {
        if (preg_match('/\b' . preg_quote($name, '/') . '="([^"]*)"/', $tag, $m)) {
            return $m[1];
        }
        return '';
    }
}

?>
[/php]

一个网页抓取的类支持get+post+cookie存储

以前做过一个贴吧发帖机,当时设定的是每发一帖就登录一次,结果账号被禁用了。后来改了下程序:登录一次后保存cookie信息,下次再发帖就不用登录了。

[php]
<?php
class httpconnector
{
    /** Key the cookie string is stored under in the key-value store. */
    const COOKIE_KEY = "cookie";

    /** User-Agent header sent with every request. */
    const USER_AGENT = 'Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)';

    /** cURL handle of the most recent request. */
    private $curl;

    /** Cookie header value: "name=value" pairs joined by ';'. */
    private $cookie = '';

    /** SaeKV client used to persist the cookie string between runs. */
    private $kv;

    /**
     * Connects to the key-value store and loads a previously saved cookie
     * string when the store returns a truthy value for it.
     */
    public function __construct()
    {
        $this->kv = new SaeKV();
        $this->kv->init();

        $saved = $this->kv->get(self::COOKIE_KEY);
        if ($saved) {
            $this->cookie = $saved;
        }
    }

    /**
     * Downloads a page with GET.
     *
     * @param string $url
     * @return string|false Response headers followed by the body, or false
     *                      when the transfer failed.
     */
    public function get($url)
    {
        return $this->request($url, false, null);
    }

    /**
     * Downloads a page with POST.
     *
     * @param string $url
     * @param mixed  $params POST payload, handed to CURLOPT_POSTFIELDS unchanged.
     * @return string|false Response headers followed by the body, or false
     *                      when the transfer failed.
     */
    public function post($url, $params)
    {
        return $this->request($url, true, $params);
    }

    /**
     * Performs one request, then records and persists the cookies it set.
     *
     * @param string $url
     * @param bool   $isPost True to send $params as a POST body.
     * @param mixed  $params POST payload (ignored for GET).
     * @return string|false
     */
    private function request($url, $isPost, $params)
    {
        $this->curl = curl_init();

        curl_setopt($this->curl, CURLOPT_URL, $url);
        // Keep the response headers in the returned string; Set-Cookie is read from them.
        curl_setopt($this->curl, CURLOPT_HEADER, 1);
        curl_setopt($this->curl, CURLOPT_USERAGENT, self::USER_AGENT);
        if ($this->cookie !== '') {
            curl_setopt($this->curl, CURLOPT_COOKIE, $this->cookie);
        }
        if ($isPost) {
            curl_setopt($this->curl, CURLOPT_POST, 1);
            curl_setopt($this->curl, CURLOPT_POSTFIELDS, $params);
        }
        // Return the response as a string instead of printing it.
        curl_setopt($this->curl, CURLOPT_RETURNTRANSFER, 1);

        $data = curl_exec($this->curl);
        // Must be queried before the handle is closed.
        $headerSize = ($data === false) ? 0 : (int) curl_getinfo($this->curl, CURLINFO_HEADER_SIZE);
        curl_close($this->curl);

        if ($data === false) {
            return false;
        }

        $this->rememberCookies(substr($data, 0, $headerSize));

        return $data;
    }

    /**
     * Merges the Set-Cookie values found in a header block into the cookie
     * string and saves the result to the key-value store.
     *
     * Only the leading "name=value" of each Set-Cookie header is kept;
     * attributes such as path or expires are dropped.
     *
     * @param string $headers Raw response header block.
     */
    private function rememberCookies($headers)
    {
        $fresh = array();
        // Header names are case-insensitive, and a cookie without attributes
        // has no trailing ';', so match up to ';' or the end of the line.
        if (preg_match_all('/^Set-Cookie:[ \t]*([^;\r\n]*)/mi', (string) $headers, $matches)) {
            $fresh = $matches[1];
        }

        // A later pair with the same name replaces the earlier one, so the
        // string does not grow with duplicates on every request.
        $jar = array();
        foreach (array_merge(explode(';', (string) $this->cookie), $fresh) as $pair) {
            $pair = trim($pair);
            if ($pair === '') {
                continue;
            }
            $parts = explode('=', $pair, 2);
            $jar[trim($parts[0])] = $pair;
        }
        $this->cookie = implode(';', $jar);

        $this->kv->set(self::COOKIE_KEY, $this->cookie);
    }
}
?>
[/php]