#!/usr/bin/env python# coding:utf8importosimportsysimportrequestsimportrefrompyqueryimportPyQueryaspqentity={}images=[]defmain():try:get_url()load_page()save_imgs()exceptException,e:printedefget_url():args=sys.argv# 如果不是dl.py zhihu_url这种格式的话,抛出异常iflen(args)!=2:raiseException(u"Wrong number for args, please use Zhihu question url!")zhihu_url=args[1]# zhihu_url不符合问题页面url格式的话,抛出异常re_exp=re.compile(ur"^https://www\.zhihu\.com/question/(\d+)")match=re_exp.match(zhihu_url)ifnotmatch:raiseException(u"Zhihu url is invalid!")entity['url']=zhihu_urlentity['question']=match.groups()[0]printentitydefload_page():header={ur'User-Agent':ur'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36',ur'Host':ur'www.zhihu.com',ur'Accept':ur'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',ur'Accept-Language':ur'zh-CN,zh;q=0.8,en;q=0.6',ur'Accept-Encoding':ur'gzip, deflate, sdch',ur'Connection':ur'keep-alive',ur'Cache-Control':ur'max-age=0'}resp=requests.get(entity['url'],headers=header)ifresp.status_code!=200:raiseException(u"Http error!")d=pq(resp.content)title=d('title').text()entity['title']=title.split(u" ")[0]imgs=d("img.origin_image.zh-lightbox-thumb.lazy")foreleinimgs:images.append(pq(ele).attr("data-original"))defsave_imgs():dest_dir=os.path.dirname(os.path.abspath(__file__))+"/images/"+entity['question']+ \
entity['title']printdest_dirifnotos.path.exists(dest_dir):os.makedirs(dest_dir)forimginimages:res=requests.get(img)filename=os.path.basename(img)fp=open(dest_dir+"/"+filename,"wb")fp.write(res.content)fp.close()printimg+" done."if__name__=="__main__":main()
/**
 * Created by caiknife on 16/9/9.
 *
 * On DOM ready, prepends a <div id="showImg"> to <body> and lists in it the
 * `data-original` value of every `img.origin_image.zh-lightbox-thumb.lazy`
 * element on the page, one value per line.
 */
$(function () {
    // prependTo() returns the inserted element, so keep that reference
    // instead of re-querying "#showImg" for every image.
    var $showImg = $('<div id="showImg"></div>').prependTo($("body"));

    $('img.origin_image.zh-lightbox-thumb.lazy').each(function () {
        var original = $(this).data('original');
        // Insert the value as a text node so any markup characters in it are
        // shown literally instead of being parsed as HTML.
        $showImg.append(document.createTextNode(original), "<br/>");
    });
});