url=self.url
f.write(result+ 'n')
fromlxml importetree
results=target.xpath( '//div[@class="read-content j_readContent"]/p/text')
fromlxml importetree
def__init__(self):
defmian(self):
#剖析链接地址
forresult inresults:
3.获取图片的链接地址。
url=self.url
f.write(result+ 'n')
fromlxml importetree
results=target.xpath( '//div[@class="read-content j_readContent"]/p/text')
fromlxml importetree
def__init__(self):
defmian(self):
#剖析链接地址
forresult inresults:
3.获取图片的链接地址。importrequests
2.发送请求,获取网页。defget_html(self,url):
print(name)
links=target.xpath( '//ul[@class="cf"]/li/a/@href')
defparse_html(self,html):
fori inrange( 1, 100):
names=target.xpath( '//span[@class="content-wrap"]/text')
html=response.content.decode( 'utf-8')
forlink,name inzip(links,names):
self.headers = {
html=response.content.decode( 'utf-8')
links=target.xpath( '//ul[@class="cf"]/li/a/@href')
self.headers = {
res=requests.get(host,headers=self.headers)
defmain(self):
print(name)
c=res.content.decode( 'utf-8')
target=etree.HTML(c)
fromlxml importetree
forname innames:
打开网址:
fromfake_useragent importUserAgent
forlink inlinks:
classphoto_spider(object):
5.生存为txt文件到当地。
withopen( 'F:/pycharm文件/document/'+ name + '.txt', 'a') asf:
fromfake_useragent importUserAgent
names=target.xpath( '//span[@class="content-wrap"]/text')
fori inrange( 1, 100):
'User-Agent': ua.random
forlink,name inzip(links,names):
self.headers = {
html=response.content.decode( 'utf-8')
self.headers = {
res=requests.get(host,headers=self.headers)
defmain(self):
print(name)
html=self.get_html(url)
forlink inlinks:
classphoto_spider(object):
5.生存为txt文件到当地。withopen( 'F:/pycharm文件/document/。
本文来源:开云全站APP官网-www.lywtzz.cn