威望0
积分7946
贡献0
在线时间763 小时
UID1
注册时间2021-4-14
最后登录2024-11-21
管理员
- UID
- 1
- 威望
- 0
- 积分
- 7946
- 贡献
- 0
- 注册时间
- 2021-4-14
- 最后登录
- 2024-11-21
- 在线时间
- 763 小时
|
[mw_shl_code=python,true]# -*- coding: utf-8 -*-
import requests
import random
import time
import os
import json
from lxml import etree
import threading
from pynput.keyboard import Key, Listener
def getpage(url, refer='https://www.mzitu.com/', host='www.mzitu.com', timeout=15):
    """Fetch *url* with browser-like headers and return the response.

    Args:
        url: Address to request. A blank or whitespace-only value is rejected.
        refer: Value sent in the ``Referer`` header.
        host: Value sent in the ``Host`` header.
        timeout: Seconds ``requests`` waits on the server before giving up.

    Returns:
        The ``requests`` response when the status code is 200, otherwise the
        integer ``0``. Callers compare the result against ``0`` to detect
        failure, so that sentinel is kept.

    Raises:
        Any exception raised by ``requests.get`` (connection errors, timeouts)
        propagates to the caller unchanged.
    """
    if not url.strip():
        return 0

    agents = (
        "Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:82.0) Gecko/20100101 Firefox/82.0",
        "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0",
        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    )
    headers = {
        'Host': host,
        'Referer': refer,
        # A different User-Agent is picked at random for every request.
        'User-Agent': random.choice(agents),
        'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
        'Connection': 'keep-alive',
    }

    # Without an explicit timeout a stalled connection would block the
    # download thread indefinitely.
    page = requests.get(url, headers=headers, timeout=timeout)
    if page.status_code != 200:
        print('the page status is :', page.status_code )
        return 0
    return page
def getPicCoverlist(HomePage):
    """Pair every album link on a listing page with its cover image's alt text.

    Args:
        HomePage: Response object for a listing page; its ``encoding`` is set
            to ``'utf-8'`` before the text is parsed.

    Returns:
        A list of ``(href, alt)`` tuples, built by zipping the two XPath
        result lists in document order.
    """
    HomePage.encoding = 'utf-8'
    tree = etree.HTML(HomePage.text)
    hrefs = tree.xpath('//div[@class="postlist"]/ul[@id="pins"]/li/a/@href')
    alts = tree.xpath('//div[@class="postlist"]/ul[@id="pins"]/li/a/img/@alt')
    return [(href, alt) for href, alt in zip(hrefs, alts)]
def getPageCount(headpageurl):
    """Read the pagination bar of an album page and return its page range.

    Args:
        headpageurl: URL of the album's first page.

    Returns:
        ``[headpageurl, first, last]`` where ``first`` is the number at the
        end of the last pagination link and ``last`` the number at the end of
        the second-to-last one. When fewer than two pagination links are
        found, an empty range (``first=2``, ``last=1``) is returned so that
        only the head page itself is processed.

    Raises:
        ConnectionError: If the album page could not be fetched.
        ValueError: If a pagination link does not end in a number.
    """
    page = getpage(headpageurl)
    if page == 0:
        # getpage signals failure with 0; fail with a readable message
        # instead of an AttributeError on the int.
        raise ConnectionError('failed to load album page: ' + headpageurl)
    page.encoding = 'utf-8'
    html = etree.HTML(page.text)
    nav_links = html.xpath('//div[@class="pagenavi"]/a/@href')
    if len(nav_links) < 2:
        return [headpageurl, 2, 1]
    HeadNum = int(nav_links[-1].split('/')[-1])
    EndNum = int(nav_links[-2].split('/')[-1])
    return [headpageurl, HeadNum, EndNum]
def MakepageUrllist(pageurlNum):
    """Expand ``[base_url, first, last]`` into the list of album page URLs.

    Args:
        pageurlNum: Three-item sequence: the head page URL, the first page
            number to append, and the last page number to append (inclusive).

    Returns:
        A list starting with the head page URL as given, followed by
        ``base/<n>`` for every ``n`` from ``first`` to ``last``. An empty
        range yields just the head page URL.
    """
    base, first, last = pageurlNum[0], pageurlNum[1], pageurlNum[2]
    # Drop any trailing slash before appending so the result never
    # contains "//" inside the path.
    stem = base.rstrip('/')
    return [base] + [stem + '/' + str(n) for n in range(first, last + 1)]
def getPicdown(piclasspageurl, savepath):
    """Download the image shown on one album page into *savepath*.

    Args:
        piclasspageurl: URL of the album page; the text after its last ``/``
            must be an integer.
        savepath: Existing directory the image file is written to.

    Returns:
        The trailing number of *piclasspageurl* when the image was fetched
        and written, otherwise ``0`` (file already present, image request
        failed, or the file could not be written).

    Raises:
        ValueError: If the URL does not end in a number.
        ConnectionError: If the album page itself could not be fetched.
        IndexError: If no image element is found on the page.
    """
    from urllib.parse import urlsplit

    pos = piclasspageurl.rfind('/')
    last = int(piclasspageurl[pos + 1:])
    # The Referer sent with the page request is the URL with the trailing
    # number decremented, or the page itself when the number is 1 or less.
    if last > 1:
        lasturl = piclasspageurl[:pos + 1] + str(last - 1)
    else:
        lasturl = piclasspageurl

    page = getpage(piclasspageurl, lasturl)
    if page == 0:
        raise ConnectionError('failed to load picture page: ' + piclasspageurl)
    page.encoding = 'utf-8'
    html = etree.HTML(page.text)
    imgurl = html.xpath('//div[@class="main-image"]/p/a/img/@src')
    if not imgurl:
        raise IndexError('no image found on page: ' + piclasspageurl)
    img_src = imgurl[0]
    imgname = os.path.join(savepath, img_src.split('/')[-1])

    if os.path.exists(imgname):
        print('\n已下载:', imgurl)
        return 0

    # The Host header has to match the server that actually hosts the image;
    # fall back to the previously hard-coded host if the URL carries none.
    img_host = urlsplit(img_src).netloc or 'imgpc.iimzt.com'
    imghtml = getpage(img_src, piclasspageurl, img_host)
    if imghtml == 0:
        print('下载图片错误', piclasspageurl, img_src)
        return 0

    saved = True
    try:
        with open(imgname, 'wb') as f:
            f.write(imghtml.content)
    except OSError:
        print('写入错误:' + img_src)
        saved = False
        # A partial file would be mistaken for a finished download by the
        # os.path.exists() check on the next run, so remove it.
        try:
            if os.path.exists(imgname):
                os.remove(imgname)
        except OSError:
            pass

    # Random pause after every image request.
    time.sleep(random.randint(3, 8))
    return last if saved else 0
def checkdownfolder(savefolder, covername):
    """Make sure the album folder exists, switch into it and return its path.

    Args:
        savefolder: Parent directory for the category.
        covername: Folder name for the album.

    Returns:
        The path of the album folder (``savefolder`` joined with
        ``covername``).

    Side effects:
        Creates the folder (and missing parents) and changes the process
        working directory to it.
    """
    tfolder = os.path.join(savefolder, covername)
    # exist_ok avoids the race between an existence check and the creation.
    os.makedirs(tfolder, exist_ok=True)
    os.chdir(tfolder)
    return tfolder
def getPicClasslist(homepage):
    """Collect the navigation-menu entries of the home page.

    Args:
        homepage: Response object for the site home page; its ``encoding``
            is set to ``'utf-8'`` before the text is parsed.

    Returns:
        A list of ``(title, href)`` tuples, one per menu link, built by
        zipping the two XPath result lists in document order.
    """
    homepage.encoding = 'utf-8'
    tree = etree.HTML(homepage.text)
    hrefs = tree.xpath('//div[@class="mainnav"]/ul[@id="menu-nav"]/li/a/@href')
    titles = tree.xpath('//div[@class="mainnav"]/ul[@id="menu-nav"]/li/a/@title')
    return [(title, href) for title, href in zip(titles, hrefs)]
def choosePicClass():
    """Show the category menu and ask the user to pick one entry.

    The menu lists entries ``1 .. len(menu) - 4`` of the navigation list
    (index 0 and the last three entries are not offered). The prompt repeats
    until a listed number is entered.

    Returns:
        The chosen ``(title, url)`` entry, or an empty list when the menu
        offers no selectable entries.
    """
    homeURL = 'https://www.mzitu.com'
    pclass = getPicClasslist(getpage(homeURL))
    # Validate against exactly the numbers that are displayed rather than
    # a fixed upper bound.
    choices = range(1, len(pclass) - 3)

    print('选择类别'.center(45,'*'))
    for k in choices:
        #print('序号 ' +str(k)+ ' -- ' + pclass[k][0] + ' > '+pclass[k][1] )
        print('序号' + str(k) + ' >> ' + pclass[k][1])
    print(' your choice? '.center(45,'*'))

    if not choices:
        return []
    while True:
        urlnumber = input('请选择数字:').strip()
        # isdecimal() only accepts characters int() can parse, unlike
        # isdigit(), which also accepts e.g. superscript digits.
        if urlnumber.isdecimal() and int(urlnumber) in choices:
            return pclass[int(urlnumber)]
        print('输入无效,请重新输入。')
def getClasspagelist(classhomeurl):
    """Build the list of listing-page URLs for one category.

    Args:
        classhomeurl: URL of the category's first listing page.

    Returns:
        A list starting with *classhomeurl*, followed by
        ``<classhomeurl>/page/<n>/`` for ``n`` from 2 up to the number at the
        end of the last ``page-numbers`` link. When the page has no such
        links, only *classhomeurl* is returned.

    Raises:
        ConnectionError: If the category page could not be fetched.
        ValueError: If the last pagination link does not end in a number.
    """
    page = getpage(classhomeurl)
    if page == 0:
        raise ConnectionError('failed to load category page: ' + classhomeurl)
    page.encoding = 'utf-8'
    html = etree.HTML(page.text)
    tmppagelist = html.xpath('//div[@class="nav-links"]/a[@class="page-numbers"]/@href')

    classpagelist = [classhomeurl]
    if not tmppagelist:
        return classpagelist

    lastpage = tmppagelist[-1].rstrip('/').rsplit('/', 1)[-1]
    # Guarantee exactly one slash between the category URL and "page/".
    base = classhomeurl if classhomeurl.endswith('/') else classhomeurl + '/'
    classpagelist.extend(
        base + 'page/' + str(p) + '/' for p in range(2, int(lastpage) + 1)
    )
    return classpagelist
def getclassCode(ClassUrl):
    """Return the last path segment of *ClassUrl*, ignoring trailing slashes.

    If the URL contains no ``/`` after trimming, the trimmed string itself
    is returned.
    """
    trimmed = ClassUrl.rstrip('/')
    return trimmed.rsplit('/', 1)[-1]
#def delspicalchar(ostring):
#ochar=r'[\/:*?"<>|]*'
#return re.sub(ochar,'',ostring)
def formatname(img_title):
    """Turn an album title into a string usable as a folder name.

    Removes every occurrence of the characters ``\\ / : * ? " < > ! |`` and
    then strips leading and trailing whitespace, so the result is trimmed
    whether or not a forbidden character was present.

    Args:
        img_title: Raw title text.

    Returns:
        The cleaned title (may be empty).
    """
    forbidden = '\\/:*?"<>!|'
    # One translate pass deletes all forbidden characters; stripping last
    # also removes whitespace that only became leading/trailing afterwards.
    return img_title.translate(str.maketrans('', '', forbidden)).strip()
def MainDownThread():
    """Download every album of a user-chosen category, resuming saved progress.

    Reads the module globals ``logpath``, ``mainurl``, ``downfolder`` and
    ``ExitKey``. Progress is kept as a dict with the keys ``classname``,
    ``classpage``, ``covernumber`` and ``picpage`` (all zero-based indexes)
    and written to the log through ``SaveProgress`` when a download fails,
    when an unrecoverable error occurs, and when the loops end (either by
    finishing or because ``ExitKey`` became true).
    """
    picClass = choosePicClass()
    if not picClass:
        # Nothing was selected; indexing an empty result would raise.
        print('\n未选择有效类别,下载进程退出')
        return
    picClasstitle = picClass[0]
    picClassURL = picClass[1]
    picClassCode = getclassCode(picClassURL)
    progressDict = {'classname': picClassCode, 'classpage': 0, 'covernumber': 0, 'picpage': 0}
    progress = []

    # Load previously saved download progress from the log file.
    if os.path.exists(logpath):
        with open(logpath, 'r') as f:
            progress = json.load(f)
        for pDict in progress:
            if pDict['classname'] == picClassCode:
                progressDict = pDict
                picClassURL = mainurl + progressDict['classname'] + '/'
                picClassCode = progressDict['classname']
                picClasstitle = progressDict['classname']
                break

    classpagebegin = progressDict['classpage']
    coverbegin = progressDict['covernumber']
    picbegin = progressDict['picpage']
    classpagelist = getClasspagelist(picClassURL)
    classsavefolder = downfolder + '\\' + picClassCode
    print('开始下载'.center(50,'+'))

    try:
        for cp in range(classpagebegin, len(classpagelist)):
            if ExitKey:
                break
            print('\n 正在下载 : '+ picClasstitle + ' 第 ' + str(cp+1) + ' 页 ' )
            progressDict['classpage'] = cp
            classpageurl = classpagelist[cp]
            coverlist = getPicCoverlist(getpage(classpageurl))
            if cp > classpagebegin:
                # Past the resumed page: start from its first album and do
                # not carry the previous page's positions into the log.
                coverbegin = 0
                progressDict['covernumber'] = 0
                progressDict['picpage'] = 0
            for i in range(coverbegin, len(coverlist)):
                if ExitKey:
                    break
                # Each entry is an (album URL, album title) pair.
                cover = coverlist[i]
                coverUrl = cover[0]
                covertitle = formatname(cover[1])
                print('\n\n Cover ' + str(i+1) + ' : [' + covertitle + ']')
                if cp > classpagebegin or i > coverbegin:
                    # Past the resumed album: start from its first picture
                    # and reset the saved picture index with it, otherwise
                    # a later resume would skip pictures of this album.
                    picbegin = 0
                    progressDict['picpage'] = 0
                progressDict['covernumber'] = i
                coverfolder = checkdownfolder(classsavefolder, covertitle)
                Pagelist = MakepageUrllist(getPageCount(coverUrl))
                print(' 套图共计 : ' + str(len(Pagelist)) + ' 张\n')
                for j in range(picbegin, len(Pagelist)):
                    if ExitKey:
                        break
                    purl = Pagelist[j]
                    try:
                        getre = getPicdown(purl, coverfolder)
                        if getre > 0:
                            print('\r' + '---->完成下载第 ' + str(j + 1) + ' 张', end='', flush=True)
                            progressDict['picpage'] = j
                    except Exception as e:
                        # Record where the failure happened and move on to
                        # the next picture. Rebinding the loop variable
                        # cannot rewind a for loop, so no retry is attempted.
                        progressDict['picpage'] = j
                        SaveProgress(progressDict, progress)
                        print('\n第' + str(j + 1) + '张, 下载错误。', purl, e)
    except Exception as e:
        SaveProgress(progressDict, progress)
        print('\n 出现错误,无法继续' , e)
    else:
        SaveProgress(progressDict,progress)
        print('\n下载进程退出~~~~')
def SaveProgress(pdict, progress):
    """Store *pdict* as the only entry for its category and write the log.

    Args:
        pdict: Progress dict; its ``'classname'`` value identifies the
            category.
        progress: List of progress dicts. It is updated in place: every
            entry with the same ``'classname'`` is dropped and *pdict* is
            appended.

    Side effects:
        Overwrites the file at the module-global ``logpath`` with the JSON
        serialisation of *progress*.
    """
    # Rebuild the list through slice assignment: removing items from a list
    # while iterating over it skips the element after each removal, and the
    # caller's list object must stay the same.
    progress[:] = [pd for pd in progress if pd['classname'] != pdict['classname']]
    progress.append(pdict)
    with open(logpath, 'w') as f:
        f.write(json.dumps(progress))
def presskey(key):
    """Key-press callback that raises the exit flag on left-Ctrl then F3.

    Pressing left Ctrl stores the current time in the global ``TimeStamp``.
    Pressing F3 less than 0.5 seconds after that stored time sets the global
    ``ExitKey`` to True, waits one second and returns False (presumably the
    listener's signal to stop; see the pynput documentation). Any other
    press returns None.
    """
    global ExitKey, TimeStamp
    if key == Key.ctrl_l:
        TimeStamp = time.time()
    chord_window = 0.5
    if key == Key.f3 and (time.time() - TimeStamp) < chord_window:
        ExitKey = True
        time.sleep(1)
        return False
def onrelease(key):
    """Key-release callback: return False for Esc, None for any other key."""
    return False if key == Key.esc else None
def startlisten():
    """Run a keyboard listener with ``presskey`` and block until it ends.

    Only the press handler is registered; ``onrelease`` is not passed to
    the listener.
    """
    with Listener(on_press=presskey) as key_listener:
        key_listener.join()
# Shared module state. Defined at module level (not only under the
# __main__ guard) so the functions above also work when the module is
# imported instead of run as a script.
ExitKey = False      # set to True by presskey to ask the download loops to stop
TimeStamp = 0        # time of the most recent left-Ctrl press, written by presskey
mainurl = 'https://www.mzitu.com/'
downfolder = r'F:\Backup\privatebak\mypic'
logpath = downfolder + '\\' + 'downinfo.log'

if __name__ == '__main__':
    # Thread.setDaemon() is deprecated; pass the daemon flag to the
    # constructor instead.
    t1 = threading.Thread(target=MainDownThread, daemon=True)
    t1.start()
    # Blocks until the key listener stops, then waits for the download
    # thread to finish.
    startlisten()
    t1.join()
    os.system('pause')
[/mw_shl_code] |
|