威望0
积分7956
贡献0
在线时间763 小时
UID1
注册时间2021-4-14
最后登录2024-11-22
管理员
- UID
- 1
- 威望
- 0
- 积分
- 7956
- 贡献
- 0
- 注册时间
- 2021-4-14
- 最后登录
- 2024-11-22
- 在线时间
- 763 小时
|
# -*- coding: utf-8 -*-
# @Time : 2021/11/13 20:21
# @File :JD.py
# @Software : PyCharm
import requests
import re
import os
def main():
    """Prompt for a search keyword and a page count, then start the download.

    The output directory ``D:JD/<keyword>`` is created through ``makedir``
    before ``getImg`` is called. The page count is forwarded exactly as
    typed (a string); ``getImg`` is responsible for converting it.

    NOTE(review): ``D:JD/`` has no separator after the drive letter, so on
    Windows it is presumably resolved relative to the current directory of
    drive D rather than the drive root -- confirm this is intended.
    """
    search_term = input("请输入要爬取的内容:")
    page_count = input("请输入你要爬取页数:")
    output_dir = "D:JD/{}".format(search_term)
    makedir(output_dir)
    getImg(search_term, page_count)
def getImg(keyword, page):
    """Download product thumbnails from JD search results for ``keyword``.

    For each of the first ``page`` result pages the search HTML is fetched,
    every lazy-loaded 220x220 thumbnail URL is extracted with a regular
    expression, and each image is written to
    ``D:JD/<keyword>/<page number>_<image index>.jpg``.

    Args:
        keyword: Search term. It is also used as the name of the output
            sub-directory, which must already exist (``open`` does not
            create directories).
        page: Number of result pages to crawl, given as an int or as a
            numeric string (``main`` passes the raw ``input()`` text).

    Returns:
        None. When ``page`` cannot be parsed as an integer nothing is
        downloaded and the function returns silently.
    """
    # The original tested the bound method ``page.isnumeric`` (always truthy),
    # so invalid input crashed inside int(). Parse explicitly instead.
    try:
        page_count = int(page)
    except (TypeError, ValueError):
        return

    from urllib.parse import quote

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36'}
    # Compiled once; the pattern captures everything up to (not including)
    # the trailing "jpg", which is re-appended when the URL is rebuilt.
    pattern = re.compile(
        r'<img width="220" height="220" data-img="1" data-lazy-img="(.*?)jpg" />',
        re.S | re.M)
    # Percent-encode the keyword so spaces, '&', '#', etc. cannot corrupt
    # the query string.
    query = quote(keyword)
    timeout = 10  # seconds; avoids hanging forever on a stalled connection

    for page_no in range(1, page_count + 1):
        # The URL's "page" parameter takes odd values only: 1, 3, 5, ...
        url = ("https://search.jd.com/search?keyword=" + query
               + "&qrst=1&wq=" + query
               + "&stock=1&stock=1&cid2=12221&page=" + str(2 * page_no - 1)
               + "&click=0")
        html = requests.get(url=url, headers=headers, timeout=timeout).text
        img_paths = pattern.findall(html)
        print(img_paths)
        # enumerate() supplies the per-page image index; the original never
        # advanced ``imgNum`` and bumped the page counter instead.
        for img_index, img_path in enumerate(img_paths):
            # Captured URLs are scheme-relative ("//host/..."), so add the scheme.
            img_url = "https:" + img_path + "jpg"
            print(img_url)
            img_resp = requests.get(url=img_url, headers=headers, timeout=timeout)
            img_name = ("D:JD/" + keyword + "/" + str(page_no) + "_"
                        + str(img_index) + ".jpg")
            # The context manager closes the file; no explicit close() needed.
            with open(img_name, "wb") as f:
                f.write(img_resp.content)
def makedir(path):
    """Ensure that the directory ``path`` exists.

    Missing parent directories are created as well, and an already existing
    directory is left untouched. Using ``os.makedirs(..., exist_ok=True)``
    avoids both the failure of ``os.mkdir`` when the parent is missing and
    the race between a separate existence check and the creation call.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
        OSError: If the directory cannot be created for another reason.
    """
    os.makedirs(path, exist_ok=True)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|