Scraping web images and saving them locally with Python
I've redacted the target site's URL here; showing it directly wouldn't be a good idea.
If you're someone who's always hunting for wallpapers, you can probably guess which site I scraped (imagine a doge meme here).

```python
from typing import List, Any, Union
# import requests
import random
import string
import urllib.request
import os
from bs4 import BeautifulSoup
from util.accessWebContent import accessWebContent


class _4kpicSpider:
    # download (not implemented yet)
    def download(self):
        pass

    # visit the site and scrape the anime wallpapers on one listing page
    def linkWebSit(self, page):
        result: List[Union[str, Any]] = []
        _base_url = "....."
        file_path = "D:/book/img"
        if not os.path.exists(file_path):
            # create the save directory on first run
            os.makedirs(file_path)
        if (page is None) or (page == 1):
            # the first page has no page number in its URL
            url = "....."
        else:
            url = "....." + str(page) + ".html"
        content = accessWebContent().accessContent(url)
        soup = BeautifulSoup(content, "html.parser")
        pics = soup.find("ul", class_="clearfix").find_all("img")
        for pic in pics:
            next_url = _base_url + pic.attrs["src"]
            result.append(next_url)
            # download the image
            # pic_resp = requests.get(next_url, timeout=10)
            # random 10-character name; collisions are unlikely but possible across runs
            ran_str = "".join(random.sample(string.ascii_letters + string.digits, 10))
            filename = "x" + ran_str + ".jpg"
            print(filename)
            # urllib.request.urlretrieve(next_url, filename=filename)
            with urllib.request.urlopen(next_url, timeout=30) as response, \
                    open(os.path.join(file_path, filename), "wb") as f_save:
                # the with-block flushes and closes the file automatically
                f_save.write(response.read())
        """
        The block below was an attempt to follow each detail-page link and grab a
        higher-resolution image, but it failed: requests cannot fetch content that
        is rendered by JavaScript, so doing this properly requires selenium.
        linkList = soup.find("ul", class_="clearfix").find_all("a")
        for link in linkList:
            next_url = _base_url + link.attrs["href"]
            result.append(next_url)
            next_content = accessWebContent().accessContent(next_url)
            next_html = BeautifulSoup(next_content, "html.parser")
            imgEle = next_html.find(id="img")
            print(imgEle)
        """
        # print(result)
        return result


if __name__ == "__main__":
    spider = _4kpicSpider()
    for i in range(1, 147):
        res = spider.linkWebSit(i)
```

The `accessWebContent` helper imported above lives in `util/accessWebContent.py`; it fetches a page and fixes up the encoding when requests guesses it wrong:

```python
import requests
import logging


class accessWebContent:
    # fetch a page without any custom request headers
    def accessContent(self, url):
        req = requests.get(url)
        if req.encoding == "ISO-8859-1":
            # requests fell back to its default; detect the real encoding from the page
            encodings = requests.utils.get_encodings_from_content(req.text)
            if encodings:
                encoding = encodings[0]
            else:
                encoding = req.apparent_encoding
            # with "replace", any bytes that can't be decoded become "?"
            encode_content = req.content.decode(encoding, "replace")
        else:
            encode_content = req.text
        # the default log level is WARNING, so this stays silent unless DEBUG is enabled
        logging.debug(encode_content)
        return encode_content
```
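Because the root logger defaults to WARNING, that `logging.debug` call prints nothing as-is. A quick usage sketch for seeing the fetched HTML while debugging (the URL is a placeholder, not the real site):

```python
import logging
from util.accessWebContent import accessWebContent

# raise the root logger to DEBUG so accessContent's logging.debug output shows up
logging.basicConfig(level=logging.DEBUG)

html = accessWebContent().accessContent("https://example.com")  # placeholder URL
```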
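As for the JS-rendered detail pages that defeated requests, here is a minimal sketch of the selenium route. Everything in it is an assumption: a Chrome driver on the PATH, the element id `img` carried over from the failed attempt above, and a hypothetical `fetch_fullsize_src` helper with placeholder URLs.

```python
from selenium import webdriver
from selenium.webdriver.common.by import By
import urllib.request


def fetch_fullsize_src(detail_url):
    # headless Chrome executes the JavaScript that plain requests cannot
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(detail_url)
        # "img" is the element id from the failed attempt; verify it against the real page
        img = driver.find_element(By.ID, "img")
        return img.get_attribute("src")
    finally:
        driver.quit()


# usage sketch: download the rendered image the same way the spider does
# src = fetch_fullsize_src(".....")  # redacted detail-page URL
# with urllib.request.urlopen(src, timeout=30) as resp, open("hires.jpg", "wb") as f:
#     f.write(resp.read())
```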
Here's what the scrape produces: