python之爬虫篇
# 1. POST request
# -*- coding: UTF-8 -*-
"""Send a form-encoded POST to the Youdao translate endpoint and print the reply."""
import requests

# Seconds to wait for the server before giving up; without a timeout
# requests may block indefinitely on an unresponsive host.
REQUEST_TIMEOUT = 10

# Form fields sent in the POST body; "i" carries the text to translate.
formdata = {
    "type": "AUTO",
    "i": "i love python",
    "doctype": "json",
    "xmlVersion": "1.8",
    "keyfrom": "fanyi.web",
    "ue": "UTF-8",
    "action": "FY_BY_ENTER",
    "typoResult": "true",
}

url = "http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule&smartresult=ugc&sessionFrom=null"

# Browser-like User-Agent sent with the request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36"}

# data= makes requests form-encode the dict into the request body.
response = requests.post(url, data=formdata, headers=headers,
                         timeout=REQUEST_TIMEOUT)

# Single-argument parenthesised print behaves the same on Python 2 and 3.
print(response.text)
# 2. Download pictures
# -*- coding: UTF-8 -*-
"""Crawl listing pages of pic.netbian.com and save every referenced .jpg."""
import requests
import re
import random
import string

# Site root that the image paths found in a page are appended to.
SITE_ROOT = "https://pic.netbian.com"

# Prefix that the random file name is appended to.
# NOTE(review): this literal looks like a Windows path whose backslashes were
# lost (e.g. a "D:\\python\\download\\" directory) -- confirm and correct it,
# or pass save_prefix explicitly to for_get_picture().
SAVE_PREFIX = "D:pythondownload"

# Seconds to wait for the server before giving up; a timeout raises
# requests.exceptions.Timeout instead of hanging forever.
REQUEST_TIMEOUT = 10

# Captures the value of every double-quoted src attribute that ends in ".jpg".
JPG_SRC_PATTERN = re.compile(r'src="([^"]*\.jpg)"')

# Characters the random file names are drawn from.
NAME_ALPHABET = string.ascii_letters + string.digits


def write_file(file, content):
    """Write the bytes *content* to the path *file*, then print "done"."""
    with open(file, "wb") as f:
        f.write(content)
    print("done")


def down_picture(file, url, headers):
    """Download *url* and store the body at *file* when the status is 200.

    *headers* is a mapping of HTTP headers; an empty value ("" or {}) means
    no extra headers. Non-200 responses are skipped silently.
    """
    # headers must be passed by keyword: the second positional argument of
    # requests.get() is ``params``, not ``headers``.
    response = requests.get(url, headers=headers or None,
                            timeout=REQUEST_TIMEOUT)
    if response.status_code == 200:
        write_file(file, response.content)


def _extract_jpg_paths(html):
    """Return the values of all src="...jpg" attributes found in *html*."""
    return JPG_SRC_PATTERN.findall(html)


def _random_name(length=8):
    """Return *length* distinct random letters/digits joined into a string."""
    return "".join(random.sample(NAME_ALPHABET, length))


def for_get_picture(url, save_prefix=SAVE_PREFIX):
    """Fetch the listing page *url* and download every .jpg it references.

    Each image is saved as ``save_prefix + <8 random chars> + ".jpg"``.
    Nothing is downloaded when the page does not answer with status 200.
    """
    headers = {}
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    print("code: %d " % (response.status_code))
    if response.status_code != 200:
        return

    # response.text is used rather than response.content because a str
    # pattern cannot be matched against bytes on Python 3.
    for path in _extract_jpg_paths(response.text):
        jpg_url = SITE_ROOT + path
        print(jpg_url)
        target = save_prefix + _random_name() + ".jpg"
        print(target)
        down_picture(target, jpg_url, headers)


# Not used below; the loop builds its own page URLs starting at index 4.
url = "https://pic.netbian.com/4kmeinv/index_3.html"

for i in range(4, 175, 1):
    url1 = "https://pic.netbian.com/4kmeinv/index_%s.html" % (i)
    for_get_picture(url1)
# 3. Download a video
# -*- coding: UTF-8 -*-
"""Stream one remote media file to test.mp4 in 1 MiB chunks."""
import requests

# Seconds to wait for the connection and between received chunks; this is
# not a limit on the total download time.
REQUEST_TIMEOUT = 30

# Disabled experiment kept from the original notes; url1 is not requested by
# the code below.
# url = "https://www.fa67e7417bdc.com/shipin/play-143013.html"
url1 = "https://s1.cdn-c55291f64e9b0e3a.com/common/duanshipin/2021-05-11/dsp_7814d78ed1b2cafda14e85dd81488872_wm/dsp_7814d78ed1b2cafda14e85dd81488872_wm.mp4"
# res = requests.get(url1)
# print(res.content)

# Headers sent with the download request.
hd = {
    "Origin": "https://www.fa67e7417bdc.com",
    "Referer": "https://www.fa67e7417bdc.com/shipin/play-143014.html",
    "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Mobile Safari/537.36"
}

print("开始下载")

url = "https://myjkgl.net/common/duanshipin/2021-05-11/dsp_ad0acdb7273fef972bcc14b3d81edac2_wm/enc_dsp_ad0acdb7273fef972bcc14b3d81edac2_wm4.ts"

# stream=True defers the body download so it can be written chunk by chunk;
# the with-block releases the connection even if writing fails.
with requests.get(url, headers=hd, stream=True, timeout=REQUEST_TIMEOUT) as r:
    # Fail before touching the output file, so an HTTP error page is never
    # saved as test.mp4.
    r.raise_for_status()
    with open("test.mp4", "wb") as mp4:
        for chunk in r.iter_content(chunk_size=1024 * 1024):
            if chunk:  # skip empty chunks
                mp4.write(chunk)
# 4. GET request
# -*- coding: UTF-8 -*-
"""Fetch a page over HTTPS without certificate verification and print it."""
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning

# The request below uses verify=False; silence the InsecureRequestWarning
# category so it is not printed.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

# Seconds to wait for the server before giving up; without a timeout
# requests may block indefinitely on an unresponsive host.
REQUEST_TIMEOUT = 10

url = "https://www.cnblogs.com/ljfight/p/9577783.html"

# Sample headers and query parameters; only the disabled call below uses them.
header = {
    "test": "123",
    "test1": "124",
}
parameters = {
    "test": "124",
}

# Alternative call that sends the query parameters and headers defined above:
# response = requests.get(url, params=parameters, headers=header)

# NOTE(security): verify=False turns off TLS certificate verification, so the
# server's identity is not checked. Do not use this for sensitive traffic.
response = requests.get(url, verify=False, timeout=REQUEST_TIMEOUT)

# Single-argument parenthesised print behaves the same on Python 2 and 3.
print(response.content)
print(response.status_code)