老套的爬图片实例,陆续写了几天,不容易,终于出来了。
难点有:分别为图片创建文件夹,爬取时不惧怕防盗链。
没有定义函数 比较难看,哈哈
from bs4 import BeautifulSoup
import requests
import urllib.request
import os
import re
# Fetch the gallery index page and collect the detail-page URL of every
# album listed on it.
website = "http://www.sucaibar.com/image/meinv/"
web_data = requests.get(website)
soup = BeautifulSoup(web_data.text, 'lxml')
urls = soup.select('#pic-list > li > a')
# Album detail-page URLs; each one is visited by the download loop below.
url_list = [url.get('href') for url in urls]
print(url_list)
# Install a URL opener with a spoofed mobile User-Agent ONCE, up front —
# the original rebuilt and re-installed it for every single image.  The
# custom header is what defeats the site's hot-link protection.
opener = urllib.request.build_opener()
opener.addheaders = [('User-agent', 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.76 Mobile Safari/537.36')]
urllib.request.install_opener(opener)

# Visit each album page, create a folder named after the page title, and
# download every image linked on that page into it.
for url in url_list:
    web_data = requests.get(url)
    soup = BeautifulSoup(web_data.text, 'lxml')
    links = soup.select('body > div.content > div.col-main > div > div.gbox.sucai-detail > div > div > div > a')
    folder_name = soup.title.string.replace('_美女图片_素材吧', '')
    print(links)
    # Strip characters that are illegal in Windows directory names before
    # using the page title as a folder — otherwise os.makedirs can crash.
    safe_name = re.sub(r'[\\/:*?"<>|]', '_', str(folder_name)).strip()
    folder_path = "C:/ChromeDL/Pics/" + safe_name + "/"
    if not os.path.exists(folder_path):  # create the folder if it does not exist yet
        os.makedirs(folder_path)
    print(folder_path)
    # Collect the href of every image link on the page.
    download_links = [link.get('href') for link in links]
    print(download_links)
    for item in download_links:
        # Save each image under its original file name (last URL segment).
        urllib.request.urlretrieve(item, folder_path + item.split('/')[-1])
运行结果如下:

