"""Scrape clothing images from gz.17zwd.com.

Workflow (translated from the original Chinese notes):
1. Analyze the site structure to find the common pattern of the URLs to crawl.
2. The item pages contain image URL links.
3. Request each page.
4. Extract the image URLs.
5. Download the images.
"""
import os

import requests
from bs4 import BeautifulSoup

# Browser-like headers shared by all requests (some sites reject the
# default python-requests user agent).
HEADERS = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/100.0.4896.20 Safari/537.36"
    )
}

# Module-level accumulator of {title: item_page_url} mappings, filled by
# Producer.collect() and consumed by Downloader.down().
urls = []


class Producer():
    """Collects item-page URLs for a search query from the listing page."""

    def __init__(self, query, num):
        # BUG FIX: the original URL ended with a stray "?" (inside a
        # placeholder-less f-string), which made requests emit a malformed
        # "sks.htm?&so=..." query string; the "?" is now left to requests.
        self.url = "https://gz.17zwd.com/sks.htm"
        self.headers = dict(HEADERS)
        self.query = query
        self.num = num
        # Query-string parameters: search term and page number.
        self.param = {
            "so": query,
            "page": num,
        }

    def collect(self):
        """Fetch one listing page and append {title: item_url} to `urls`.

        Item pages look like https://gz.17zwd.com/item/122040382
        """
        # Context manager guarantees the response is closed even on error.
        with requests.get(url=self.url, headers=self.headers,
                          params=self.param) as resp:
            soup = BeautifulSoup(resp.text, "lxml")

        for div in soup.find_all("div", {"class": "huohao-img-container"}):
            # href looks like "/item/<id>?..." -> keep only the numeric id.
            item_id = div.a["href"].split("?")[0].split("/")[2]
            title = div.a["title"]
            # `entry` — the original shadowed the builtin name `dict` here.
            entry = {title: "https://gz.17zwd.com/item/" + item_id}
            urls.append(entry)


class Downloader():
    """Downloads every image of each collected item page into a folder
    named after the item title."""

    # The page markup nests images at varying depths; try these selectors
    # in order until one matches (selector strings kept verbatim).
    _SELECTORS = (
        "div.index-root-18be6rFhXn87nebHWkNwG1 > div > img",
        "div.index-root-18be6rFhXn87nebHWkNwG1 > div > div > img",
        "div.index-root-18be6rFhXn87nebHWkNwG1 > div > div > div > img",
        "div.index-root-18be6rFhXn87nebHWkNwG1 > div > p > img",
        "div.index-root-18be6rFhXn87nebHWkNwG1 > p > img",
    )

    def __init__(self):
        self.headers = dict(HEADERS)

    def down(self):
        """Download the images of every entry currently in `urls`."""
        for entry in urls:
            title = list(entry.keys())[0]
            # BUG FIX: the original rebound `resp` inside the image loop,
            # leaking the page response and double-closing the last image
            # response; context managers close each response exactly once.
            with requests.get(url=entry[title], headers=self.headers) as resp:
                soup = BeautifulSoup(resp.text, "lxml")

            if not os.path.exists(title):
                os.mkdir(title)

            # First selector that matches wins — replaces the original
            # five-branch `if len(imgs) == 0:` fallback chain.
            imgs = []
            for selector in self._SELECTORS:
                imgs = soup.select(selector)
                if imgs:
                    break

            # enumerate() replaces the manual `i = 1 / i += 1` counter.
            for i, img in enumerate(imgs, start=1):
                with requests.get(url=img["src"],
                                  headers=self.headers) as img_resp:
                    with open(title + "/" + str(i) + ".jpg", "wb") as f:
                        f.write(img_resp.content)


if __name__ == '__main__':
    query = input("请输入衣服款式:")
    num = input("请输入需要抓取的页码:")
    producer = Producer(query, num)
    producer.collect()
    downloader = Downloader()
    downloader.down()