python为什么叫爬虫_python怎么用

(55) 2024-08-07 22:01:01

废话不多说，先上代码：


import requests

from bs4 import BeautifulSoup

import os,re

import sys

# ``reload(sys)`` followed by ``sys.setdefaultencoding`` is a Python 2-only
# workaround that switches the implicit str/unicode codec to UTF-8.  Neither
# name exists on Python 3 (where text is already Unicode), so the hack is
# guarded to keep the module importable on both major versions.
if sys.version_info[0] < 3:
    reload(sys)  # noqa: F821 -- ``reload`` is a builtin on Python 2 only
    sys.setdefaultencoding('utf8')

# Site address used by the crawler (presumably the entry page; the code that
# consumes it is outside this excerpt).
url = 'http://www.bfpgf.com/yld'

# Browser-like User-Agent string sent with every request.
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'

# Request headers shared by all HTTP calls in this script.
headers = {'User-Agent': user_agent}

def get_soup(url, timeout=None):
    """Download ``url`` and return the page parsed as a BeautifulSoup object.

    The request is sent with the module-level ``headers`` (a browser-like
    User-Agent) as a counter-measure against anti-crawler checks.  The HTTP
    status code of the response is printed for every call.

    Args:
        url: Address of the page to fetch.
        timeout: Optional timeout forwarded to ``requests.get``.  The default
            ``None`` keeps the original behaviour of waiting indefinitely.

    Returns:
        A ``BeautifulSoup`` object built from the raw response bytes using
        the ``html.parser`` backend.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    # Parenthesised form prints the same value on Python 2 and Python 3.
    print(response.status_code)
    # Parse the raw bytes so later steps can navigate the document tree.
    soup = BeautifulSoup(response.content, 'html.parser')
    return soup

def download_img(url,page_number):

soup = get_soup(url)

img_urls = soup.article.find_all('img')

numerb_of_arr = len(img_urls)#获取当前页面图片的数量

title = soup.find_all('

THE END

发表回复