First, add the following to settings.py:
# Define the list of User-Agents to rotate through
USER_AGENTS = [
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
    # 'Mozilla/5.0 (Linux; U; Android 4.0.4; en-gb; GT-I9300 Build/IMM76D) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30',
]

# Enable the custom middleware and disable Scrapy's built-in UserAgentMiddleware
DOWNLOADER_MIDDLEWARES = {
    'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': None,
    'seo.middlewares.RandomUserAgent': 400,
}
Then create the middleware file, middlewares.py:
# coding: utf-8
import random


class RandomUserAgent(object):
    """Downloader middleware that attaches a random User-Agent to each request."""

    def __init__(self, agents):
        self.agents = agents

    @classmethod
    def from_crawler(cls, crawler):
        # Read the USER_AGENTS list defined in settings.py
        return cls(crawler.settings.getlist('USER_AGENTS'))

    def process_request(self, request, spider):
        # Pick a random User-Agent and set it on the outgoing request
        request.headers.setdefault('User-Agent', random.choice(self.agents))
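To confirm the middleware is actually rotating the header, you can log the User-Agent that each request was sent with. The spider below is a minimal sketch, not part of the original article; the spider name, the target URL (https://httpbin.org/headers, which echoes request headers), and any other identifiers are assumptions for illustration only.

# coding: utf-8
import scrapy


class UACheckSpider(scrapy.Spider):
    """Hypothetical spider used only to verify the random User-Agent middleware."""
    name = 'ua_check'
    start_urls = ['https://httpbin.org/headers']  # assumed test endpoint

    def parse(self, response):
        # The middleware sets the header before the request goes out,
        # so it is visible on response.request.
        ua = response.request.headers.get('User-Agent')
        self.logger.info('User-Agent used: %s', ua.decode('utf-8') if ua else None)

Run it with `scrapy crawl ua_check` and each run should log one of the strings from USER_AGENTS.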
Original article: "Python focused crawler: how to set the User-Agent in Scrapy". Please credit the source when reposting.