Python crawler: webdriver.get("url") returns 403 Forbidden

# coding=utf-8

import requests
from selenium import webdriver
import time

class JzSpider:
    """Log in to itjuzi.com through Chrome, then download and save one page.

    ``run`` drives a Selenium Chrome session through the login form, fetches
    ``start_url`` with ``requests`` and writes the decoded body to ``Jz.txt``.
    """

    def __init__(self):
        """Set the page to download and the browser-like request headers."""
        self.start_url = "http://radar.itjuzi.com//company"
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36",
            # The value must not repeat the header name ("Accept:...").
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
            # "keep - alive" (with spaces) is not a valid Connection token.
            "Connection": "keep-alive",
            # "br" is deliberately not advertised: requests only decodes
            # Brotli when an optional Brotli package is installed, and
            # parse_url() decodes the body as UTF-8 text.
            "Accept-Encoding": "gzip, deflate",
        }

    def parse_url(self, url, proxies=None, cookies=None):
        """Fetch ``url`` with ``self.headers`` and return the body as text.

        Args:
            url: Address to request.
            proxies: Optional ``requests`` proxy mapping, e.g.
                ``{"http": "http://117.127.0.204:8080"}``. Defaults to no
                proxy, which is what the original code effectively did (it
                built the mapping but never passed it on).
            cookies: Optional ``{name: value}`` cookies to send.

        Returns:
            The response body decoded as UTF-8.

        Raises:
            UnicodeDecodeError: If the body is not valid UTF-8.
        """
        response = requests.get(
            url, headers=self.headers, proxies=proxies, cookies=cookies
        )
        return response.content.decode("utf-8")

    def save_content_list(self, content):
        """Write ``content`` to ``Jz.txt`` (UTF-8), replacing any old file."""
        with open("Jz.txt", "w", encoding="utf-8") as f:
            f.write(content)
        print("")

    def run(self):
        """Log in through Chrome, then download and save ``start_url``."""
        driver = webdriver.Chrome()
        try:
            # The 403 Forbidden was reported on this driver.get() call.
            driver.get("https://www.itjuzi.com/user/login?flag=radar&redirect=/company")

            # find_element("id", ...) is accepted by both old and new Selenium
            # releases; the find_element_by_id shortcut was removed later.
            # NOTE(review): credentials are hard-coded; consider reading them
            # from configuration or the environment.
            driver.find_element("id", "create_account_email").send_keys("13333331328")
            driver.find_element("id", "create_account_password").send_keys("lz133333333334")
            # presumably leaves time for a manual step (e.g. a captcha)
            # before submitting - TODO confirm
            time.sleep(8)
            driver.find_element("id", "login_btn").click()

            # Hand the browser session to requests; otherwise the login above
            # has no effect on the download below.
            # NOTE(review): the login request may still be in flight here; add
            # an explicit wait if the cookies turn out to be pre-login ones.
            cookies = {c["name"]: c["value"] for c in driver.get_cookies()}
        finally:
            # Always close the browser, even if a step above fails.
            driver.quit()

        html_str = self.parse_url(self.start_url, cookies=cookies)
        self.save_content_list(html_str)

# Run the spider only when this file is executed as a script, not on import.
if __name__ == "__main__":
    Jz_spider = JzSpider()
    Jz_spider.run()
Mar.11,2021

A 403 Forbidden error usually means the server is blocking the request and refusing to return the content.

Generally, you can get around this by changing the server's IP address or by routing requests through a proxy server.

The best approach is to collect the data manually or to crawl through a simulated browser, for example:

selenium + xvfb + firefox + proxy ip

Here is my solution, for reference only; let's learn from each other.

from selenium import webdriver
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from selenium.webdriver.common.proxy import *
from pyvirtualdisplay import Display
# from xvfbwrapper import Xvfb

import bs4, os
from base64 import b64encode

import sys
# Python 2 only: force the default str/unicode encoding to UTF-8.
# reload() is not a builtin and sys.setdefaultencoding() does not exist on
# Python 3, so skip the hack there instead of failing at import time.
if sys.version_info[0] < 3:
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding('utf8')


# webdriver + firefox
def spider_url_firefox(url):
    """Load ``url`` in Firefox inside a pyvirtualdisplay ``Display``.

    The page source is printed and, so that callers can use it, returned.

    Args:
        url: Address to open in the browser.

    Returns:
        The page source reported by the browser after loading ``url``.
    """
    browser = None
    display = None
    try:
        # visible=0 keeps the display off-screen.
        display = Display(visible=0, size=(800, 600))
        display.start()
        browser = webdriver.Firefox()  # launch Firefox
        browser.get(url)
        content = browser.page_source
        print("content: " + str(content))
        return content
    finally:
        # Either object may still be None if startup failed part-way.
        if browser:
            browser.quit()
        if display:
            display.stop()


# webdriver + firefox + proxy + whiteip ip
# Proxy service: https://proxy.mimvp.com
def spider_url_firefox_by_whiteip(url):
    """Load ``url`` in Firefox through a manually configured proxy.

    Firefox runs inside a pyvirtualdisplay ``Display``. Which proxy
    preferences are set depends on the URL scheme: ``http://`` sets the HTTP
    proxy, ``https://`` sets the SSL proxy, and anything else sets the SOCKS
    and FTP proxies. The page source is printed and returned.

    Args:
        url: Address to open in the browser.

    Returns:
        The page source reported by the browser after loading ``url``.
    """
    browser = None
    display = None

    # presumably the account page where the whitelisted IP is configured:
    # https://proxy.mimvp.com/usercenter/userinfo.php?p=whiteip
    mimvp_proxy = {
        'ip': '140.143.62.84',          # proxy ip
        'port_https': 19480,            # http, https
        'port_socks': 19481,            # socks5
        'username': 'mimvp-user',
        'password': 'mimvp-pass',
    }

    try:
        display = Display(visible=0, size=(800, 600))
        display.start()

        profile = webdriver.FirefoxProfile()

        # add proxy
        profile.set_preference('network.proxy.type', 1)  # ProxyType.MANUAL = 1
        if url.startswith("http://"):
            profile.set_preference('network.proxy.http', mimvp_proxy['ip'])
            profile.set_preference('network.proxy.http_port', mimvp_proxy['port_https'])  # http
        elif url.startswith("https://"):
            profile.set_preference('network.proxy.ssl', mimvp_proxy['ip'])
            profile.set_preference('network.proxy.ssl_port', mimvp_proxy['port_https'])  # https
        else:
            profile.set_preference('network.proxy.socks', mimvp_proxy['ip'])
            profile.set_preference('network.proxy.socks_port', mimvp_proxy['port_socks'])
            profile.set_preference('network.proxy.ftp', mimvp_proxy['ip'])
            profile.set_preference('network.proxy.ftp_port', mimvp_proxy['port_https'])
            profile.set_preference('network.proxy.no_proxies_on', 'localhost,127.0.0.1')

        # Username/password proxy auth, left disabled in the original
        # (presumably because access is granted by IP whitelist - confirm):
        #     profile.set_preference("network.proxy.username", 'mimvp-user')
        #     profile.set_preference("network.proxy.password", 'mimvp-pass')

        profile.update_preferences()

        # NOTE(review): newer Selenium releases expect the profile to be
        # supplied through an Options object - confirm against the installed
        # version.
        browser = webdriver.Firefox(firefox_profile=profile)  # launch Firefox
        browser.get(url)
        content = browser.page_source
        print("content: " + str(content))
        return content
    finally:
        # Either object may still be None if startup failed part-way.
        if browser:
            browser.quit()
        if display:
            display.stop()
Menu