Encoding problem when passing data through a Python Queue

problem description

Data passed through a Python Queue appears to come out with a garbled encoding

clipboard.png

queue

clipboard.png

the environmental background of the problems and what methods you have tried

python

related codes

// Please paste the code as text below (do not replace the code with pictures)

# -*- coding:utf-8 -*-

import re
import os
import Queue
import urllib
import requests
import threading
from lxml import etree

class Procuder(threading.Thread):
    """Producer thread: turns listing-page URLs into image download jobs.

    Each thread repeatedly takes a page URL from ``page_queue``, fetches and
    parses the page, and puts one ``(img_url, filename)`` tuple on
    ``img_queue`` for every matching ``<img>`` element.
    """

    # Browser-like User-Agent sent with every page request.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36"
    }

    # Extra *args/**kwargs are forwarded unchanged to threading.Thread.
    def __init__(self, page_queue, img_queue, *args, **kwargs):
        """Store the two queues shared with the other worker threads.

        :param page_queue: queue of page URLs still to be parsed.
        :param img_queue: queue receiving ``(img_url, filename)`` tuples.
        """
        super(Procuder, self).__init__(*args, **kwargs)
        self.page_queue = page_queue
        self.img_queue = img_queue

    def run(self):
        """Parse pages until ``page_queue`` is drained, then return."""
        while True:
            # A separate empty() check followed by a blocking get() is racy:
            # another producer can take the last URL in between, leaving this
            # thread blocked forever. get_nowait() makes the check atomic.
            try:
                url = self.page_queue.get_nowait()
            except Queue.Empty:
                break
            self.parse_page(url)

    def parse_page(self, url):
        """Fetch ``url`` and enqueue a download job for each image found.

        :param url: address of the listing page to download and parse.
        """
        response = requests.get(url, headers=self.headers)
        html = etree.HTML(response.text)
        # Single-quoted Python string so the XPath can use double quotes.
        imgs = html.xpath(
            '//div[@class="page-content text-center"]//img[@class!="gif"]'
        )
        for img in imgs:
            img_url = img.get("data-original")
            if not img_url:
                # No download address on this element; nothing to enqueue.
                continue
            filename = self._make_filename(img.get("alt"), img_url)
            self.img_queue.put((img_url, filename))

    @staticmethod
    def _make_filename(alt, img_url):
        """Build the local file name for an image.

        The name is the ``alt`` text with ``?``, ``.`` and ``!`` removed,
        followed by the extension taken from ``img_url``. When ``alt`` is
        missing or empty after cleaning, the last path component of the URL
        is used as the name instead.

        :param alt: value of the ``alt`` attribute, or ``None``.
        :param img_url: image URL the extension is taken from.
        :returns: file name including extension.
        """
        cleaned = re.sub(r"[\?\.!]", "", alt or "")
        url_stem, suffix = os.path.splitext(os.path.basename(img_url))
        return (cleaned or url_stem) + suffix

class Consumer(threading.Thread):
    """Consumer thread: downloads the images listed on ``img_queue``.

    NOTE: the stop condition is only "no job arrived within POLL_TIMEOUT and
    ``page_queue`` is empty". A producer that is still parsing its final page
    for longer than that can enqueue jobs after the consumers have exited.
    """

    # Seconds to wait for a job before re-checking whether to stop.
    POLL_TIMEOUT = 1

    def __init__(self, page_queue, img_queue, *args, **kwargs):
        """Store the two queues shared with the other worker threads.

        :param page_queue: queue of page URLs; only checked for emptiness.
        :param img_queue: queue of ``(img_url, filename)`` tuples to download.
        """
        super(Consumer, self).__init__(*args, **kwargs)
        self.page_queue = page_queue
        self.img_queue = img_queue

    def run(self):
        """Download queued images until both queues have run dry."""
        while True:
            # A bare blocking get() after an empty() check can hang forever
            # when another consumer takes the last job; wait with a timeout
            # and re-evaluate the stop condition instead.
            try:
                img_url, filename = self.img_queue.get(timeout=self.POLL_TIMEOUT)
            except Queue.Empty:
                if self.page_queue.empty():
                    break
                continue
            self._save_image(img_url, filename)

    def _save_image(self, img_url, filename):
        """Download ``img_url`` into ``imagess/`` and print the file name.

        A failed download is reported and skipped so that one bad image does
        not terminate the thread.
        """
        try:
            urllib.urlretrieve(img_url, "imagess/" + filename)
        except IOError as exc:
            print("download failed for %s: %s" % (img_url, exc))
            return
        print(filename + "     ")


def main():
    """Queue the 100 listing pages and start 5 producer and 5 consumer threads."""
    # The consumers save into "imagess/"; the download fails when that
    # directory does not exist, so create it before any thread starts.
    if not os.path.isdir("imagess"):
        os.makedirs("imagess")

    page_queue = Queue.Queue(100)
    img_queue = Queue.Queue(1000)
    for page in range(1, 101):
        url = "http://www.doutula.com/photo/list/?page=%d" % page
        page_queue.put(url)

    for _ in range(5):
        producer = Procuder(page_queue, img_queue)
        producer.start()
    for _ in range(5):
        consumer = Consumer(page_queue, img_queue)
        consumer.start()



# Start the crawler only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()

what result do you expect? What is the error message actually seen?

How can I fix the garbled filename encoding problem here?

Apr.07,2021
The problem is resolved: the cause was that the imagess directory had not been created.

Menu