This is killing me: "No module named 'misc'" appears when running Scrapy.

Problem description

I downloaded several Scrapy projects from GitHub, put them into my own directory, and ran them, but I got an error.

(screenshot of the traceback, ending in: No module named 'misc')

I tried installing misc with pip, but that did not solve the problem.



Environment: Windows 7, Python 3.7, Scrapy 1.5.1


Related code

settings.py

# Scrapy settings for douyu project
#
# For simplicity, this file contains only the most important settings by
# default. All the other settings are documented here:
#
#     http://doc.scrapy.org/en/latest/topics/settings.html
#

import sys
import os
from os.path import dirname

# Resolve the directory two levels above the douyu package directory and
# append it to sys.path, so that the shared misc package can be imported.
path = dirname(dirname(os.path.abspath(os.path.dirname(__file__))))
sys.path.append(path)
from misc.log import *

BOT_NAME = "douyu"

SPIDER_MODULES = ["douyu.spiders"]
NEWSPIDER_MODULE = "douyu.spiders"

# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = "douyu (+http://www.yourdomain.com)"

DOWNLOADER_MIDDLEWARES = {
    # "misc.middleware.CustomHttpProxyMiddleware": 400,
    "misc.middleware.CustomUserAgentMiddleware": 401,
}

ITEM_PIPELINES = {
    "douyu.pipelines.JsonWithEncodingPipeline": 300,
    # "douyu.pipelines.RedisPipeline": 301,
}

LOG_LEVEL = "INFO"

DOWNLOAD_DELAY = 1
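The effect of the path juggling in settings.py above: the directory two levels above settings.py must contain the misc/ package, which suggests that in the original GitHub repository misc/ sits at the repository root next to the individual project folders. It is a repo-local package, not something installable with pip, so copying only one project out of the repository breaks the import. A quick check (a sketch; it assumes it is saved and run from the outer project directory, the one containing scrapy.cfg):

import os
from os.path import dirname

# Reproduce the sys.path computation from settings.py and report whether
# the resulting directory actually contains the misc package.
pkg_dir = os.path.abspath("douyu")    # the package directory that holds settings.py
path = dirname(dirname(pkg_dir))      # what settings.py appends to sys.path
print("appended to sys.path:", path)
print("contains misc/:", os.path.isdir(os.path.join(path, "misc")))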

spider.py

import re
import json
from urllib.parse import urlparse  # Python 3 module path (urlparse was the Python 2 name)
import urllib
import pdb


from scrapy.selector import Selector
try:
    from scrapy.spiders import Spider
except ImportError:  # fall back for very old Scrapy versions
    from scrapy.spiders import BaseSpider as Spider
from scrapy.utils.response import get_base_url
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor as sle


from douyu.items import *
from misc.log import *
from misc.spider import CommonSpider


class douyuSpider(CommonSpider):
    name = "douyu"
    allowed_domains = ["douyu.com"]
    start_urls = [
        "http://www.douyu.com/directory/all"
    ]
    rules = [
        Rule(sle(allow=("http://www.douyu.com/directory/all")), callback="parse_1", follow=True),
    ]

    list_css_rules = {
        "#live-list-contentbox li": {
            "url": "a::attr(href)",
            "room_name": "a::attr(title)",
            "tag": "span.tag.ellipsis::text",
            "people_count": ".dy-num.fr::text"
        }
    }

    list_css_rules_for_item = {
        "#live-list-contentbox li": {
            "__use": "1",
            "__list": "1",
            "url": "a::attr(href)",
            "room_name": "a::attr(title)",
            "tag": "span.tag.ellipsis::text",
            "people_count": ".dy-num.fr::text"
        }
    }


    def parse_1(self, response):
        info("Parse " + response.url)
        # x = self.parse_with_rules(response, self.list_css_rules, dict)
        x = self.parse_with_rules(response, self.list_css_rules_for_item, douyuItem)
        print(len(x))
        # print(json.dumps(x, ensure_ascii=False, indent=2))
        # pp.pprint(x)
        # return self.parse_with_rules(response, self.list_css_rules, douyuItem)
        return x
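parse_with_rules and info come from the missing misc package (misc.spider.CommonSpider and misc.log), so their exact behavior is not shown in this question. As a rough sketch of what a rules dict like list_css_rules corresponds to in plain Scrapy selectors (my reading of the rules format, not the real misc implementation):

def parse_rooms(response):
    # Hypothetical stand-in for CommonSpider.parse_with_rules: apply the
    # selectors from list_css_rules by hand, one dict per matched <li>.
    for li in response.css("#live-list-contentbox li"):
        yield {
            "url": li.css("a::attr(href)").extract_first(),
            "room_name": li.css("a::attr(title)").extract_first(),
            "tag": li.css("span.tag.ellipsis::text").extract_first(),
            "people_count": li.css(".dy-num.fr::text").extract_first(),
        }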

pipelines.py

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html

import redis


from scrapy import signals


import json
import codecs
from collections import OrderedDict


class JsonWithEncodingPipeline(object):

    def __init__(self):
        self.file = codecs.open("data_utf8.json", "w", encoding="utf-8")

    def process_item(self, item, spider):
        line = json.dumps(OrderedDict(item), ensure_ascii=False, sort_keys=False) + "\n"
        self.file.write(line)
        return item

    def close_spider(self, spider):
        self.file.close()


class RedisPipeline(object):

    def __init__(self):
        self.r = redis.StrictRedis(host="localhost", port=6379)

    def process_item(self, item, spider):
        if not item["id"]:
            print("no id item!!")  # was a Python 2 print statement, a SyntaxError on Python 3.7

        str_recorded_item = self.r.get(item["id"])
        if str_recorded_item is None:
            final_item = item
        else:
            ritem = eval(str_recorded_item)
            # dict views cannot be concatenated with + in Python 3; merge instead
            final_item = dict(dict(item), **ritem)
        # redis stores strings/bytes, not dicts; make the str() conversion explicit
        self.r.set(item["id"], str(dict(final_item)))
        return item

    def close_spider(self, spider):
        return
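A side note on the Redis round trip above: writing str(dict) and reading it back with eval works, but it is fragile and unsafe for untrusted data. A sketch of a safer variant (same merge behavior assumed, with the previously stored record winning on key conflicts) that serializes with json instead:

import json
import redis

def save_item_json(r, item):
    # Hypothetical helper, not part of the project: merge the incoming item
    # with any previously stored record, then store JSON text instead of an
    # eval-able Python repr.
    recorded = r.get(item["id"])
    merged = dict(item)
    if recorded is not None:
        merged.update(json.loads(recorded))
    r.set(item["id"], json.dumps(merged, ensure_ascii=False))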

items.py

# Define here the models for your scraped items
#
# See documentation in:
# http://doc.scrapy.org/en/latest/topics/items.html

from scrapy.item import Item, Field

class douyuItem(Item):
    # define the fields for your item here, like:
    url = Field()
    room_name = Field()
    people_count = Field()
    tag = Field()
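For reference, a Scrapy Item behaves like a dict with a fixed set of keys. Note that RedisPipeline above reads item["id"], a field douyuItem never declares, so enabling that pipeline as-is would raise a KeyError. A quick illustration (the field values are made up):

item = douyuItem(url="http://www.douyu.com/some-room",
                 room_name="example room", people_count="123", tag="demo")
print(item["room_name"])  # dict-style access works for declared fields
print(dict(item))         # items convert cleanly to plain dicts
# item["id"] would raise KeyError: "id" is not a declared Field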

What result do you expect? What error message do you actually see?

I hope someone can give a concrete answer.

Apr. 02, 2021

Now the boss is waiting next to me with a kitchen knife. Is there any kind-hearted person who can save my life? Thank you very, very much!


https://stackoverflow.com/que.
