How to stop the later pipelines when a duplicate item is detected in Scrapy

I have two pipelines enabled: one inserts data into MongoDB, and the other downloads files. I need to detect duplicates
so that a duplicate item is neither inserted nor downloaded. If the item is not a duplicate, it should be inserted into the database and its file downloaded.
The database-insert pipeline runs first and the download pipeline runs after it.
The insert pipeline first checks the database to determine whether the data is duplicated. If so, it should stop the later pipelines from running; if not, it inserts the data into the database and the download pipeline starts.

import pymongo
from scrapy import Request
from scrapy.conf import settings
from scrapy.exceptions import DropItem
from scrapy.pipelines.files import FilesPipeline


class XiaoMiQuanPipeLines(object):
    """Item pipeline that records new items in MongoDB and drops duplicates.

    An item is identified by the pair (``file_url``, ``name``). The first time
    a pair is seen it is inserted into the configured collection and the item
    is passed on; when the pair is already stored, ``DropItem`` is raised so
    that pipelines running after this one never see the item.
    """

    def __init__(self):
        """Open the MongoDB collection named by the ``MONGODB_*`` settings."""
        host = settings["MONGODB_HOST"]
        port = settings["MONGODB_PORT"]
        dbname = settings["MONGODB_DBNAME"]
        sheetname = settings["MONGODB_SHEETNAME"]

        # Keep the client itself so it can be closed in ``close_spider``.
        self.client = pymongo.MongoClient(host=host, port=port)
        self.post = self.client[dbname][sheetname]

    def close_spider(self, spider):
        """Release the MongoDB connection when the spider finishes."""
        self.client.close()

    def process_item(self, item, spider=None):
        """Insert *item* if it is new, otherwise drop it.

        Args:
            item: Mapping providing ``file_url`` and ``name``.
            spider: The spider that produced the item (unused). Defaults to
                ``None`` so existing one-argument callers keep working.

        Returns:
            The unchanged *item* when it was not yet stored.

        Raises:
            DropItem: If a document with the same url and name already exists.
        """
        record = {"url": item["file_url"], "name": item["name"]}

        # ``find_one`` returns None when nothing matches. The previous
        # ``aggregate`` call returned a cursor object, which is always truthy,
        # so every item was treated as a duplicate and nothing was stored.
        if self.post.find_one(record) is not None:
            raise DropItem("Duplicate item found: %s" % record["name"])

        # NOTE(review): check-then-insert is not atomic; if several processes
        # write to the same collection, a unique index may be needed - confirm.
        self.post.insert_one(record)
        return item


class DownLoadPipelines(FilesPipeline):
    """Files pipeline that names each download after the item's ``name``."""

    def get_media_requests(self, item, info):
        """Yield one request for ``item["file_url"]``.

        The item's ``name`` is carried along in ``meta["filename"]`` so that
        ``file_path`` can read it back from the request.
        """
        yield Request(url=item["file_url"], meta={"filename": item["name"]})

    def file_path(self, request, response=None, info=None):
        """Return the ``filename`` stored in the request meta, or ``""``."""
        filename = request.meta.get("filename", "")
        return filename

Aug.04,2021

Raise DropItem. Example from the official documentation:

from scrapy.exceptions import DropItem

class PricePipeline(object):
    """Item pipeline that adds VAT to prices and drops items without a price."""

    # Multiplier applied to prices flagged as excluding VAT.
    vat_factor = 1.15

    def process_item(self, item, spider):
        """Return *item* with VAT applied where needed.

        Raises ``DropItem`` when ``item['price']`` is falsy.
        """
        price = item['price']
        if not price:
            raise DropItem("Missing price in %s" % item)

        if item['price_excludes_vat']:
            item['price'] = price * self.vat_factor
        return item
Menu