Pymongo aggregation module multi-condition, multi-table association, de-duplicate query

problem description

71

related codes

8.3 Update:

-sharp 
def get_registered_paying_user():
    unwind = {"$unwind": "$recharge"}                           -sharp 
    match1 = {"$match": {"regDate": regDate}}                   -sharp 
    match2 = {"$match": {"recharge.tradeNo": {"$ne": ""}}}      -sharp 
    match3 = {"$match": {"recharge.rechargeDate": regDate}}     -sharp 
    match4 = {"$match": {
                "$or": [
                    {"recharge.from": "weixin"},                -sharp 
                    {"recharge.from": "alipay"}                 -sharp 
                ]
             }}
    match5 = {"$match": {"recharge.real": {"$ne": 0}}}          -sharp 
    lookup = {"$lookup":
        {
            "from": "recharge",
            "localField": "_id",
            "foreignField": "uid",
            "as": "recharge"
        }
    }
    project = {"$project":
        {                                   -sharp 
            "_id": 1,                       -sharp id
            "nickname": 1,                  -sharp 
            "regDate": 1,                   -sharp 
            "recharge.tradeNo": 1,          -sharp 
            "recharge.rechargeDate": 1,     -sharp 
            "recharge.from": 1,             -sharp 
            "recharge.real": 1              -sharp 
        }
    }
    group = {"$group":
        {
            "_id": {
                "_id": "$_id",                                          -sharp id
                "nickname": "$nickname",                                -sharp 
                "regDate": "$regDate",                                  -sharp 
                "recharge.tradeNo": "$recharge.tradeNo",                -sharp 
                "recharge.rechargeDate": "$recharge.rechargeDate",      -sharp 
                "recharge.from": "$recharge.from",                      -sharp 
                "recharge.real": "$recharge.real"                       -sharp 
            },
        }
    }

    pipeline = [match1, lookup, unwind, project, match2, match3, match4, match5, group]
    result = collection_users.aggregate(pipeline)
    for i in result:
        pprint.pprint(i)

how to avoid this problem when you find an id duplicate that leads to a count of + 1?

{"_id": {"_id": ObjectId("5b5b035e1a620672c2438fa4"),
         "nickname": "3947",
         "recharge.from": "weixin",
         "recharge.real": 500,
         "recharge.rechargeDate": datetime.datetime(2018, 7, 28, 0, 16, 59, 418000),
         "recharge.tradeNo": "4200000145201807284145995390",
         "regDate": datetime.datetime(2018, 7, 27, 11, 34, 54, 113000)}}
{"_id": {"_id": ObjectId("5b5a65a21a620672c2438e3f"),
         "nickname": "9528",
         "recharge.from": "weixin",
         "recharge.real": 400,
         "recharge.rechargeDate": datetime.datetime(2018, 7, 27, 8, 28, 34, 458000),
         "recharge.tradeNo": "4200000154201807273809095287",
         "regDate": datetime.datetime(2018, 7, 27, 0, 21, 54, 837000)}}
{"_id": {"_id": ObjectId("5b5a65a21a620672c2438e3f"),
         "nickname": "9528",
         "recharge.from": "weixin",
         "recharge.real": 500,
         "recharge.rechargeDate": datetime.datetime(2018, 7, 27, 0, 39, 29, 708000),
         "recharge.tradeNo": "4200000145201807274527607790",
         "regDate": datetime.datetime(2018, 7, 27, 0, 21, 54, 837000)}}
< hr >

original problem code

-sharp 
def get_registered_paying_user():
    match1 = {"$match": {"regDate": regDate}}                   -sharp 
    match2 = {"$match": {"recharge.tradeNo": {"$ne": ""}}}      -sharp 
    match3 = {"$match": {"recharge.rechargeDate": regDate}}     -sharp 
    match4 = {"$match": {
                "$or": [
                    {"recharge.from": "weixin"},                -sharp 
                    {"recharge.from": "alipay"}                 -sharp 
                ]
             }}
    match5 = {"$match": {"recharge.real": {"$ne": 0}}}          -sharp 
    lookup = {"$lookup":
        {
            "from": "recharge",
            "localField": "_id",
            "foreignField": "uid",
            "as": "recharge"
        }
    }
    project = {"$project":
        {                                   -sharp 
            "_id": 1,                       -sharp id
            "nickname":1,                   -sharp 
            "regDate": 1,                   -sharp 
            "recharge.tradeNo": 1,          -sharp 
            "recharge.rechargeDate": 1,     -sharp 
            "recharge.from": 1,             -sharp 
            "recharge.real": 1              -sharp 
        }
    }

    pipeline = [match1, lookup, project, match2, match3, match4, match5]
    result = collection_users.aggregate(pipeline)

    for i in result:
        pprint.pprint(i)

get_registered_paying_user()

2826 recharge information for renderer: one is registered recharge and the other is paid recharge, but the paid recharge is not the one on July 1st

{"_id": ObjectId("5b38335a1a620672c2435371"),
 "nickname": "7128",
 "recharge": [{"from": "regCoupon",
               "rechargeDate": datetime.datetime(2018, 7, 1, 1, 50, 23, 143000)},
              {"from": "weixin",
               "real": 500,
               "rechargeDate": datetime.datetime(2018, 7, 1, 4, 31, 8, 191000),
               "tradeNo": "4200000157201807012108965206"}],
 "regDate": datetime.datetime(2018, 7, 1, 1, 50, 18, 382000)}
{"_id": ObjectId("5b3882541a620672930c532d"),
 "nickname": "2826",
 "recharge": [{"from": "regCoupon",
               "rechargeDate": datetime.datetime(2018, 7, 1, 7, 27, 26, 505000)},
              {"from": "alipay",
               "real": 100,
               "rechargeDate": datetime.datetime(2018, 7, 28, 4, 29, 3, 50000),
               "tradeNo": "2018072821001004410593666887"}],
 "regDate": datetime.datetime(2018, 7, 1, 7, 27, 16, 701000)}
{"_id": ObjectId("5b38c69e1a620672c24354a8"),
 "nickname": "3925",
 "recharge": [{"from": "regCoupon",
               "rechargeDate": datetime.datetime(2018, 7, 1, 12, 18, 45, 846000)},
              {"from": "alipay", "real": 300},
              {"from": "weixin",
               "real": 300,
               "rechargeDate": datetime.datetime(2018, 7, 2, 2, 11, 38, 284000),
               "tradeNo": "4200000150201807025314553741"}],
 "regDate": datetime.datetime(2018, 7, 1, 12, 18, 38, 859000)}
Apr.03,2021

< H1 > reason < / H1 >

$lookup starts with a left outer join query. When multiple records that meet the foreign key condition are matched, they are placed in the array subset. Then your match2 , match3 , match4 , match5 target the subcollection query. While mongo the subset here matches, as long as one item in the subset is satisfied, the whole record will be satisfied.

< H1 > solution < / H1 >
  1. if it is mongodb v3.2 , V3.4 version $lookup , the solution is first $unwind .
  2. in the case of mongodb V3.6 or above, $lookup provides the pipeline field. For more information, please see $lookup official document

unwind = {'$unwind': '$recharge'}                           -sharp 
    match1 = {'$match': {'regDate': regDate}}                   -sharp 
    match2 = {'$match': {'recharge.tradeNo': {'$ne': ''}}}      -sharp 
    match3 = {'$match': {'recharge.rechargeDate': regDate}}     -sharp 
    match4 = {'$match': {
                '$or': [
                    {'recharge.from': 'weixin'},                -sharp 
                    {'recharge.from': 'alipay'}                 -sharp 
                ]
             }}
    match5 = {'$match': {'recharge.real': {'$ne': 0}}}          -sharp 
    lookup = {'$lookup':
        {
            'from': 'recharge',
            'localField': '_id',
            'foreignField': 'uid',
            'as': 'recharge'
        }
    }
    project = {'$project':
        {                                   -sharp 
            '_id': 1,                       -sharp id
            'nickname': 1,                  -sharp 
            'regDate': 1,                   -sharp 
            'recharge.tradeNo': 1,          -sharp 
            'recharge.rechargeDate': 1,     -sharp 
            'recharge.from': 1,             -sharp 
            'recharge.real': 1              -sharp 
        }
    }
    group = {'$group':
        {
            '_id': {
                '_id': '$_id',                                          -sharp id
                'nickname': '$nickname',                                -sharp 
                'regDate': '$regDate',                                  -sharp 
            },
        }
    }

    pipeline = [match1, lookup, unwind, project, match2, match3, match4, match5, group]
    result = collection_users.aggregate(pipeline)
    a = 0
    for i in result:
        pprint.pprint(i)
        a = a + 1

    pprint.pprint(':' + str(a))
Menu