Python: compare and merge two CSV files

There are two files:
one.csv

0,  1,   2,               3
1,  a,   ww.ok;ww.ant;,   anything
2,  e,   ww.kdi;,         ihy
3,  se,  ww.sdd,          sld
4,  sd,  ww.akd,          sdjfa

two.csv

0,  1,    2,         3
1,  sd,   ww.ok;,    1245
2,  2e3,  ww.kdi;,   432
3,  de,   ww.sdd;,   232

The goal is: for each row of one.csv, check whether its column 2 contains the column 2 value of some row in two.csv; if it does, append that row's column 3 value from two.csv to the end of the corresponding one.csv row. The desired result is:

result.csv

0,  1,   2,               3,         4
1,  a,   ww.ok;ww.ant;,   anything,  1245
2,  e,   ww.kdi;,         ihy,       432
3,  se,  ww.sdd,          sld,       232
4,  sd,  ww.akd,          sdjfa

I have written a few lines of code but I am stuck on the appending step, because the real data set is large, unlike this sample. If I iterate based on one.csv, the comparison is incomplete; if I iterate based on two.csv, I don't know how to write the output file. Any advice would be appreciated.

import csv
import datetime


start = datetime.datetime.now()

with open("D:\one.csv") as one:
    ic_rd = csv.reader(one)
    next(ic_rd)
    for i in ic_rd:
        with open("D:\two.csv") as ga:
            ga_rd = csv.reader(ga)
            next(ga_rd)
            for g in ga_rd:
                if g[2] in i[2]:
                    i.append(g[2])                 
                    break
        with open("D:\\result.csv","a+") as rs:
            writer = csv.writer(rs)
            writer.writerow(i)

end = datetime.datetime.now()
print (end-start)
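One way to resolve the "which file do I iterate over" dilemma is to read two.csv once into a dict keyed by its column 2, then stream one.csv a single time and write each output row as it is produced. Below is a minimal sketch of that idea; the `merge_rows` helper and the inline sample data are my own, and trailing ';' is stripped from the keys so that `ww.sdd;` matches `ww.sdd`, as in the expected result:

```python
import csv

def merge_rows(one_rows, two_rows):
    # Build {column-2 token (without trailing ';'): column-3 value} once.
    lookup = {r[2].strip().rstrip(';'): r[3].strip() for r in two_rows}
    merged = []
    for row in one_rows:
        for key, value in lookup.items():
            if key and key in row[2]:   # substring match, as in the question
                merged.append(row + [value])
                break
        else:                           # no key matched: keep the row as-is
            merged.append(list(row))
    return merged

one_rows = [
    ['1', 'a', 'ww.ok;ww.ant;', 'anything'],
    ['2', 'e', 'ww.kdi;', 'ihy'],
    ['3', 'se', 'ww.sdd', 'sld'],
    ['4', 'sd', 'ww.akd', 'sdjfa'],
]
two_rows = [
    ['1', 'sd', 'ww.ok;', '1245'],
    ['2', '2e3', 'ww.kdi;', '432'],
    ['3', 'de', 'ww.sdd;', '232'],
]

result = merge_rows(one_rows, two_rows)
with open('result.csv', 'w', newline='') as f:
    csv.writer(f).writerows(result)
```

With the lookup built up front, two.csv is read from disk only once instead of once per row of one.csv; each row still scans the keys linearly because the comparison is a substring test, but for large files this is far cheaper than re-parsing the whole second file every iteration.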
Mar.06,2021

# coding: utf-8

import pandas as pd

lst1 = [
    [1, 'a', 'ww.ok;ww.ant', 'anything'],
    [2, 'e', 'ww.kdi', 'ihy'],
    [3, 'se', 'ww.sdd', 'sld'],
    [4, 'sd', 'ww.akd', 'sdjfa']
]

lst2 = [
    [1, 'sd', 'ww.ant', 1245],
    [2,'2e3', 'ww.kdi', 432],
    [3,'de', 'ww.sdd', 232]
]

df1 = pd.DataFrame(lst1, columns=['A1', 'B1', 'C1', 'D1'])
df2 = pd.DataFrame(lst2, columns=['A2', 'B2', 'C2', 'D2'])

# split column C on ';' into one row per token
df11 = df1.join(df1['C1'].str.split(';', expand=True).stack().reset_index(level=1, drop=True).rename('E'))
df22 = df2.join(df2['C2'].str.split(';', expand=True).stack().reset_index(level=1, drop=True).rename('E'))

# left-merge on the exploded token column E
df3 = pd.merge(df11, df22, how='left', on='E').loc[:, ['A1', 'B1', 'C1', 'D1', 'D2']]

# collapse back to one row per original record
print(df3.groupby(['A1', 'B1', 'C1', 'D1'], as_index=False).max())
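As an aside, on pandas 0.25 or newer the split/stack/reset_index chain above can be written more directly with `DataFrame.explode`. A sketch with the same sample data and column names as the answer above (the variable names here are my own):

```python
import pandas as pd

df1 = pd.DataFrame(
    [[1, 'a', 'ww.ok;ww.ant', 'anything'],
     [2, 'e', 'ww.kdi', 'ihy'],
     [3, 'se', 'ww.sdd', 'sld'],
     [4, 'sd', 'ww.akd', 'sdjfa']],
    columns=['A1', 'B1', 'C1', 'D1'])
df2 = pd.DataFrame(
    [[1, 'sd', 'ww.ant', 1245],
     [2, '2e3', 'ww.kdi', 432],
     [3, 'de', 'ww.sdd', 232]],
    columns=['A2', 'B2', 'C2', 'D2'])

# One row per ';'-separated token of C1, then left-merge on that token.
d1 = df1.assign(E=df1['C1'].str.split(';')).explode('E')
merged = d1.merge(df2.rename(columns={'C2': 'E'}), how='left', on='E')

# Collapse back to one row per original record, keeping the matched D2.
result = (merged
          .groupby(['A1', 'B1', 'C1', 'D1'], as_index=False, sort=False)['D2']
          .max())
```

`explode` does the same job as the `stack`/`reset_index` trick but keeps the intent visible; rows of one.csv with no match simply carry a NaN in the new column.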

I am a novice, and I used regular expressions to match, based on the sample data you gave: I match one.csv against the values taken from two.csv, so the premise is that two.csv is not too large. I just tested 10 million rows (about 500 MB, one match per hundred rows) in about 4 minutes, so the efficiency is not great. The program uses the default encoding, and transcoding may make it slower still. If the data is truly huge, I think writing it in C would be more efficient.

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import re
# matches two.csv columns 2 and 3
reg_list2 = re.compile(r'.+?,.+?,\s*(.+?)\s*[;]*\s*,\s*(\d+)\s*')
# matches one.csv column 2
reg_name = re.compile(r'.+?,.+?,\s*(.+?)\s*[;]*,')
# matches the header line
reg_title = re.compile(r'\s*\S+.+')
def main(argv):
    try:
        fptwo = open("two.csv", "r")
        fpone = open("one.csv", "r")
        fpnew = open("result.csv", "a")
    except OSError:
        exit(1)
    text = fptwo.read()
    fptwo.close()
    
    data_two = [(re.escape(x),n) for x,n in reg_list2.findall(text)]
    title = None
    new = "4\n" -sharp
    tell = fpone.tell()
    nu = 0    
    
    while True:
        line = fpone.readline()
        mate = reg_name.match(line)

        if not title:
            mate = reg_title.search(line)
            if mate:
                title = mate.group(0)
                title = (title + ",").ljust(len(title)+15) + new
                fpnew.write(title)
                continue
        
        if mate:
            name = mate.group(1)
            for item in data_two:
                if re.search(item[0],name):
                    line = line[:-1]
                    line = (line + ",").ljust(len(title))  + item[1] + "\n"
                    print ("Pos L:", nu, "name: ", name)
        
        fpnew.write(line)
        nu += 1
        if tell == fpone.tell():
            break
        else:
            tell = fpone.tell()
    fpone.close()
    fpnew.close()
    return 0

if __name__ == '__main__':
    import sys
    sys.exit(main(sys.argv))