from pathlib import Path

import pandas as pd

base_dir = '/home/robert/projekte/python/gcstruct/Siebrecht/'

# Columns derived from 'Kontonummer', mapped to the (start, stop) character
# slice each one is cut from.  The same names, in this order, are the join
# keys between the export and the error log.
_ACCOUNT_PARTS = {
    'Marke_HBV': (4, 8),
    'Site': (8, 10),
    'Account': (0, 4),
    'Origin': (10, 12),
    'SalesChannel': (12, 14),
    'CostCarrier': (14, 16),
}


def read_csv(data_dir=None, bookings_keys=None):
    """Attach error-log rows to the export and write the combined CSV.

    Reads the tab-separated log 'Siebrecht_12_2021.txt' (no header row,
    four text columns), splits its 'Kontonummer' into the component
    columns listed in ``_ACCOUNT_PARTS``, left-merges it onto
    'export_2021-12.csv' using those components as keys, and writes the
    result to 'export_error_2021-12.csv' in the same directory.

    Args:
        data_dir: Directory holding the input files and receiving the
            output file.  Defaults to the module-level ``base_dir``.
        bookings_keys: Column name(s) on which to inner-merge
            'GuV_Bilanz_Salden_Debug.csv' onto the result.  When omitted
            or empty, the bookings file is not read and no second merge
            takes place.

    Returns:
        The export/log merge, or that merge inner-joined with the
        bookings when ``bookings_keys`` is given.

    Raises:
        FileNotFoundError: If a required input file is missing.
        KeyError: If a merge key is absent from one of the frames.
    """
    folder = Path(base_dir if data_dir is None else data_dir)

    # Converters (rather than dtype=str) keep empty cells as '' instead of
    # turning them into NaN, and keep leading zeros in the account number.
    df_log = pd.read_csv(
        folder / 'Siebrecht_12_2021.txt',
        decimal=',',
        sep='\t',
        encoding='latin-1',
        converters={0: str, 1: str, 2: str, 3: str},
        header=None,
        names=['Kontonummer', 'Wert', 'Fehler', 'Beschreibung'],
    )
    for column, (start, stop) in _ACCOUNT_PARTS.items():
        df_log[column] = df_log['Kontonummer'].str.slice(start, stop)

    # Every export column (up to 200) is read as text so the key columns
    # compare equal to the string slices taken from the log.
    df_export = pd.read_csv(
        folder / 'export_2021-12.csv',
        sep=';',
        encoding='latin-1',
        converters={i: str for i in range(0, 200)},
    )

    df = df_export.merge(df_log, how='left', on=list(_ACCOUNT_PARTS))
    df.to_csv(
        folder / 'export_error_2021-12.csv',
        decimal=',',
        sep=';',
        encoding='latin-1',
        index=False,
    )

    # The bookings merge used to be called with on=[], i.e. with no join
    # columns at all, so it could never yield a result.  The intended keys
    # are not known here, so the caller has to name them explicitly.
    if not bookings_keys:
        return df
    if isinstance(bookings_keys, str):
        bookings_keys = [bookings_keys]

    df_bookings = pd.read_csv(
        folder / 'GuV_Bilanz_Salden_Debug.csv',
        decimal=',',
        sep=';',
        encoding='latin-1',
        converters={0: str, 1: str, 6: str, 7: str},
    )
    return df.merge(df_bookings, how='inner', on=list(bookings_keys))


if __name__ == '__main__':
    read_csv()