123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166 |
- import pandas as pd
- import numpy as np
- import xml.etree.ElementTree as ET
- import csv
- from xml.dom import minidom
- from datetime import datetime
- base_dir = '/home/robert/projekte/python/gcstruct/'
- account_translation = base_dir + 'Kontenrahmen_kombiniert.csv'
- account_bookings = base_dir + 'GuV_Salden.csv'
- first_month_of_financial_year = '01'
- current_year = '2021'
- current_month = '05'
- account_ignored = base_dir + 'Buchungen_ignoriert.csv'
- account_invalid = base_dir + 'Buchungen_ungueltig.csv'
- last_year = str(int(current_year) - 1)
- next_year = str(int(current_year) + 1)
- def header(makes, sites):
- return {
- 'Country': 'DE',
- 'MainBmCode': sites[0]['Marke_HBV'],
- 'Month': current_month,
- 'Year': current_year,
- 'Currency': 'EUR',
- 'NumberOfMakes': len(makes),
- 'NumberOfSites': len(sites),
- 'ExtractionDate': datetime.now().strftime('%d.%m.%Y'),
- 'ExtractionTime': datetime.now().strftime('%H:%M:%S'),
- 'BeginFiscalYear': first_month_of_financial_year
- }
- def bookkeep_filter():
- period = [current_year + str(i).zfill(2) for i in range(1, 13)]
- if first_month_of_financial_year != '01':
- if first_month_of_financial_year > current_month:
- period = [last_year + str(i).zfill(2) for i in range(1, 13)] + period
- else:
- period = period + [next_year + str(i).zfill(2) for i in range(1, 13)]
- fm = int(first_month_of_financial_year)
- period = period[fm - 1:fm + 12]
- rename_to = ['Period' + str(i).zfill(2) for i in range(1, 13)]
- return dict(zip(period, rename_to))
- def export_month():
- # Übersetzungstabelle laden
- df_translate = pd.read_csv(account_translation, decimal=',', sep=';', encoding='latin-1', converters={i: str for i in range(0, 200)})
- # Kontensalden laden
- df_bookings = pd.read_csv(account_bookings, decimal=',', sep=';', encoding='latin-1', converters={0: str, 1: str})
- # Kontensalden auf gegebenen Monat filtern
- filter_from = current_year + first_month_of_financial_year
- if first_month_of_financial_year > current_month:
- filter_from = last_year + first_month_of_financial_year
- filter_to = current_year + current_month
- df_bookings = df_bookings[(df_bookings['Bookkeep Period'] >= filter_from) & (df_bookings['Bookkeep Period'] <= filter_to)]
- bk_filter = bookkeep_filter()
- period_no = list(bk_filter.keys()).index(filter_to) + 1
- # Spalten konvertieren
- df_bookings['period'] = df_bookings['Bookkeep Period'].apply(lambda x: bk_filter[x])
- df_bookings['amount'] = df_bookings['Debit Amount'] + df_bookings['Credit Amount']
- df_bookings['quantity'] = df_bookings['Debit Quantity'] + df_bookings['Credit Quantity']
- # Join auf Übersetzung
- df_combined = df_bookings.merge(df_translate, how='inner', on='Konto_Nr_Händler')
- df_combined.to_csv(account_invalid, decimal=',', sep=';', encoding='latin-1', index=False)
- # Gruppieren
- # df_grouped = df_combined.groupby(['Konto_Nr_SKR51', 'period']).sum()
- df_pivot = df_combined.pivot_table(index=['Konto_Nr_SKR51'], columns=['period'], values='amount',
- aggfunc=np.sum, margins=True, margins_name='CumulatedYear')
- # Infos ergänzen
- df_pivot['Decimals'] = 2
- df_pivot['OpeningBalance'] = 0.0
- df = df_translate.drop(columns=['Konto_Nr_Händler', 'Kostenträger_Ebene']).drop_duplicates()
- df = df.merge(df_pivot, how='inner', on='Konto_Nr_SKR51')
- # Bereinigung GW-Kostenträger
- df['GW_Verkauf'] = (df['Konto_Nr'].str.match('^[78]')) & (df['Kostenstelle'].str.match('^2'))
- df['Kostenträger'] = np.where(df['GW_Verkauf'] == True, '52', df['Kostenträger'])
- df['Kostenträger'] = np.where((df['GW_Verkauf'] == True) & (df['Marke'] == '01'), '50', df['Kostenträger'])
- from_label = ['Marke', 'Standort', 'Konto_Nr', 'Kostenstelle', 'Absatzkanal', 'Kostenträger']
- to_label = ['Make', 'Site', 'Account', 'Origin', 'SalesChannel', 'CostCarrier']
- df = df.rename(columns=dict(zip(from_label, to_label)))
- df['CostAccountingString'] = df['Make'] + df['Site'] + df['Origin'] + \
- df['SalesChannel'] + df['CostCarrier']
- df['IsNumeric'] = df['CostAccountingString'].str.isdigit()
- df_invalid = df[df['IsNumeric'] == False]
- df_invalid.to_csv(account_invalid, decimal=',', sep=';', encoding='latin-1', index=False)
- export_xml(df, bk_filter, period_no)
- # Join auf Übersetzung - nicht zugeordnet
- df_ignored = df_bookings.merge(df_translate, how='left', on='Konto_Nr_Händler')
- df_ignored = df_ignored[df_ignored['Konto_Nr_SKR51'].isna()] # [['Konto_Nr_Händler', 'Bookkeep Period', 'amount', 'quantity']]
- if not df_ignored.empty:
- df_ignored = df_ignored.pivot_table(index=['Konto_Nr_Händler'], columns=['period'], values='amount',
- aggfunc=np.sum, margins=True, margins_name='CumulatedYear')
- df_ignored.to_csv(account_ignored, decimal=',', sep=';', encoding='latin-1')
- def export_xml(df: pd.DataFrame, bk_filter, period_no):
- df = df[df['IsNumeric']]
- makes = df[['Make', 'Marke_HBV']].drop_duplicates().sort_values(by=['Make']).to_dict(orient='records')
- sites = df[['Make', 'Site', 'Marke_HBV']].drop_duplicates().sort_values(by=['Make', 'Site']).to_dict(orient='records')
- res = df.to_dict(orient='records')
- record_elements = ['Account', 'Make', 'Site', 'Origin', 'SalesChannel', 'CostCarrier', 'CostAccountingString',
- 'Decimals', 'OpeningBalance'] + list(bk_filter.values())[:period_no] + ['CumulatedYear']
- root = ET.Element('HbvData')
- h = ET.SubElement(root, 'Header')
- for k, v in header(makes, sites).items():
- ET.SubElement(h, k).text = str(v)
- make_list = ET.SubElement(root, 'MakeList')
- for m in makes:
- e = ET.SubElement(make_list, 'MakeListEntry')
- ET.SubElement(e, 'Make').text = m['Make']
- ET.SubElement(e, 'MakeCode').text = m['Marke_HBV']
- bm_code_list = ET.SubElement(root, 'BmCodeList')
- for s in sites:
- e = ET.SubElement(bm_code_list, 'BmCodeListEntry')
- ET.SubElement(e, 'Make').text = s['Make']
- ET.SubElement(e, 'Site').text = s['Site']
- ET.SubElement(e, 'BmCode').text = s['Marke_HBV']
- record_list = ET.SubElement(root, 'RecordList')
- for row in res:
- record = ET.SubElement(record_list, 'Record')
- for e in record_elements:
- child = ET.SubElement(record, e)
- field = row.get(e, 0.0)
- if str(field) == 'nan':
- field = '0'
- elif type(field) is float:
- field = '{:.0f}'.format(field * 100)
- child.text = str(field)
- with open(base_dir + 'export_' + current_year + '-' + current_month + '.xml', 'w', encoding='utf-8') as f:
- f.write(minidom.parseString(ET.tostring(root)).toprettyxml(indent=' '))
- def convert_to_row(node):
- return [child.text for child in node]
- def convert_xml_to_csv(xmlfile, csvfile):
- record_list = ET.parse(base_dir + xmlfile).getroot().find('RecordList')
- header = [child.tag for child in record_list.find('Record')]
- bookings = [convert_to_row(node) for node in record_list.findall('Record')]
- with open(base_dir + csvfile, 'w') as fwh:
- cwh = csv.writer(fwh, delimiter=';')
- cwh.writerow(header)
- cwh.writerows(bookings)
- return True
- if __name__ == '__main__':
- print(bookkeep_filter())
- export_month()
- convert_xml_to_csv('export_2021-05.xml', 'export_2021-05.csv')
- # convert_xml_to_csv('DE2119_2105_150621_080944.xml', 'DE2119_2105_150621_080944.csv')
|