gchr.py 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166
  1. import pandas as pd
  2. import numpy as np
  3. import xml.etree.ElementTree as ET
  4. import csv
  5. from xml.dom import minidom
  6. from datetime import datetime
  7. base_dir = '/home/robert/projekte/python/gcstruct/'
  8. account_translation = base_dir + 'Kontenrahmen_kombiniert.csv'
  9. account_bookings = base_dir + 'GuV_Salden.csv'
  10. first_month_of_financial_year = '01'
  11. current_year = '2021'
  12. current_month = '05'
  13. account_ignored = base_dir + 'Buchungen_ignoriert.csv'
  14. account_invalid = base_dir + 'Buchungen_ungueltig.csv'
  15. last_year = str(int(current_year) - 1)
  16. next_year = str(int(current_year) + 1)
  17. def header(makes, sites):
  18. return {
  19. 'Country': 'DE',
  20. 'MainBmCode': sites[0]['Marke_HBV'],
  21. 'Month': current_month,
  22. 'Year': current_year,
  23. 'Currency': 'EUR',
  24. 'NumberOfMakes': len(makes),
  25. 'NumberOfSites': len(sites),
  26. 'ExtractionDate': datetime.now().strftime('%d.%m.%Y'),
  27. 'ExtractionTime': datetime.now().strftime('%H:%M:%S'),
  28. 'BeginFiscalYear': first_month_of_financial_year
  29. }
  30. def bookkeep_filter():
  31. period = [current_year + str(i).zfill(2) for i in range(1, 13)]
  32. if first_month_of_financial_year != '01':
  33. if first_month_of_financial_year > current_month:
  34. period = [last_year + str(i).zfill(2) for i in range(1, 13)] + period
  35. else:
  36. period = period + [next_year + str(i).zfill(2) for i in range(1, 13)]
  37. fm = int(first_month_of_financial_year)
  38. period = period[fm - 1:fm + 12]
  39. rename_to = ['Period' + str(i).zfill(2) for i in range(1, 13)]
  40. return dict(zip(period, rename_to))
  41. def export_month():
  42. # Übersetzungstabelle laden
  43. df_translate = pd.read_csv(account_translation, decimal=',', sep=';', encoding='latin-1', converters={i: str for i in range(0, 200)})
  44. # Kontensalden laden
  45. df_bookings = pd.read_csv(account_bookings, decimal=',', sep=';', encoding='latin-1', converters={0: str, 1: str})
  46. # Kontensalden auf gegebenen Monat filtern
  47. filter_from = current_year + first_month_of_financial_year
  48. if first_month_of_financial_year > current_month:
  49. filter_from = last_year + first_month_of_financial_year
  50. filter_to = current_year + current_month
  51. df_bookings = df_bookings[(df_bookings['Bookkeep Period'] >= filter_from) & (df_bookings['Bookkeep Period'] <= filter_to)]
  52. bk_filter = bookkeep_filter()
  53. period_no = list(bk_filter.keys()).index(filter_to) + 1
  54. # Spalten konvertieren
  55. df_bookings['period'] = df_bookings['Bookkeep Period'].apply(lambda x: bk_filter[x])
  56. df_bookings['amount'] = df_bookings['Debit Amount'] + df_bookings['Credit Amount']
  57. df_bookings['quantity'] = df_bookings['Debit Quantity'] + df_bookings['Credit Quantity']
  58. # Join auf Übersetzung
  59. df_combined = df_bookings.merge(df_translate, how='inner', on='Konto_Nr_Händler')
  60. df_combined.to_csv(account_invalid, decimal=',', sep=';', encoding='latin-1', index=False)
  61. # Gruppieren
  62. # df_grouped = df_combined.groupby(['Konto_Nr_SKR51', 'period']).sum()
  63. df_pivot = df_combined.pivot_table(index=['Konto_Nr_SKR51'], columns=['period'], values='amount',
  64. aggfunc=np.sum, margins=True, margins_name='CumulatedYear')
  65. # Infos ergänzen
  66. df_pivot['Decimals'] = 2
  67. df_pivot['OpeningBalance'] = 0.0
  68. df = df_translate.drop(columns=['Konto_Nr_Händler', 'Kostenträger_Ebene']).drop_duplicates()
  69. df = df.merge(df_pivot, how='inner', on='Konto_Nr_SKR51')
  70. # Bereinigung GW-Kostenträger
  71. df['GW_Verkauf'] = (df['Konto_Nr'].str.match('^[78]')) & (df['Kostenstelle'].str.match('^2'))
  72. df['Kostenträger'] = np.where(df['GW_Verkauf'] == True, '52', df['Kostenträger'])
  73. df['Kostenträger'] = np.where((df['GW_Verkauf'] == True) & (df['Marke'] == '01'), '50', df['Kostenträger'])
  74. from_label = ['Marke', 'Standort', 'Konto_Nr', 'Kostenstelle', 'Absatzkanal', 'Kostenträger']
  75. to_label = ['Make', 'Site', 'Account', 'Origin', 'SalesChannel', 'CostCarrier']
  76. df = df.rename(columns=dict(zip(from_label, to_label)))
  77. df['CostAccountingString'] = df['Make'] + df['Site'] + df['Origin'] + \
  78. df['SalesChannel'] + df['CostCarrier']
  79. df['IsNumeric'] = df['CostAccountingString'].str.isdigit()
  80. df_invalid = df[df['IsNumeric'] == False]
  81. df_invalid.to_csv(account_invalid, decimal=',', sep=';', encoding='latin-1', index=False)
  82. export_xml(df, bk_filter, period_no)
  83. # Join auf Übersetzung - nicht zugeordnet
  84. df_ignored = df_bookings.merge(df_translate, how='left', on='Konto_Nr_Händler')
  85. df_ignored = df_ignored[df_ignored['Konto_Nr_SKR51'].isna()] # [['Konto_Nr_Händler', 'Bookkeep Period', 'amount', 'quantity']]
  86. if not df_ignored.empty:
  87. df_ignored = df_ignored.pivot_table(index=['Konto_Nr_Händler'], columns=['period'], values='amount',
  88. aggfunc=np.sum, margins=True, margins_name='CumulatedYear')
  89. df_ignored.to_csv(account_ignored, decimal=',', sep=';', encoding='latin-1')
  90. def export_xml(df: pd.DataFrame, bk_filter, period_no):
  91. df = df[df['IsNumeric']]
  92. makes = df[['Make', 'Marke_HBV']].drop_duplicates().sort_values(by=['Make']).to_dict(orient='records')
  93. sites = df[['Make', 'Site', 'Marke_HBV']].drop_duplicates().sort_values(by=['Make', 'Site']).to_dict(orient='records')
  94. res = df.to_dict(orient='records')
  95. record_elements = ['Account', 'Make', 'Site', 'Origin', 'SalesChannel', 'CostCarrier', 'CostAccountingString',
  96. 'Decimals', 'OpeningBalance'] + list(bk_filter.values())[:period_no] + ['CumulatedYear']
  97. root = ET.Element('HbvData')
  98. h = ET.SubElement(root, 'Header')
  99. for k, v in header(makes, sites).items():
  100. ET.SubElement(h, k).text = str(v)
  101. make_list = ET.SubElement(root, 'MakeList')
  102. for m in makes:
  103. e = ET.SubElement(make_list, 'MakeListEntry')
  104. ET.SubElement(e, 'Make').text = m['Make']
  105. ET.SubElement(e, 'MakeCode').text = m['Marke_HBV']
  106. bm_code_list = ET.SubElement(root, 'BmCodeList')
  107. for s in sites:
  108. e = ET.SubElement(bm_code_list, 'BmCodeListEntry')
  109. ET.SubElement(e, 'Make').text = s['Make']
  110. ET.SubElement(e, 'Site').text = s['Site']
  111. ET.SubElement(e, 'BmCode').text = s['Marke_HBV']
  112. record_list = ET.SubElement(root, 'RecordList')
  113. for row in res:
  114. record = ET.SubElement(record_list, 'Record')
  115. for e in record_elements:
  116. child = ET.SubElement(record, e)
  117. field = row.get(e, 0.0)
  118. if str(field) == 'nan':
  119. field = '0'
  120. elif type(field) is float:
  121. field = '{:.0f}'.format(field * 100)
  122. child.text = str(field)
  123. with open(base_dir + 'export_' + current_year + '-' + current_month + '.xml', 'w', encoding='utf-8') as f:
  124. f.write(minidom.parseString(ET.tostring(root)).toprettyxml(indent=' '))
  125. def convert_to_row(node):
  126. return [child.text for child in node]
  127. def convert_xml_to_csv(xmlfile, csvfile):
  128. record_list = ET.parse(base_dir + xmlfile).getroot().find('RecordList')
  129. header = [child.tag for child in record_list.find('Record')]
  130. bookings = [convert_to_row(node) for node in record_list.findall('Record')]
  131. with open(base_dir + csvfile, 'w') as fwh:
  132. cwh = csv.writer(fwh, delimiter=';')
  133. cwh.writerow(header)
  134. cwh.writerows(bookings)
  135. return True
  136. if __name__ == '__main__':
  137. print(bookkeep_filter())
  138. export_month()
  139. convert_xml_to_csv('export_2021-05.xml', 'export_2021-05.csv')
  140. # convert_xml_to_csv('DE2119_2105_150621_080944.xml', 'DE2119_2105_150621_080944.csv')