gchr.py 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192
  1. import pandas as pd
  2. import numpy as np
  3. import xml.etree.ElementTree as ET
  4. import csv
  5. from xml.dom import minidom
  6. from datetime import datetime
  7. base_dir = '/home/robert/projekte/python/gcstruct/Siebrecht/'
  8. account_translation = base_dir + 'Kontenrahmen_kombiniert.csv'
  9. account_bookings = base_dir + 'GuV_Bilanz_Salden.csv'
  10. first_month_of_financial_year = '01'
  11. current_year = '2021'
  12. current_month = '12'
  13. account_ignored = base_dir + f'ignoriert_{current_year}-{current_month}.csv'
  14. account_invalid = base_dir + f'ungueltig_{current_year}-{current_month}.csv'
  15. last_year = str(int(current_year) - 1)
  16. next_year = str(int(current_year) + 1)
  17. def header(makes, sites):
  18. return {
  19. 'Country': 'DE',
  20. 'MainBmCode': sites[0]['Marke_HBV'],
  21. 'Month': current_month,
  22. 'Year': current_year,
  23. 'Currency': 'EUR',
  24. 'NumberOfMakes': len(makes),
  25. 'NumberOfSites': len(sites),
  26. 'ExtractionDate': datetime.now().strftime('%d.%m.%Y'),
  27. 'ExtractionTime': datetime.now().strftime('%H:%M:%S'),
  28. 'BeginFiscalYear': first_month_of_financial_year
  29. }
  30. def bookkeep_filter():
  31. period = [current_year + str(i).zfill(2) for i in range(1, 13)]
  32. if first_month_of_financial_year != '01':
  33. if first_month_of_financial_year > current_month:
  34. period = [last_year + str(i).zfill(2) for i in range(1, 13)] + period
  35. else:
  36. period = period + [next_year + str(i).zfill(2) for i in range(1, 13)]
  37. fm = int(first_month_of_financial_year)
  38. period = period[fm - 1:fm + 12]
  39. rename_to = ['Period' + str(i).zfill(2) for i in range(1, 13)]
  40. return dict(zip(period, rename_to))
  41. def export_month():
  42. # Übersetzungstabelle laden
  43. df_translate = pd.read_csv(account_translation, decimal=',', sep=';', encoding='latin-1',
  44. converters={i: str for i in range(0, 200)}, index_col='Konto_Nr_Händler')
  45. # Kontensalden laden
  46. df_bookings = pd.read_csv(account_bookings, decimal=',', sep=';', encoding='latin-1', converters={0: str, 1: str})
  47. # Kontensalden auf gegebenen Monat filtern
  48. filter_from = current_year + first_month_of_financial_year
  49. if first_month_of_financial_year > current_month:
  50. filter_from = last_year + first_month_of_financial_year
  51. filter_to = current_year + current_month
  52. df_bookings = df_bookings[(df_bookings['Bookkeep Period'] >= filter_from) & (df_bookings['Bookkeep Period'] <= filter_to)]
  53. bk_filter = bookkeep_filter()
  54. period_no = list(bk_filter.keys()).index(filter_to) + 1
  55. # Spalten konvertieren
  56. df_bookings['period'] = df_bookings['Bookkeep Period'].apply(lambda x: bk_filter[x])
  57. df_bookings['amount'] = df_bookings['Debit Amount'] + df_bookings['Credit Amount']
  58. df_bookings['quantity'] = df_bookings['Debit Quantity'] + df_bookings['Credit Quantity']
  59. # Join auf Übersetzung
  60. df_combined = df_bookings.merge(df_translate, how='inner', on='Konto_Nr_Händler')
  61. # df_combined.to_csv(account_invalid, decimal=',', sep=';', encoding='latin-1', index=False)
  62. # Gruppieren
  63. # df_grouped = df_combined.groupby(['Konto_Nr_SKR51', 'period']).sum()
  64. df_pivot = df_combined.pivot_table(index=['Konto_Nr_SKR51'], columns=['period'], values='amount',
  65. aggfunc=np.sum, margins=True, margins_name='CumulatedYear')
  66. # Infos ergänzen
  67. df_pivot['Decimals'] = 2
  68. df_pivot['OpeningBalance'] = 0.0
  69. df = df_translate.reset_index(drop=True).drop(columns=['Kostenträger_Ebene']).drop_duplicates()
  70. df = df.merge(df_pivot, how='inner', on='Konto_Nr_SKR51')
  71. # Bereinigung GW-Kostenträger
  72. df['GW_Verkauf_1'] = (df['Konto_Nr'].str.match(r'^[78]0')) & (df['Kostenstelle'].str.match(r'^[^1]\d'))
  73. df['Kostenstelle'] = np.where(df['GW_Verkauf_1'] == True, '11', df['Kostenstelle'])
  74. df['GW_Verkauf_2'] = (df['Konto_Nr'].str.match(r'^[78]1')) & (df['Kostenstelle'].str.match(r'^[^2]\d'))
  75. df['Kostenstelle'] = np.where(df['GW_Verkauf_2'] == True, '21', df['Kostenstelle'])
  76. df['GW_Verkauf_3'] = (df['Konto_Nr'].str.match(r'^[78]3')) & (df['Kostenstelle'].str.match(r'^[^3]\d'))
  77. df['Kostenstelle'] = np.where(df['GW_Verkauf_3'] == True, '31', df['Kostenstelle'])
  78. df['GW_Verkauf_4'] = (df['Konto_Nr'].str.match(r'^[78]4')) & (df['Kostenstelle'].str.match(r'^[^4]\d'))
  79. df['Kostenstelle'] = np.where(df['GW_Verkauf_4'] == True, '41', df['Kostenstelle'])
  80. df['GW_Verkauf_5'] = (df['Konto_Nr'].str.match(r'^[78]5')) & (df['Kostenstelle'].str.match(r'^[^5]\d'))
  81. df['Kostenstelle'] = np.where(df['GW_Verkauf_5'] == True, '51', df['Kostenstelle'])
  82. df['GW_Verkauf_50'] = (df['Konto_Nr'].str.match(r'^[78]')) & (df['Kostenstelle'].str.match(r'^2'))
  83. df['Kostenträger'] = np.where(df['GW_Verkauf_50'] == True, '52', df['Kostenträger'])
  84. df['Kostenträger'] = np.where((df['GW_Verkauf_50'] == True) & (df['Marke'] == '01'), '50', df['Kostenträger'])
  85. df['Bilanz'] = (df['Konto_Nr'].str.match(r'^[013]'))
  86. df['Kostenträger'] = np.where(df['Bilanz'] == True, '00', df['Kostenträger'])
  87. df['Konto_5er'] = (df['Konto_Nr'].str.match('^5'))
  88. df['Absatzkanal'] = np.where(df['Konto_5er'] == True, '99', df['Absatzkanal'])
  89. from_label = ['Marke', 'Standort', 'Konto_Nr', 'Kostenstelle', 'Absatzkanal', 'Kostenträger']
  90. to_label = ['Make', 'Site', 'Account', 'Origin', 'SalesChannel', 'CostCarrier']
  91. df = df.rename(columns=dict(zip(from_label, to_label)))
  92. df['CostAccountingString'] = df['Make'] + df['Site'] + df['Origin'] + \
  93. df['SalesChannel'] + df['CostCarrier']
  94. df['IsNumeric'] = df['CostAccountingString'].str.isdigit()
  95. df_invalid = df[df['IsNumeric'] == False]
  96. df_invalid.to_csv(account_invalid + '.2.csv', decimal=',', sep=';', encoding='latin-1', index=False)
  97. export_csv = base_dir + 'export_' + current_year + '-' + current_month + '.csv'
  98. df.to_csv(export_csv, decimal=',', sep=';', encoding='latin-1', index=False)
  99. export_file = export_xml(df, bk_filter, period_no)
  100. # Join auf Übersetzung - nicht zugeordnet
  101. df_ignored = df_bookings.merge(df_translate, how='left', on='Konto_Nr_Händler')
  102. df_ignored = df_ignored[df_ignored['Konto_Nr_SKR51'].isna()] # [['Konto_Nr_Händler', 'Bookkeep Period', 'amount', 'quantity']]
  103. if not df_ignored.empty:
  104. df_ignored = df_ignored.pivot_table(index=['Konto_Nr_Händler'], columns=['period'], values='amount',
  105. aggfunc=np.sum, margins=True, margins_name='CumulatedYear')
  106. df_ignored.to_csv(account_ignored, decimal=',', sep=';', encoding='latin-1')
  107. return export_file
  108. def export_xml(df: pd.DataFrame, bk_filter, period_no):
  109. df = df[df['IsNumeric']]
  110. makes = df[['Make', 'Marke_HBV']].drop_duplicates().sort_values(by=['Make']).to_dict(orient='records')
  111. sites = df[['Make', 'Site', 'Marke_HBV']].drop_duplicates().sort_values(by=['Make', 'Site']).to_dict(orient='records')
  112. res = df.to_dict(orient='records')
  113. record_elements = ['Account', 'Make', 'Site', 'Origin', 'SalesChannel', 'CostCarrier', 'CostAccountingString',
  114. 'Decimals', 'OpeningBalance'] + list(bk_filter.values())[:period_no] + ['CumulatedYear']
  115. root = ET.Element('HbvData')
  116. h = ET.SubElement(root, 'Header')
  117. for k, v in header(makes, sites).items():
  118. ET.SubElement(h, k).text = str(v)
  119. make_list = ET.SubElement(root, 'MakeList')
  120. for m in makes:
  121. e = ET.SubElement(make_list, 'MakeListEntry')
  122. ET.SubElement(e, 'Make').text = m['Make']
  123. ET.SubElement(e, 'MakeCode').text = m['Marke_HBV']
  124. bm_code_list = ET.SubElement(root, 'BmCodeList')
  125. for s in sites:
  126. e = ET.SubElement(bm_code_list, 'BmCodeEntry')
  127. ET.SubElement(e, 'Make').text = s['Make']
  128. ET.SubElement(e, 'Site').text = s['Site']
  129. ET.SubElement(e, 'BmCode').text = s['Marke_HBV']
  130. record_list = ET.SubElement(root, 'RecordList')
  131. for row in res:
  132. record = ET.SubElement(record_list, 'Record')
  133. for e in record_elements:
  134. child = ET.SubElement(record, e)
  135. field = row.get(e, 0.0)
  136. if str(field) == 'nan':
  137. field = '0'
  138. elif type(field) is float:
  139. field = '{:.0f}'.format(field * 100)
  140. child.text = str(field)
  141. export_file = base_dir + 'export_' + current_year + '-' + current_month + '.xml'
  142. with open(export_file, 'w', encoding='utf-8') as f:
  143. f.write(minidom.parseString(ET.tostring(root)).toprettyxml(indent=' '))
  144. return export_file
  145. def convert_to_row(node):
  146. return [child.text for child in node]
  147. def convert_xml_to_csv(xmlfile, csvfile):
  148. record_list = ET.parse(xmlfile).getroot().find('RecordList')
  149. header = [child.tag for child in record_list.find('Record')]
  150. bookings = [convert_to_row(node) for node in record_list.findall('Record')]
  151. with open(csvfile, 'w') as fwh:
  152. cwh = csv.writer(fwh, delimiter=';')
  153. cwh.writerow(header)
  154. cwh.writerows(bookings)
  155. return True
  156. if __name__ == '__main__':
  157. print(bookkeep_filter())
  158. export_file = export_month()
  159. # convert_xml_to_csv(export_file, export_file[:-4] + '.csv')
  160. # convert_xml_to_csv('DE2119_2105_150621_080944.xml', 'DE2119_2105_150621_080944.csv')