# gchr.py
import pandas as pd
import numpy as np
import xml.etree.ElementTree as ET
import csv
from xml.dom import minidom
from datetime import datetime
import logging

base_dir = '/home/robert/projekte/python/gcstruct/Luchtenberg/'
account_translation = base_dir + 'Kontenrahmen_kombiniert.csv'
account_bookings = base_dir + 'GuV_Bilanz_Salden.csv'
first_month_of_financial_year = '01'
current_year = '2022'
current_month = '01'

logging.basicConfig(
    filename=base_dir + f'protokoll_{current_year}-{current_month}.log',
    encoding='utf-8',
    level=logging.DEBUG
)

account_ignored = base_dir + f'ignoriert_{current_year}-{current_month}.csv'
account_invalid = base_dir + f'ungueltig_{current_year}-{current_month}.csv'
last_year = str(int(current_year) - 1)
last_year2 = str(int(current_year) - 2)
next_year = str(int(current_year) + 1)

pd.set_option('display.max_rows', 500)
pd.set_option('display.float_format', lambda x: '%.2f' % x)
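
# Expected inputs (column names inferred from the processing below, not from a file spec):
#   Kontenrahmen_kombiniert.csv: Konto_Nr_Händler, Konto_Nr_SKR51, Marke_HBV, ... (all columns read as text)
#   GuV_Bilanz_Salden.csv: Konto_Nr_Händler, Bookkeep Period, Debit Amount, Credit Amount,
#                          Debit Quantity, Credit Quantity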


def header(makes, sites):
    return {
        'Country': 'DE',
        'MainBmCode': sites[0]['Marke_HBV'],
        'Month': current_month,
        'Year': current_year,
        'Currency': 'EUR',
        'NumberOfMakes': len(makes),
        'NumberOfSites': len(sites),
        'ExtractionDate': datetime.now().strftime('%d.%m.%Y'),
        'ExtractionTime': datetime.now().strftime('%H:%M:%S'),
        'BeginFiscalYear': first_month_of_financial_year
    }


def bookkeep_filter():
    period = [current_year + str(i).zfill(2) for i in range(1, 13)]
    if first_month_of_financial_year != '01':
        if first_month_of_financial_year > current_month:
            period = [last_year + str(i).zfill(2) for i in range(1, 13)] + period
        else:
            period = period + [next_year + str(i).zfill(2) for i in range(1, 13)]
        fm = int(first_month_of_financial_year)
        period = period[fm - 1:fm + 12]
    rename_to = ['Period' + str(i).zfill(2) for i in range(1, 13)]
    return dict(zip(period, rename_to))
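

# Example (with the defaults above: first_month_of_financial_year == '01', current_year == '2022'):
#   bookkeep_filter() -> {'202201': 'Period01', '202202': 'Period02', ..., '202212': 'Period12'}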


def extract_acct_info(df: pd.DataFrame):
    acct_info = ['Marke', 'Standort', 'Konto_Nr', 'Kostenstelle', 'Absatzkanal', 'Kostenträger']
    acct_pos = [(0, 2), (3, 5), (6, 10), (11, 13), (14, 16), (17, 19)]
    for info, pos in zip(acct_info, acct_pos):
        df[info] = df.index.str.slice(*pos)
    return df
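

# The index is expected to be the 19-character SKR51 key; a hypothetical key '01-01-8100-11-10-50'
# would split (per acct_pos above) into Marke='01', Standort='01', Konto_Nr='8100',
# Kostenstelle='11', Absatzkanal='10', Kostenträger='50'.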


def export_month():
    # load the account translation table
    df_translate = pd.read_csv(account_translation, decimal=',', sep=';', encoding='latin-1',
                               converters={i: str for i in range(0, 200)})
    logging.info(df_translate.shape)
    df_translate['duplicated'] = df_translate.duplicated()
    logging.info(df_translate[df_translate['duplicated']])
    df_translate.drop(columns=['duplicated'], inplace=True)
    df_translate.drop_duplicates(inplace=True)

    # load the account balances
    df_bookings = pd.read_csv(account_bookings, decimal=',', sep=';', encoding='latin-1', converters={0: str, 1: str})

    # filter the account balances to the given month
    filter_from = current_year + first_month_of_financial_year
    filter_prev = last_year + first_month_of_financial_year
    if first_month_of_financial_year > current_month:
        filter_from = last_year + first_month_of_financial_year
        filter_prev = last_year2 + first_month_of_financial_year
    filter_to = current_year + current_month

    # opening balance (Gewinn-/Verlustvortrag = profit/loss carried forward) per Marke/Standort
    df_opening_balance = df_bookings[(df_bookings['Bookkeep Period'] >= filter_prev) & (df_bookings['Bookkeep Period'] <= filter_from)]
    df_opening_balance = df_opening_balance.merge(df_translate, how='inner', on='Konto_Nr_Händler')
    df_opening_balance = df_opening_balance[(df_opening_balance['Konto_Nr'].str.match(r'^[013]'))]
    df_opening_balance['amount'] = (df_opening_balance['Debit Amount'] + df_opening_balance['Credit Amount']).round(2)
    df_opening_balance.drop(columns=['Debit Amount', 'Credit Amount', 'Debit Quantity', 'Credit Quantity'], inplace=True)
    df_opening_balance = df_opening_balance.groupby(['Marke', 'Standort']).sum()
    logging.info('Gewinn/Verlustvortrag')
    logging.info(df_opening_balance)
    logging.info(df_opening_balance.sum())

    df_bookings = df_bookings[(df_bookings['Bookkeep Period'] >= filter_from) & (df_bookings['Bookkeep Period'] <= filter_to)]
    bk_filter = bookkeep_filter()
    period_no = list(bk_filter.keys()).index(filter_to) + 1

    # convert columns
    df_bookings['period'] = df_bookings['Bookkeep Period'].apply(lambda x: bk_filter[x])
    df_bookings['amount'] = (df_bookings['Debit Amount'] + df_bookings['Credit Amount']).round(2)
    df_bookings['quantity'] = (df_bookings['Debit Quantity'] + df_bookings['Credit Quantity']).round(2)
    logging.info('df_bookings: ' + str(df_bookings.shape))

    # join with the translation table
    df_combined = df_bookings.merge(df_translate, how='inner', on='Konto_Nr_Händler')
    logging.info('df_combined: ' + str(df_combined.shape))
    makes = df_combined[['Marke', 'Marke_HBV']].drop_duplicates().sort_values(by=['Marke'])
    sites = df_combined[['Marke', 'Standort', 'Marke_HBV']].drop_duplicates().sort_values(by=['Marke', 'Standort'])
    # df_combined.to_csv(account_invalid, decimal=',', sep=';', encoding='latin-1', index=False)

    # group by SKR51 account and period
    # df_grouped = df_combined.groupby(['Konto_Nr_SKR51', 'period']).sum()
    df = df_combined.pivot_table(index=['Konto_Nr_SKR51'], columns=['period'], values='amount',
                                 aggfunc=np.sum, margins=True, margins_name='CumulatedYear')
    logging.info('df_pivot: ' + str(df.shape))
    df = extract_acct_info(df)
    # df = df_translate.reset_index(drop=True).drop(columns=['Kostenträger_Ebene']).drop_duplicates()
    logging.info(df.shape)
    logging.info(df.columns)
    logging.info(df.head())
    # df = df.merge(df_translate, how='inner', on='Konto_Nr_SKR51')
    logging.info('df: ' + str(df.shape))
    df['Bilanz'] = (df['Konto_Nr'].str.match(r'^[013]'))
    df['Kontoart'] = np.where(df['Bilanz'], '1', '2')
    df['Konto_1'] = (df['Konto_Nr'].str.slice(0, 1))
    df_debug = df.drop(columns=['Bilanz'])
    logging.info(df_debug.groupby(['Kontoart']).sum())
    logging.info(df_debug.groupby(['Konto_1']).sum())
    logging.info(df_debug.groupby(['Konto_Nr']).sum())
    df_debug.groupby(['Konto_Nr']).sum().to_csv(base_dir + 'debug.csv', decimal=',', sep=';', encoding='latin-1')

    # clean up GW cost carriers:
    # accounts 70/80, 71/81, 73/83, 74/84, 75/85 with a mismatched Kostenstelle are forced to 11/21/31/41/51
    df['GW_Verkauf_1'] = (df['Konto_Nr'].str.match(r'^[78]0')) & (df['Kostenstelle'].str.match(r'^[^1]\d'))
    df['Kostenstelle'] = np.where(df['GW_Verkauf_1'] == True, '11', df['Kostenstelle'])
    df['GW_Verkauf_2'] = (df['Konto_Nr'].str.match(r'^[78]1')) & (df['Kostenstelle'].str.match(r'^[^2]\d'))
    df['Kostenstelle'] = np.where(df['GW_Verkauf_2'] == True, '21', df['Kostenstelle'])
    df['GW_Verkauf_3'] = (df['Konto_Nr'].str.match(r'^[78]3')) & (df['Kostenstelle'].str.match(r'^[^3]\d'))
    df['Kostenstelle'] = np.where(df['GW_Verkauf_3'] == True, '31', df['Kostenstelle'])
    df['GW_Verkauf_4'] = (df['Konto_Nr'].str.match(r'^[78]4')) & (df['Kostenstelle'].str.match(r'^[^4]\d'))
    df['Kostenstelle'] = np.where(df['GW_Verkauf_4'] == True, '41', df['Kostenstelle'])
    df['GW_Verkauf_5'] = (df['Konto_Nr'].str.match(r'^[78]5')) & (df['Kostenstelle'].str.match(r'^[^5]\d'))
    df['Kostenstelle'] = np.where(df['GW_Verkauf_5'] == True, '51', df['Kostenstelle'])

    # accounts starting 7/8 booked on Kostenstelle 2x get cost carrier '52' ('50' for Marke '01')
    df['GW_Verkauf_50'] = (df['Konto_Nr'].str.match(r'^[78]')) & (df['Kostenstelle'].str.match(r'^2'))
    df['Kostenträger'] = np.where(df['GW_Verkauf_50'] == True, '52', df['Kostenträger'])
    df['Kostenträger'] = np.where((df['GW_Verkauf_50'] == True) & (df['Marke'] == '01'), '50', df['Kostenträger'])

    # balance-sheet accounts carry no cost carrier
    df['Kostenträger'] = np.where(df['Bilanz'] == True, '00', df['Kostenträger'])

    # accounts starting with 5 get the generic sales channel '99'
    df['Konto_5er'] = (df['Konto_Nr'].str.match('^5'))
    df['Absatzkanal'] = np.where(df['Konto_5er'] == True, '99', df['Absatzkanal'])

    # Kostenstelle 3x without a 6x cost carrier defaults to '60' (Teile), 4x without 7x to '70' (Service)
    df['Teile_30_60'] = (df['Konto_Nr'].str.match(r'^[578]')) & \
                        (df['Kostenstelle'].str.match(r'^[3]')) & \
                        (df['Kostenträger'].str.match(r'^[^6]'))
    df['Kostenträger'] = np.where(df['Teile_30_60'] == True, '60', df['Kostenträger'])
    df['Service_40_70'] = (df['Konto_Nr'].str.match(r'^[578]')) & \
                          (df['Kostenstelle'].str.match(r'^[4]')) & \
                          (df['Kostenträger'].str.match(r'^[^7]'))
    df['Kostenträger'] = np.where(df['Service_40_70'] == True, '70', df['Kostenträger'])

    from_label = ['Marke', 'Standort', 'Konto_Nr', 'Kostenstelle', 'Absatzkanal', 'Kostenträger']
    to_label = ['Make', 'Site', 'Account', 'Origin', 'SalesChannel', 'CostCarrier']
    df = df.rename(columns=dict(zip(from_label, to_label)))
    makes = makes.rename(columns=dict(zip(from_label, to_label))).to_dict(orient='records')
    sites = sites.rename(columns=dict(zip(from_label, to_label))).to_dict(orient='records')
    df['CostAccountingString'] = df['Make'] + df['Site'] + df['Origin'] + \
        df['SalesChannel'] + df['CostCarrier']
    df['IsNumeric'] = df['CostAccountingString'].str.isdigit()
    df_invalid = df[df['IsNumeric'] == False]
    df_invalid.to_csv(account_invalid + '.2.csv', decimal=',', sep=';', encoding='latin-1', index=False)

    export_csv = base_dir + 'export_' + current_year + '-' + current_month + '.csv'
    xmlfile = export_csv[:-4] + '.xml'
    df.to_csv(export_csv, decimal=',', sep=';', encoding='latin-1', index=False)
    df = df[df['IsNumeric']].groupby(['Account', 'Make', 'Site', 'Origin', 'SalesChannel', 'CostCarrier', 'CostAccountingString'], as_index=False).sum()

    # add the remaining record fields
    df['Decimals'] = 2
    df['OpeningBalance'] = 0.0
    logging.info(df.shape)
    export_file = export_xml(df.to_dict(orient='records'), bk_filter, period_no, makes, sites, xmlfile)

    # join with the translation table - accounts that could not be assigned
    df_ignored = df_bookings.merge(df_translate, how='left', on='Konto_Nr_Händler')
    df_ignored = df_ignored[df_ignored['Konto_Nr_SKR51'].isna()]  # [['Konto_Nr_Händler', 'Bookkeep Period', 'amount', 'quantity']]
    if not df_ignored.empty:
        df_ignored = df_ignored.pivot_table(index=['Konto_Nr_Händler'], columns=['period'], values='amount',
                                            aggfunc=np.sum, margins=True, margins_name='CumulatedYear')
        df_ignored.to_csv(account_ignored, decimal=',', sep=';', encoding='latin-1')
    return export_file
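

# export_xml writes the HbvData document: a Header block, one MakeListEntry per make, one BmCodeEntry
# per site, and one <Record> per input row. Float values are serialised as integers scaled by 100
# (Decimals is 2), NaN becomes '0', and each record carries only the periods up to period_no
# plus CumulatedYear.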


def export_xml(records, bk_filter, period_no, makes, sites, export_file):
    record_elements = ['Account', 'Make', 'Site', 'Origin', 'SalesChannel', 'CostCarrier', 'CostAccountingString',
                       'Decimals', 'OpeningBalance'] + list(bk_filter.values())[:period_no] + ['CumulatedYear']
    root = ET.Element('HbvData')
    h = ET.SubElement(root, 'Header')
    for k, v in header(makes, sites).items():
        ET.SubElement(h, k).text = str(v)

    make_list = ET.SubElement(root, 'MakeList')
    for m in makes:
        e = ET.SubElement(make_list, 'MakeListEntry')
        ET.SubElement(e, 'Make').text = m['Make']
        ET.SubElement(e, 'MakeCode').text = m['Marke_HBV']

    bm_code_list = ET.SubElement(root, 'BmCodeList')
    for s in sites:
        e = ET.SubElement(bm_code_list, 'BmCodeEntry')
        ET.SubElement(e, 'Make').text = s['Make']
        ET.SubElement(e, 'Site').text = s['Site']
        ET.SubElement(e, 'BmCode').text = s['Marke_HBV']

    record_list = ET.SubElement(root, 'RecordList')
    for row in records:
        record = ET.SubElement(record_list, 'Record')
        for e in record_elements:
            child = ET.SubElement(record, e)
            field = row.get(e, 0.0)
            if str(field) == 'nan':
                field = '0'
            elif type(field) is float:
                field = '{:.0f}'.format(field * 100)
            child.text = str(field)

    with open(export_file, 'w', encoding='utf-8') as fwh:
        fwh.write(minidom.parseString(ET.tostring(root)).toprettyxml(indent=' '))
    # with open(export_file, 'wb') as fwh:
    #     fwh.write(ET.tostring(root))
    return export_file


def convert_to_row(node):
    return [child.text for child in node]


def convert_xml_to_csv(xmlfile, csvfile):
    record_list = ET.parse(xmlfile).getroot().find('RecordList')
    header = [child.tag for child in record_list.find('Record')]
    bookings = [convert_to_row(node) for node in record_list.findall('Record')]
    with open(csvfile, 'w', newline='') as fwh:
        cwh = csv.writer(fwh, delimiter=';')
        cwh.writerow(header)
        cwh.writerows(bookings)
    return True
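

# convert_csv_to_xml rebuilds the XML from a previously exported CSV. The make/site metadata is not
# part of the CSV, so it is hard-coded below; adjust it when converting data for a different dealer.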


def convert_csv_to_xml(csvfile, xmlfile):
    makes = [{'Make': '01', 'Marke_HBV': '1844'}]
    sites = [{'Make': '01', 'Site': '01', 'Marke_HBV': '1844'}]
    with open(csvfile, 'r', encoding='latin-1') as frh:
        csv_reader = csv.DictReader(frh, delimiter=';')
        export_xml(csv_reader, bookkeep_filter(), 1, makes, sites, xmlfile)


if __name__ == '__main__':
    export_file = export_month()
    # convert_xml_to_csv(export_file, export_file[:-4] + '.csv')
    # convert_xml_to_csv('DE2119_2105_150621_080944.xml', 'DE2119_2105_150621_080944.csv')
    # convert_csv_to_xml(base_dir + '../SKR51/maximum.csv', base_dir + '../maximum_2022-01.xml')