# gchr.py

import pandas as pd
import numpy as np
import xml.etree.ElementTree as ET
import csv
from xml.dom import minidom
from datetime import datetime
import logging
from pathlib import Path
from enum import Enum, auto

ACCOUNT_INFO = ['Account', 'Make', 'Site', 'Origin', 'SalesChannel', 'CostCarrier', 'CostAccountingString']


class GCHR:
    def __init__(self, project_name) -> None:
        self.base_dir = f'/home/robert/projekte/python/gcstruct/Kunden/{project_name}/'
        self.account_translation = self.base_dir + 'Kontenrahmen_uebersetzt.csv'
        self.account_bookings = Path(self.base_dir).glob('GuV_Bilanz_Salden*.csv')
        self.first_month_of_financial_year = '01'
        pd.set_option('display.max_rows', 500)
        pd.set_option('display.float_format', lambda x: '%.2f' % x)

    def set_bookkeep_period(self, year, month):
        self.current_year = year
        self.current_month = month
        period = f'{year}-{month}'
        prot_file = self.base_dir + f'protokoll_{period}.log'
        logging.basicConfig(
            filename=prot_file,
            filemode='w',
            encoding='utf-8',
            level=logging.DEBUG
        )
        self.debug_file = self.base_dir + f'debug_{period}.csv'
        self.account_ignored = self.base_dir + f'ignoriert_{period}.csv'
        self.account_invalid = self.base_dir + f'ungueltig_{period}.csv'
        self.last_year = str(int(self.current_year) - 1)
        self.last_year2 = str(int(self.current_year) - 2)
        self.next_year = str(int(self.current_year) + 1)

    def header(self, makes, sites):
        return {
            'Country': 'DE',
            'MainBmCode': sites[0]['Standort_HBV'],
            'Month': self.current_month,
            'Year': self.current_year,
            'Currency': 'EUR',
            'NumberOfMakes': len(makes),
            'NumberOfSites': len(sites),
            'ExtractionDate': datetime.now().strftime('%d.%m.%Y'),
            'ExtractionTime': datetime.now().strftime('%H:%M:%S'),
            'BeginFiscalYear': self.first_month_of_financial_year
        }

    def bookkeep_filter(self):
        period = [self.current_year + str(i).zfill(2) for i in range(1, 13)]
        if self.first_month_of_financial_year != '01':
            if self.first_month_of_financial_year > self.current_month:
                period = [self.last_year + str(i).zfill(2) for i in range(1, 13)] + period
            else:
                period = period + [self.next_year + str(i).zfill(2) for i in range(1, 13)]
            fm = int(self.first_month_of_financial_year)
            period = period[fm - 1:fm + 12]
        period = [self.current_year + '00'] + period
        rename_to = ['OpeningBalance'] + ['Period' + str(i).zfill(2) for i in range(1, 13)]
        return dict(zip(period, rename_to))
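
    # A minimal sketch of the resulting mapping, assuming the default January
    # fiscal-year start and current_year = '2022':
    #   {'202200': 'OpeningBalance', '202201': 'Period01', ..., '202212': 'Period12'}
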
    def extract_acct_info(self, df: pd.DataFrame):
        acct_info = ['Marke', 'Standort', 'Konto_Nr', 'Kostenstelle', 'Absatzkanal', 'Kostenträger']
        df['Konto_Nr_SKR51'] = df.index
        df[acct_info] = df['Konto_Nr_SKR51'].str.split('-', n=6, expand=True)
        return df
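
    # Example: an SKR51 key such as '01-01-0861-00-00-00' (see the fallback row
    # in export_period below) splits into Marke='01', Standort='01',
    # Konto_Nr='0861', Kostenstelle='00', Absatzkanal='00', Kostenträger='00'.
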
    def export_period(self, year, month):
        self.set_bookkeep_period(year, month)
        # Load the translation table
        df_translate = pd.read_csv(self.account_translation, decimal=',', sep=';', encoding='latin-1',
                                   converters={i: str for i in range(0, 200)})
        logging.info(df_translate.shape)
        df_translate['duplicated'] = df_translate.duplicated()
        logging.info(df_translate[df_translate['duplicated']])
        df_translate = df_translate[['Konto_Nr_Händler', 'Konto_Nr_SKR51', 'Marke',
                                     'Marke_HBV', 'Standort', 'Standort_HBV']]
        row = {
            'Konto_Nr_Händler': '01-01-0861-00-00-00',
            'Konto_Nr_SKR51': '01-01-0861-00-00-00',
            'Marke': '01',
            'Marke_HBV': '',
            'Standort': '01',
            'Standort_HBV': ''
        }
        df_translate = pd.concat([df_translate, pd.DataFrame([row])], ignore_index=True)
        # df_translate.drop(columns=['duplicated'], inplace=True)
        df_translate.drop_duplicates(inplace=True)
        df_translate = df_translate.set_index('Konto_Nr_Händler')
        # Load account balances
        df2 = []
        for csv_file in self.account_bookings:
            df2.append(pd.read_csv(csv_file, decimal=',', sep=';', encoding='latin-1', converters={0: str, 1: str}))
        df_bookings = pd.concat(df2)
        # Filter account balances down to the requested period
        filter_from = self.current_year + self.first_month_of_financial_year
        filter_prev = self.last_year + self.first_month_of_financial_year
        if self.first_month_of_financial_year > self.current_month:
            filter_from = self.last_year + self.first_month_of_financial_year
            filter_prev = self.last_year2 + self.first_month_of_financial_year
        filter_to = self.current_year + self.current_month
        filter_opening = self.current_year + '00'
        filter_prev_opening = self.last_year + '00'
        prev_year_closed = True
        df_opening_balance = df_bookings[(df_bookings['Bookkeep Period'] == filter_opening)]
        if df_opening_balance.shape[0] == 0:
            df_opening_balance = df_bookings[(df_bookings['Bookkeep Period'] == filter_prev_opening) |
                                             ((df_bookings['Bookkeep Period'] >= filter_prev) &
                                              (df_bookings['Bookkeep Period'] < filter_from))].copy()
            df_opening_balance['Bookkeep Period'] = filter_opening
            prev_year_closed = False
        # df_opening_balance = df_opening_balance.merge(df_translate, how='inner', on='Konto_Nr_Händler')
        df_opening_balance = df_opening_balance[(df_opening_balance['Konto_Nr_Händler'].str.contains(r'-[013]\d\d+-'))]
        df_opening_balance['amount'] = (df_opening_balance['Debit Amount'] + df_opening_balance['Credit Amount']).round(2)
        # df_opening_balance.drop(columns=['Debit Amount', 'Credit Amount', 'Debit Quantity', 'Credit Quantity'], inplace=True)
        # df_opening_balance = df_opening_balance.groupby(['Marke', 'Standort']).sum()
        opening_balance = df_opening_balance['amount'].sum().round(2)
        logging.info('Gewinn/Verlustvortrag')
        logging.info(opening_balance)
        if not prev_year_closed:
            row = {
                'Konto_Nr_Händler': '01-01-0861-00-00-00',
                'Bookkeep Period': filter_opening,
                'Debit Amount': opening_balance,
                'Credit Amount': 0,
                'Debit Quantity': 0,
                'Credit Quantity': 0,
                'amount': opening_balance
            }
            df_opening_balance = pd.concat([df_opening_balance, pd.DataFrame([row])], ignore_index=True)
        df_bookings = df_bookings[(df_bookings['Bookkeep Period'] >= filter_from) & (df_bookings['Bookkeep Period'] <= filter_to)]
        df_bookings['amount'] = (df_bookings['Debit Amount'] + df_bookings['Credit Amount']).round(2)
        df_stats = df_bookings.copy()
        # df_stats = df_stats[df_stats['Konto_Nr_Händler'].str.match(r'-[24578]\d\d\d-')]
        df_stats['Konto_Nr_Händler'] = df_stats['Konto_Nr_Händler'].str.replace(r'-(\d\d\d+)-', r'-\1_STK-', regex=True)
        df_stats['amount'] = (df_bookings['Debit Quantity'] + df_bookings['Credit Quantity']).round(2)
        df_bookings = pd.concat([df_opening_balance, df_bookings, df_stats])
        df_bookings = df_bookings[df_bookings['amount'] != 0.00]
        bk_filter = self.bookkeep_filter()
        period_no = list(bk_filter.keys()).index(filter_to) + 1
        # Convert columns
        df_bookings['period'] = df_bookings['Bookkeep Period'].apply(lambda x: bk_filter[x])
        logging.info('df_bookings: ' + str(df_bookings.shape))
        # Join against the translation table
        df_combined = df_bookings.merge(df_translate, how='inner', on='Konto_Nr_Händler')
        logging.info('df_combined: ' + str(df_combined.shape))
        # Workaround for missing make mapping
        df_combined['Fremdmarke'] = (df_combined['Marke_HBV'].str.match(r'^0000'))
        df_combined['Marke'] = np.where(df_combined['Fremdmarke'], '99', df_combined['Marke'])
        df_combined['Standort_egal'] = (df_combined['Standort_HBV'].str.match(r'^\d\d_'))
        df_combined['Standort_HBV'] = np.where(df_combined['Fremdmarke'] | df_combined['Standort_egal'], '0000', df_combined['Standort_HBV'])
        makes = df_combined[['Marke', 'Marke_HBV']].drop_duplicates().sort_values(by=['Marke'])
        sites = df_combined[['Marke', 'Standort', 'Standort_HBV']].drop_duplicates().sort_values(by=['Marke', 'Standort'])
        # df_combined.to_csv(account_invalid, decimal=',', sep=';', encoding='latin-1', index=False)
        # Group
        # df_grouped = df_combined.groupby(['Konto_Nr_SKR51', 'period']).sum()
        df = df_combined.pivot_table(index=['Konto_Nr_SKR51'], columns=['period'], values='amount',
                                     aggfunc='sum', margins=True, margins_name='CumulatedYear')
        logging.info('df_pivot: ' + str(df.shape))
        df = self.extract_acct_info(df)
        # df = df_translate.reset_index(drop=True).drop(columns=['Kostenträger_Ebene']).drop_duplicates()
        logging.info(df.shape)
        logging.info(df.columns)
        logging.info(df.head())
        # df = df.merge(df_translate, how='inner', on='Konto_Nr_SKR51')
        logging.info('df: ' + str(df.shape))
        df['Bilanz'] = (df['Konto_Nr'].str.match(r'^[013]'))
        df['Kontoart'] = np.where(df['Bilanz'], '1', '2')
        df['Kontoart'] = np.where(df['Konto_Nr'].str.contains('_STK'), '3', df['Kontoart'])
        df['Kontoart'] = np.where(df['Konto_Nr'].str.match(r'^[9]'), '3', df['Kontoart'])
        df['Konto_1'] = (df['Konto_Nr'].str.slice(0, 1))
        # Workaround for missing make mapping
        df = df.merge(makes, how='left', on='Marke')
        df['Marke'] = np.where(df['Marke_HBV'].isna(), '99', df['Marke'])
        df_debug = df.drop(columns=['Bilanz'])
        logging.info(df_debug.groupby(['Kontoart']).sum())
        logging.info(df_debug.groupby(['Kontoart', 'Konto_1']).sum())
        logging.info(df_debug.groupby(['Konto_Nr']).sum())
        df_debug.groupby(['Konto_Nr']).sum().to_csv(self.debug_file, decimal=',', sep=';', encoding='latin-1')
        # Clean up GW cost carriers
        df['GW_Verkauf_1'] = (df['Konto_Nr'].str.match(r'^[78]0')) & (df['Kostenstelle'].str.match(r'^[^1]\d'))
        df['Kostenstelle'] = np.where(df['GW_Verkauf_1'] == True, '11', df['Kostenstelle'])
        df['GW_Verkauf_2'] = (df['Konto_Nr'].str.match(r'^[78]1')) & (df['Kostenstelle'].str.match(r'^[^2]\d'))
        df['Kostenstelle'] = np.where(df['GW_Verkauf_2'] == True, '21', df['Kostenstelle'])
        df['GW_Verkauf_3'] = (df['Konto_Nr'].str.match(r'^[78]3')) & (df['Kostenstelle'].str.match(r'^[^3]\d'))
        df['Kostenstelle'] = np.where(df['GW_Verkauf_3'] == True, '31', df['Kostenstelle'])
        df['GW_Verkauf_4'] = (df['Konto_Nr'].str.match(r'^[78]4')) & (df['Kostenstelle'].str.match(r'^[^4]\d'))
        df['Kostenstelle'] = np.where(df['GW_Verkauf_4'] == True, '41', df['Kostenstelle'])
        df['GW_Verkauf_5'] = (df['Konto_Nr'].str.match(r'^[78]5')) & (df['Kostenstelle'].str.match(r'^[^5]\d'))
        df['Kostenstelle'] = np.where(df['GW_Verkauf_5'] == True, '51', df['Kostenstelle'])
        df['GW_Verkauf_50'] = (df['Konto_Nr'].str.match(r'^[78]')) & (df['Kostenstelle'].str.match(r'^2'))
        df['Kostenträger'] = np.where(df['GW_Verkauf_50'] == True, '52', df['Kostenträger'])
        df['Kostenträger'] = np.where((df['GW_Verkauf_50'] == True) & (df['Marke'] == '01'), '50', df['Kostenträger'])
        df['GW_Stk_50'] = (df['Konto_Nr'].str.match(r'^9130')) & (df['Kostenstelle'].str.match(r'^2'))
        df['Kostenträger'] = np.where(df['GW_Stk_50'] == True, '52', df['Kostenträger'])
        df['Kostenträger'] = np.where((df['GW_Stk_50'] == True) & (df['Marke'] == '01'), '50', df['Kostenträger'])
        df['Kostenträger'] = np.where(df['Bilanz'] == True, '00', df['Kostenträger'])
        df['Konto_5er'] = (df['Konto_Nr'].str.match('^5')) | (df['Konto_Nr'].str.match('^9143'))
        df['Absatzkanal'] = np.where(df['Konto_5er'] == True, '99', df['Absatzkanal'])
        df['Teile_30_60'] = (df['Konto_Nr'].str.match(r'^[578]')) & \
            (df['Kostenstelle'].str.match(r'^[3]')) & \
            (df['Kostenträger'].str.match(r'^[^6]'))
        df['Kostenträger'] = np.where(df['Teile_30_60'] == True, '60', df['Kostenträger'])
        df['Service_40_70'] = (df['Konto_Nr'].str.match(r'^[578]')) & \
            (df['Kostenstelle'].str.match(r'^[4]')) & \
            (df['Kostenträger'].str.match(r'^[^7]'))
        df['Kostenträger'] = np.where(df['Service_40_70'] == True, '70', df['Kostenträger'])
        from_label = ['Marke', 'Standort', 'Konto_Nr', 'Kostenstelle', 'Absatzkanal', 'Kostenträger']
        to_label = ['Make', 'Site', 'Account', 'Origin', 'SalesChannel', 'CostCarrier']
        df = df.rename(columns=dict(zip(from_label, to_label)))
        makes = makes.rename(columns=dict(zip(from_label, to_label))).to_dict(orient='records')
        sites = sites.rename(columns=dict(zip(from_label, to_label))).to_dict(orient='records')
        df['CostAccountingString'] = df['Make'] + df['Site'] + df['Origin'] + \
            df['SalesChannel'] + df['CostCarrier']
        df['IsNumeric'] = (df['CostAccountingString'].str.isdigit()) & (df['Account'].str.isdigit()) & (df['Account'].str.len() == 4)
        df_invalid = df[df['IsNumeric'] == False]
        df_invalid.to_csv(self.account_invalid, decimal=',', sep=';', encoding='latin-1', index=False)
        export_csv = self.base_dir + 'export_' + self.current_year + '-' + self.current_month + '.csv'
        xmlfile = export_csv[:-4] + '.xml'
        df.to_csv(export_csv, decimal=',', sep=';', encoding='latin-1', index=False)
        df = df[df['IsNumeric'] != False].groupby(ACCOUNT_INFO, as_index=False).sum()
        # Add supplementary info
        df['Decimals'] = 2
        # df['OpeningBalance'] = 0.0
        logging.info(df.shape)
        export_file = self.export_xml(df.to_dict(orient='records'), bk_filter, period_no, makes, sites, xmlfile)
        # Join against the translation table - unmapped accounts
        df_ignored = df_bookings.merge(df_translate, how='left', on='Konto_Nr_Händler')
        df_ignored = df_ignored[df_ignored['Konto_Nr_SKR51'].isna()]  # [['Konto_Nr_Händler', 'Bookkeep Period', 'amount', 'quantity']]
        if not df_ignored.empty:
            df_ignored = df_ignored.pivot_table(index=['Konto_Nr_Händler'], columns=['period'], values='amount',
                                                aggfunc='sum', margins=True, margins_name='CumulatedYear')
            df_ignored.to_csv(self.account_ignored, decimal=',', sep=';', encoding='latin-1')
        return export_file
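
    # Minimal usage sketch (assumes Kontenrahmen_uebersetzt.csv and the
    # GuV_Bilanz_Salden*.csv exports exist under base_dir; the customer name is
    # illustrative):
    #   gchr = GCHR(Kunden.Siebrecht.name)
    #   xml_path = gchr.export_period('2022', '04')
    # This writes export_2022-04.csv/.xml plus the debug_ and ungueltig_ files,
    # and ignoriert_2022-04.csv if any accounts could not be translated.
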
    def export_xml(self, records, bk_filter, period_no, makes, sites, export_file):
        record_elements = ACCOUNT_INFO + ['Decimals'] + \
            list(bk_filter.values())[:period_no] + ['CumulatedYear']
        root = ET.Element('HbvData')
        h = ET.SubElement(root, 'Header')
        for k, v in self.header(makes, sites).items():
            ET.SubElement(h, k).text = str(v)
        make_list = ET.SubElement(root, 'MakeList')
        for m in makes:
            e = ET.SubElement(make_list, 'MakeListEntry')
            ET.SubElement(e, 'Make').text = m['Make']
            ET.SubElement(e, 'MakeCode').text = m['Marke_HBV']
        bm_code_list = ET.SubElement(root, 'BmCodeList')
        for s in sites:
            e = ET.SubElement(bm_code_list, 'BmCodeEntry')
            ET.SubElement(e, 'Make').text = s['Make']
            ET.SubElement(e, 'Site').text = s['Site']
            ET.SubElement(e, 'BmCode').text = s['Standort_HBV']
        record_list = ET.SubElement(root, 'RecordList')
        for row in records:
            record = ET.SubElement(record_list, 'Record')
            for e in record_elements:
                child = ET.SubElement(record, e)
                field = row.get(e, 0.0)
                if str(field) == 'nan':
                    field = '0'
                elif type(field) is float:
                    field = '{:.0f}'.format(field * 100)
                child.text = str(field)
        with open(export_file, 'w', encoding='utf-8') as fwh:
            fwh.write(minidom.parseString(ET.tostring(root)).toprettyxml(indent=' '))
        # with open(export_file, 'wb') as fwh:
        #     fwh.write(ET.tostring(root))
        return export_file
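
    # Rough shape of the XML written by export_xml (element names taken from the
    # code above; values are illustrative only, float amounts are emitted in cents):
    #   <HbvData>
    #     <Header><Country>DE</Country> ... <BeginFiscalYear>01</BeginFiscalYear></Header>
    #     <MakeList><MakeListEntry><Make>01</Make><MakeCode>...</MakeCode></MakeListEntry></MakeList>
    #     <BmCodeList><BmCodeEntry><Make>01</Make><Site>01</Site><BmCode>...</BmCode></BmCodeEntry></BmCodeList>
    #     <RecordList><Record><Account>0861</Account> ... <Period01>...</Period01></Record></RecordList>
    #   </HbvData>
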
    def convert_to_row(self, node):
        return [child.text for child in node]

    def convert_xml_to_csv(self, xmlfile, csvfile):
        record_list = ET.parse(xmlfile).getroot().find('RecordList')
        header = [child.tag for child in record_list.find('Record')]
        bookings = [self.convert_to_row(node) for node in record_list.findall('Record')]
        with open(csvfile, 'w', newline='') as fwh:
            cwh = csv.writer(fwh, delimiter=';')
            cwh.writerow(header)
            cwh.writerows(bookings)
        return True

    def convert_csv_to_xml(self, csvfile, xmlfile):
        makes = [{'Make': '01', 'Marke_HBV': '1844'}]
        sites = [{'Make': '01', 'Site': '01', 'Marke_HBV': '1844'}]
        with open(csvfile, 'r', encoding='latin-1') as frh:
            csv_reader = csv.DictReader(frh, delimiter=';')
            self.export_xml(csv_reader, self.bookkeep_filter(), 1, makes, sites, xmlfile)


class Kunden(Enum):
    Altermann = auto()
    Barth_und_Frey = auto()
    Hannuschka = auto()
    Koenig_und_Partner = auto()
    Luchtenberg = auto()
    Russig_Neustadt_deop01 = auto()
    Russig_Neustadt_deop02 = auto()
    Siebrecht = auto()


if __name__ == '__main__':
    gchr = GCHR(Kunden.Russig_Neustadt_deop02.name)
    export_file = gchr.export_period('2022', '04')
    # convert_xml_to_csv(export_file, export_file[:-4] + '.csv')
    # convert_xml_to_csv('DE2119_2105_150621_080944.xml', 'DE2119_2105_150621_080944.csv')
    # convert_csv_to_xml(base_dir + '../SKR51/maximum.csv', base_dir + '../maximum_2022-01.xml')