# gchr.py
  1. import pandas as pd
  2. import numpy as np
  3. import xml.etree.ElementTree as ET
  4. import csv
  5. from xml.dom import minidom
  6. from datetime import datetime
  7. import logging
  8. from pathlib import Path
  9. import os
  10. from enum import Enum, auto
# Key columns of one export record: grouping key for the final aggregation
# and the leading fields of each XML <Record> element.
ACCOUNT_INFO = ['Account', 'Make', 'Site', 'Origin', 'SalesChannel', 'CostCarrier', 'CostAccountingString']
  12. class GCHR:
  13. def __init__(self, base_dir) -> None:
  14. self.base_dir = base_dir
  15. self.account_translation = f"{self.base_dir}/data/Kontenrahmen_uebersetzt.csv"
  16. self.account_bookings = list(Path(self.base_dir).joinpath('data').glob('GuV_Bilanz_Salden*.csv'))
  17. self.first_month_of_financial_year = '01'
  18. pd.set_option('display.max_rows', 500)
  19. pd.set_option('display.float_format', lambda x: '%.2f' % x)
  20. def set_bookkeep_period(self, year, month):
  21. self.current_year = year
  22. self.current_month = month
  23. period = f"{year}-{month}"
  24. prot_file = f"{self.export_info_dir}/protokoll_{period}.log"
  25. logging.basicConfig(
  26. filename=prot_file,
  27. filemode='w',
  28. encoding='utf-8',
  29. level=logging.DEBUG,
  30. force=True
  31. )
  32. self.debug_file = f"{self.export_info_dir}/debug_{period}.csv"
  33. self.account_ignored = f"{self.export_info_dir}/ignoriert_{period}.csv"
  34. self.account_invalid = f"{self.export_info_dir}/ungueltig_{period}.csv"
  35. self.last_year = str(int(self.current_year) - 1)
  36. self.last_year2 = str(int(self.current_year) - 2)
  37. self.next_year = str(int(self.current_year) + 1)
  38. def header(self, makes, sites):
  39. return {
  40. 'Country': 'DE',
  41. 'MainBmCode': sites[0]['Standort_HBV'],
  42. 'Month': self.current_month,
  43. 'Year': self.current_year,
  44. 'Currency': 'EUR',
  45. 'NumberOfMakes': len(makes),
  46. 'NumberOfSites': len(sites),
  47. 'ExtractionDate': datetime.now().strftime('%d.%m.%Y'),
  48. 'ExtractionTime': datetime.now().strftime('%H:%M:%S'),
  49. 'BeginFiscalYear': self.first_month_of_financial_year
  50. }
  51. def bookkeep_filter(self):
  52. period = [self.current_year + str(i).zfill(2) for i in range(1, 13)]
  53. if self.first_month_of_financial_year != '01':
  54. if self.first_month_of_financial_year > self.current_month:
  55. period = [self.last_year + str(i).zfill(2) for i in range(1, 13)] + period
  56. else:
  57. period = period + [self.next_year + str(i).zfill(2) for i in range(1, 13)]
  58. fm = int(self.first_month_of_financial_year)
  59. period = period[fm - 1:fm + 12]
  60. period = [self.current_year + '00'] + period
  61. rename_to = ['OpeningBalance'] + ['Period' + str(i).zfill(2) for i in range(1, 13)]
  62. return dict(zip(period, rename_to))
  63. def extract_acct_info(self, df: pd.DataFrame):
  64. acct_info = ['Marke', 'Standort', 'Konto_Nr', 'Kostenstelle', 'Absatzkanal', 'Kostenträger']
  65. df['Konto_Nr_SKR51'] = df.index
  66. df[acct_info] = df['Konto_Nr_SKR51'].str.split('-', 6, expand=True)
  67. return df
  68. def export_all_periods(self):
  69. dt = datetime.now()
  70. prev = str(dt.year - 1)
  71. periods = [(prev, str(x).zfill(2)) for x in range(dt.month, 13)] + \
  72. [(str(dt.year), str(x).zfill(2)) for x in range(1, dt.month)]
  73. for year, month in periods:
  74. filename = self.export_filename_for_period(year, month)
  75. if not Path(filename).exists():
  76. os.makedirs(Path(filename).parent.joinpath('info'), exist_ok=True)
  77. self.export_period(year, month)
  78. def export_period(self, year, month):
  79. self.set_bookkeep_period(year, month)
  80. # Übersetzungstabelle laden
  81. df_translate = pd.read_csv(self.account_translation, decimal=',', sep=';', encoding='latin-1',
  82. converters={i: str for i in range(0, 200)})
  83. logging.info(df_translate.shape)
  84. df_translate['duplicated'] = df_translate.duplicated()
  85. logging.info(df_translate[df_translate['duplicated']])
  86. df_translate = df_translate[['Konto_Nr_Händler', 'Konto_Nr_SKR51', 'Marke',
  87. 'Marke_HBV', 'Standort', 'Standort_HBV']]
  88. row = {
  89. 'Konto_Nr_Händler': '01-01-0861-00-00-00',
  90. 'Konto_Nr_SKR51': '01-01-0861-00-00-00',
  91. 'Marke': '01',
  92. 'Marke_HBV': '',
  93. 'Standort': '01',
  94. 'Standort_HBV': ''
  95. }
  96. df_translate = pd.concat([df_translate, pd.DataFrame.from_records([row])])
  97. # df_translate.drop(columns=['duplicated'], inplace=True)
  98. df_translate.drop_duplicates(inplace=True)
  99. df_translate.set_index('Konto_Nr_Händler')
  100. # Kontensalden laden
  101. df2 = []
  102. for csv_file in self.account_bookings:
  103. df2.append(pd.read_csv(csv_file, decimal=',', sep=';', encoding='latin-1', converters={0: str, 1: str}))
  104. df_bookings = pd.concat(df2)
  105. # Kontensalden auf gegebenen Monat filtern
  106. filter_from = self.current_year + self.first_month_of_financial_year
  107. filter_prev = self.last_year + self.first_month_of_financial_year
  108. if self.first_month_of_financial_year > self.current_month:
  109. filter_from = self.last_year + self.first_month_of_financial_year
  110. filter_prev = self.last_year2 + self.first_month_of_financial_year
  111. filter_to = self.current_year + self.current_month
  112. filter_opening = self.current_year + '00'
  113. filter_prev_opening = self.last_year + '00'
  114. prev_year_closed = True
  115. df_opening_balance = df_bookings[(df_bookings['Bookkeep Period'] == filter_opening)]
  116. if df_opening_balance.shape[0] == 0:
  117. df_opening_balance = df_bookings[(df_bookings['Bookkeep Period'] == filter_prev_opening) |
  118. ((df_bookings['Bookkeep Period'] >= filter_prev) &
  119. (df_bookings['Bookkeep Period'] < filter_from))].copy()
  120. df_opening_balance['Bookkeep Period'] = filter_opening
  121. prev_year_closed = False
  122. # df_opening_balance = df_opening_balance.merge(df_translate, how='inner', on='Konto_Nr_Händler')
  123. df_opening_balance = df_opening_balance[(df_opening_balance['Konto_Nr_Händler'].str.contains(r'-[013]\d\d+-'))]
  124. df_opening_balance['amount'] = (df_opening_balance['Debit Amount'] + df_opening_balance['Credit Amount']).round(2)
  125. # df_opening_balance.drop(columns=['Debit Amount', 'Credit Amount', 'Debit Quantity', 'Credit Quantity'], inplace=True)
  126. # df_opening_balance = df_opening_balance.groupby(['Marke', 'Standort']).sum()
  127. opening_balance = df_opening_balance['amount'].sum().round(2)
  128. logging.info('Gewinn/Verlustvortrag')
  129. logging.info(opening_balance)
  130. if not prev_year_closed:
  131. row = {
  132. 'Konto_Nr_Händler': '01-01-0861-00-00-00',
  133. 'Bookkeep Period': filter_opening,
  134. 'Debit Amount': opening_balance,
  135. 'Credit Amount': 0,
  136. 'Debit Quantity': 0,
  137. 'Credit Quantity': 0,
  138. 'amount': opening_balance
  139. }
  140. df_opening_balance = pd.concat([df_opening_balance, pd.DataFrame.from_records([row])])
  141. df_bookings = df_bookings[(df_bookings['Bookkeep Period'] >= filter_from) & (df_bookings['Bookkeep Period'] <= filter_to)]
  142. df_bookings['amount'] = (df_bookings['Debit Amount'] + df_bookings['Credit Amount']).round(2)
  143. df_stats = df_bookings.copy()
  144. # df_stats = df_stats[df_stats['Konto_Nr_Händler'].str.match(r'-[24578]\d\d\d-')]
  145. df_stats['Konto_Nr_Händler'] = df_stats['Konto_Nr_Händler'].str.replace(r'-(\d\d\d+)-', r'-\1_STK-', regex=True)
  146. df_stats['amount'] = (df_bookings['Debit Quantity'] + df_bookings['Credit Quantity']).round(2)
  147. df_bookings = pd.concat([df_opening_balance, df_bookings, df_stats])
  148. df_bookings = df_bookings[df_bookings['amount'] != 0.00]
  149. if df_bookings.shape[0] == 0:
  150. logging.error('ABBRUCH!!! Keine Daten vorhanden!')
  151. return False
  152. bk_filter = self.bookkeep_filter()
  153. period_no = list(bk_filter.keys()).index(filter_to) + 1
  154. # Spalten konvertieren
  155. df_bookings['period'] = df_bookings['Bookkeep Period'].apply(lambda x: bk_filter[x])
  156. logging.info('df_bookings: ' + str(df_bookings.shape))
  157. # Join auf Übersetzung
  158. df_combined = df_bookings.merge(df_translate, how='inner', on='Konto_Nr_Händler')
  159. logging.info('df_combined: ' + str(df_combined.shape))
  160. # Hack für fehlende Markenzuordnung
  161. df_combined['Fremdmarke'] = (df_combined['Marke_HBV'].str.match(r'^0000'))
  162. df_combined['Marke'] = np.where(df_combined['Fremdmarke'], '99', df_combined['Marke'])
  163. df_combined['Standort_egal'] = (df_combined['Standort_HBV'].str.match(r'^\d\d_'))
  164. df_combined['Standort_HBV'] = np.where(df_combined['Fremdmarke'] | df_combined['Standort_egal'],
  165. '0000', df_combined['Standort_HBV'])
  166. makes = df_combined[['Marke', 'Marke_HBV']].drop_duplicates().sort_values(by=['Marke'])
  167. sites = df_combined[['Marke', 'Standort', 'Standort_HBV']].drop_duplicates().sort_values(by=['Marke', 'Standort'])
  168. # df_combined.to_csv(account_invalid, decimal=',', sep=';', encoding='latin-1', index=False)
  169. # Gruppieren
  170. # df_grouped = df_combined.groupby(['Konto_Nr_SKR51', 'period']).sum()
  171. df = df_combined.pivot_table(index=['Konto_Nr_SKR51'], columns=['period'], values='amount',
  172. aggfunc=np.sum, margins=True, margins_name='CumulatedYear')
  173. logging.info('df_pivot: ' + str(df.shape))
  174. df = self.extract_acct_info(df)
  175. # df = df_translate.reset_index(drop=True).drop(columns=['Kostenträger_Ebene']).drop_duplicates()
  176. logging.info(df.shape)
  177. logging.info(df.columns)
  178. logging.info(df.head())
  179. # df = df.merge(df_translate, how='inner', on='Konto_Nr_SKR51')
  180. logging.info('df: ' + str(df.shape))
  181. df['Bilanz'] = (df['Konto_Nr'].str.match(r'^[013]'))
  182. df['Kontoart'] = np.where(df['Bilanz'], '1', '2')
  183. df['Kontoart'] = np.where(df['Konto_Nr'].str.contains('_STK'), '3', df['Kontoart'])
  184. df['Kontoart'] = np.where(df['Konto_Nr'].str.match(r'^[9]'), '3', df['Kontoart'])
  185. df['Konto_1'] = (df['Konto_Nr'].str.slice(0, 1))
  186. # Hack für fehlende Markenzuordnung
  187. df = df.merge(makes, how='left', on='Marke')
  188. df['Marke'] = np.where(df['Marke_HBV'].isna(), '99', df['Marke'])
  189. df_debug = df.drop(columns=['Bilanz'])
  190. logging.info(df_debug.groupby(['Kontoart']).sum())
  191. logging.info(df_debug.groupby(['Kontoart', 'Konto_1']).sum())
  192. logging.info(df_debug.groupby(['Konto_Nr']).sum())
  193. df_debug.groupby(['Konto_Nr']).sum().to_csv(self.debug_file, decimal=',', sep=';', encoding='latin-1')
  194. # Bereinigung GW-Kostenträger
  195. df['GW_Verkauf_1'] = (df['Konto_Nr'].str.match(r'^[78]0')) & (df['Kostenstelle'].str.match(r'^[^1]\d'))
  196. df['Kostenstelle'] = np.where(df['GW_Verkauf_1'] == True, '11', df['Kostenstelle'])
  197. df['GW_Verkauf_2'] = (df['Konto_Nr'].str.match(r'^[78]1')) & (df['Kostenstelle'].str.match(r'^[^2]\d'))
  198. df['Kostenstelle'] = np.where(df['GW_Verkauf_2'] == True, '21', df['Kostenstelle'])
  199. df['GW_Verkauf_3'] = (df['Konto_Nr'].str.match(r'^[78]3')) & (df['Kostenstelle'].str.match(r'^[^3]\d'))
  200. df['Kostenstelle'] = np.where(df['GW_Verkauf_3'] == True, '31', df['Kostenstelle'])
  201. df['GW_Verkauf_4'] = (df['Konto_Nr'].str.match(r'^[78]4')) & (df['Kostenstelle'].str.match(r'^[^4]\d'))
  202. df['Kostenstelle'] = np.where(df['GW_Verkauf_4'] == True, '41', df['Kostenstelle'])
  203. df['GW_Verkauf_5'] = (df['Konto_Nr'].str.match(r'^[78]5')) & (df['Kostenstelle'].str.match(r'^[^5]\d'))
  204. df['Kostenstelle'] = np.where(df['GW_Verkauf_5'] == True, '51', df['Kostenstelle'])
  205. df['GW_Verkauf_50'] = (df['Konto_Nr'].str.match(r'^[78]')) & (df['Kostenstelle'].str.match(r'^2'))
  206. df['Kostenträger'] = np.where(df['GW_Verkauf_50'] == True, '52', df['Kostenträger'])
  207. df['Kostenträger'] = np.where((df['GW_Verkauf_50'] == True) & (df['Marke'] == '01'), '50', df['Kostenträger'])
  208. df['GW_Stk_50'] = (df['Konto_Nr'].str.match(r'^9130')) & (df['Kostenstelle'].str.match(r'^2'))
  209. df['Kostenträger'] = np.where(df['GW_Stk_50'] == True, '52', df['Kostenträger'])
  210. df['Kostenträger'] = np.where((df['GW_Stk_50'] == True) & (df['Marke'] == '01'), '50', df['Kostenträger'])
  211. df['Kostenträger'] = np.where(df['Bilanz'] == True, '00', df['Kostenträger'])
  212. df['Konto_5er'] = (df['Konto_Nr'].str.match('^5')) | (df['Konto_Nr'].str.match('^9143'))
  213. df['Absatzkanal'] = np.where(df['Konto_5er'] == True, '99', df['Absatzkanal'])
  214. df['Teile_30_60'] = (df['Konto_Nr'].str.match(r'^[578]')) & \
  215. (df['Kostenstelle'].str.match(r'^[3]')) & \
  216. (df['Kostenträger'].str.match(r'^[^6]'))
  217. df['Kostenträger'] = np.where(df['Teile_30_60'] == True, '60', df['Kostenträger'])
  218. df['Service_40_70'] = (df['Konto_Nr'].str.match(r'^[578]')) & \
  219. (df['Kostenstelle'].str.match(r'^[4]')) & \
  220. (df['Kostenträger'].str.match(r'^[^7]'))
  221. df['Kostenträger'] = np.where(df['Service_40_70'] == True, '70', df['Kostenträger'])
  222. from_label = ['Marke', 'Standort', 'Konto_Nr', 'Kostenstelle', 'Absatzkanal', 'Kostenträger']
  223. to_label = ['Make', 'Site', 'Account', 'Origin', 'SalesChannel', 'CostCarrier']
  224. df = df.rename(columns=dict(zip(from_label, to_label)))
  225. makes = makes.rename(columns=dict(zip(from_label, to_label))).to_dict(orient='records')
  226. sites = sites.rename(columns=dict(zip(from_label, to_label))).to_dict(orient='records')
  227. df['CostAccountingString'] = df['Make'] + df['Site'] + df['Origin'] + \
  228. df['SalesChannel'] + df['CostCarrier']
  229. df['IsNumeric'] = (df['CostAccountingString'].str.isdigit()) & (df['Account'].str.isdigit()) & (df['Account'].str.len() == 4)
  230. df_invalid = df[df['IsNumeric'] == False]
  231. df_invalid.to_csv(self.account_invalid, decimal=',', sep=';', encoding='latin-1', index=False)
  232. export_csv = self.export_filename[:-4] + '.csv'
  233. df.to_csv(export_csv, decimal=',', sep=';', encoding='latin-1', index=False)
  234. df = df[df['IsNumeric'] != False].groupby(ACCOUNT_INFO, as_index=False).sum()
  235. # Infos ergänzen
  236. df['Decimals'] = 2
  237. # df['OpeningBalance'] = 0.0
  238. logging.info(df.shape)
  239. self.export_xml(df.to_dict(orient='records'), bk_filter, period_no, makes, sites)
  240. # Join auf Übersetzung - nicht zugeordnet
  241. df_ignored = df_bookings.merge(df_translate, how='left', on='Konto_Nr_Händler')
  242. df_ignored = df_ignored[df_ignored['Konto_Nr_SKR51'].isna()] # [['Konto_Nr_Händler', 'Bookkeep Period', 'amount', 'quantity']]
  243. if not df_ignored.empty:
  244. df_ignored = df_ignored.pivot_table(index=['Konto_Nr_Händler'], columns=['period'], values='amount',
  245. aggfunc=np.sum, margins=True, margins_name='CumulatedYear')
  246. df_ignored.to_csv(self.account_ignored, decimal=',', sep=';', encoding='latin-1')
  247. return self.export_filename
  248. @property
  249. def export_filename(self):
  250. return self.export_filename_for_period(self.current_year, self.current_month)
  251. @property
  252. def export_info_dir(self):
  253. return f"{self.base_dir}/Export/{self.current_year}/info/"
  254. def export_filename_for_period(self, year, month):
  255. return f"{self.base_dir}/Export/{year}/export_{year}-{month}.xml"
  256. def export_xml(self, records, bk_filter, period_no, makes, sites):
  257. record_elements = ACCOUNT_INFO + ['Decimals'] + \
  258. list(bk_filter.values())[:period_no] + ['CumulatedYear']
  259. root = ET.Element('HbvData')
  260. h = ET.SubElement(root, 'Header')
  261. for k, v in self.header(makes, sites).items():
  262. ET.SubElement(h, k).text = str(v)
  263. make_list = ET.SubElement(root, 'MakeList')
  264. for m in makes:
  265. e = ET.SubElement(make_list, 'MakeListEntry')
  266. ET.SubElement(e, 'Make').text = m['Make']
  267. ET.SubElement(e, 'MakeCode').text = m['Marke_HBV']
  268. bm_code_list = ET.SubElement(root, 'BmCodeList')
  269. for s in sites:
  270. e = ET.SubElement(bm_code_list, 'BmCodeEntry')
  271. ET.SubElement(e, 'Make').text = s['Make']
  272. ET.SubElement(e, 'Site').text = s['Site']
  273. ET.SubElement(e, 'BmCode').text = s['Standort_HBV']
  274. record_list = ET.SubElement(root, 'RecordList')
  275. for row in records:
  276. record = ET.SubElement(record_list, 'Record')
  277. for e in record_elements:
  278. child = ET.SubElement(record, e)
  279. field = row.get(e, 0.0)
  280. if str(field) == 'nan':
  281. field = '0'
  282. elif type(field) is float:
  283. field = '{:.0f}'.format(field * 100)
  284. child.text = str(field)
  285. with open(self.export_filename, 'w', encoding='utf-8') as fwh:
  286. fwh.write(minidom.parseString(ET.tostring(root)).toprettyxml(indent=' '))
  287. def convert_to_row(self, node):
  288. return [child.text for child in node]
  289. def convert_xml_to_csv(self, xmlfile, csvfile):
  290. record_list = ET.parse(xmlfile).getroot().find('RecordList')
  291. header = [child.tag for child in record_list.find('Record')]
  292. bookings = [self.convert_to_row(node) for node in record_list.findall('Record')]
  293. with open(csvfile, 'w') as fwh:
  294. cwh = csv.writer(fwh, delimiter=';')
  295. cwh.writerow(header)
  296. cwh.writerows(bookings)
  297. return True
  298. def convert_csv_to_xml(self, csvfile, xmlfile):
  299. makes = [{'Make': '01', 'Marke_HBV': '1844'}]
  300. sites = [{'Make': '01', 'Site': '01', 'Marke_HBV': '1844'}]
  301. with open(csvfile, 'r', encoding='latin-1') as frh:
  302. csv_reader = csv.DictReader(frh, delimiter=';')
  303. self.export_xml(csv_reader, self.bookkeep_filter(), 1, makes, sites, xmlfile)
  304. class Kunden(Enum):
  305. Altermann = auto()
  306. Barth_und_Frey = auto()
  307. Hannuschka = auto()
  308. Koenig_und_Partner = auto()
  309. Luchtenberg = auto()
  310. Russig_Neustadt_deop01 = auto()
  311. Russig_Neustadt_deop02 = auto()
  312. Siebrecht = auto()
  313. def gchr_local(base_dir):
  314. for path in Path(base_dir).glob('*'):
  315. if path.is_dir():
  316. print(path.name)
  317. gchr_export(str(path))
  318. def gchr_export(base_dir):
  319. gchr = GCHR(base_dir)
  320. gchr.export_all_periods()
  321. if __name__ == '__main__':
  322. base_dir = os.getcwd() + '/gcstruct/Kunden'
  323. if Path(base_dir).exists():
  324. gchr_local(base_dir)
  325. else:
  326. gchr_export(os.getcwd())