gchr.py

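"""Export dealer accounting balances (GuV/Bilanz) to the HBV 'HbvData' XML format.

Reads the translated chart of accounts and the monthly balance CSVs from
``<base_dir>/data`` and writes one XML export per booking period to
``<base_dir>/Export/<year>/``, together with debug/ignored/invalid CSV reports.
"""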
import pandas as pd
import numpy as np
import xml.etree.ElementTree as ET
import csv
from xml.dom import minidom
from datetime import datetime
import logging
from pathlib import Path
import os
from enum import Enum, auto

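# Key fields that identify one record in the HBV export (also the group-by key for the XML records).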
ACCOUNT_INFO = ['Account', 'Make', 'Site', 'Origin', 'SalesChannel', 'CostCarrier', 'CostAccountingString']


class GCHR:
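    """Builds the HBV balance export for one dealer directory.

    The constructor only records where the input CSVs live; the actual work
    happens per booking period in export_period().
    """
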
    def __init__(self, base_dir) -> None:
        self.base_dir = base_dir
        self.account_translation = f"{self.base_dir}/data/Kontenrahmen_uebersetzt.csv"
        self.account_bookings = list(Path(self.base_dir).joinpath('data').glob('GuV_Bilanz_Salden*.csv'))
        self.first_month_of_financial_year = '01'
        pd.set_option('display.max_rows', 500)
        pd.set_option('display.float_format', lambda x: '%.2f' % x)

    def set_bookkeep_period(self, year, month):
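        """Set the current booking period and (re)configure the per-period log and report files."""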
        self.current_year = year
        self.current_month = month
        period = f"{year}-{month}"
        prot_file = f"{self.export_info_dir}/protokoll_{period}.log"
        logging.basicConfig(
            filename=prot_file,
            filemode='w',
            encoding='utf-8',
            level=logging.DEBUG,
            force=True
        )
        self.debug_file = f"{self.export_info_dir}/debug_{period}.csv"
        self.account_ignored = f"{self.export_info_dir}/ignoriert_{period}.csv"
        self.account_invalid = f"{self.export_info_dir}/ungueltig_{period}.csv"
        self.last_year = str(int(self.current_year) - 1)
        self.last_year2 = str(int(self.current_year) - 2)
        self.next_year = str(int(self.current_year) + 1)

    def header(self, makes, sites):
        return {
            'Country': 'DE',
            'MainBmCode': sites[0]['Standort_HBV'],
            'Month': self.current_month,
            'Year': self.current_year,
            'Currency': 'EUR',
            'NumberOfMakes': len(makes),
            'NumberOfSites': len(sites),
            'ExtractionDate': datetime.now().strftime('%d.%m.%Y'),
            'ExtractionTime': datetime.now().strftime('%H:%M:%S'),
            'BeginFiscalYear': self.first_month_of_financial_year
        }

    def bookkeep_filter(self):
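        """Map raw booking periods ('YYYYMM') to export column names.

        Returns a dict like {'202300': 'OpeningBalance', '202301': 'Period01', ...,
        '202312': 'Period12'}, shifted accordingly when the financial year does not
        start in January.
        """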
        period = [self.current_year + str(i).zfill(2) for i in range(1, 13)]
        if self.first_month_of_financial_year != '01':
            if self.first_month_of_financial_year > self.current_month:
                period = [self.last_year + str(i).zfill(2) for i in range(1, 13)] + period
            else:
                period = period + [self.next_year + str(i).zfill(2) for i in range(1, 13)]
            fm = int(self.first_month_of_financial_year)
            period = period[fm - 1:fm + 12]
        period = [self.current_year + '00'] + period
        rename_to = ['OpeningBalance'] + ['Period' + str(i).zfill(2) for i in range(1, 13)]
        return dict(zip(period, rename_to))

    def extract_acct_info(self, df: pd.DataFrame):
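        """Split the SKR51 account key (the pivot-table index) into its components.

        The key has the form 'Marke-Standort-Konto_Nr-Kostenstelle-Absatzkanal-Kostenträger'.
        """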
        acct_info = ['Marke', 'Standort', 'Konto_Nr', 'Kostenstelle', 'Absatzkanal', 'Kostenträger']
        df['Konto_Nr_SKR51'] = df.index
        df[acct_info] = df['Konto_Nr_SKR51'].str.split('-', n=6, expand=True)
        return df

    def export_all_periods(self):
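        """Export the last twelve months, skipping periods that already have an export file."""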
        dt = datetime.now()
        prev = str(dt.year - 1)
        periods = [(prev, str(x).zfill(2)) for x in range(dt.month, 13)] + \
            [(str(dt.year), str(x).zfill(2)) for x in range(1, dt.month)]
        for year, month in periods:
            filename = self.export_filename_for_period(year, month)
            if not Path(filename).exists():
                os.makedirs(Path(filename).parent.joinpath('info'), exist_ok=True)
                self.export_period(year, month)

    def export_period(self, year, month):
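        """Build the export for a single booking period.

        Steps: load the translation table and balance CSVs, derive the opening
        balance, pivot amounts per SKR51 account and period, apply the cost-centre
        and cost-carrier clean-up rules, and write the XML export plus the
        debug/ignored/invalid CSVs. Returns the export filename, or False if no
        data is available.
        """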
        self.set_bookkeep_period(year, month)
        # Load the account translation table
        df_translate = pd.read_csv(self.account_translation, decimal=',', sep=';', encoding='latin-1',
                                   converters={i: str for i in range(0, 200)})
        logging.info(df_translate.shape)
        df_translate['duplicated'] = df_translate.duplicated()
        logging.info(df_translate[df_translate['duplicated']])
        df_translate = df_translate[['Konto_Nr_Händler', 'Konto_Nr_SKR51', 'Marke',
                                     'Marke_HBV', 'Standort', 'Standort_HBV']]
        row = df_translate[['Marke', 'Marke_HBV', 'Standort', 'Standort_HBV']] \
            .drop_duplicates().sort_values(by=['Marke', 'Standort']).iloc[:1].to_dict(orient='records')[0]
        row['Konto_Nr_Händler'] = '01-01-0861-00-00-00'
        row['Konto_Nr_SKR51'] = '01-01-0861-00-00-00'
        df_translate = pd.concat([df_translate, pd.DataFrame.from_records([row])])
        # print(df_translate.tail())
        # df_translate.drop(columns=['duplicated'], inplace=True)
        df_translate.drop_duplicates(inplace=True)
        df_translate.set_index('Konto_Nr_Händler')

        # Load the account balances
        df2 = []
        for csv_file in self.account_bookings:
            df2.append(pd.read_csv(csv_file, decimal=',', sep=';', encoding='latin-1', converters={0: str, 1: str}))
        df_bookings = pd.concat(df2)

        # Restrict the balances to the requested month
        filter_from = self.current_year + self.first_month_of_financial_year
        filter_prev = self.last_year + self.first_month_of_financial_year
        if self.first_month_of_financial_year > self.current_month:
            filter_from = self.last_year + self.first_month_of_financial_year
            filter_prev = self.last_year2 + self.first_month_of_financial_year
        filter_to = self.current_year + self.current_month
        filter_opening = self.current_year + '00'
        filter_prev_opening = self.last_year + '00'
        prev_year_closed = True
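        # If the current year has no opening-balance period ('YYYY00'), rebuild it from the
        # previous year's opening balance plus its postings; the resulting profit/loss is
        # carried forward to account 0861 further below.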
        df_opening_balance = df_bookings[(df_bookings['Bookkeep Period'] == filter_opening)]
        if df_opening_balance.shape[0] == 0:
            df_opening_balance = df_bookings[(df_bookings['Bookkeep Period'] == filter_prev_opening) |
                                             ((df_bookings['Bookkeep Period'] >= filter_prev) &
                                              (df_bookings['Bookkeep Period'] < filter_from))].copy()
            df_opening_balance['Bookkeep Period'] = filter_opening
            prev_year_closed = False
        # df_opening_balance = df_opening_balance.merge(df_translate, how='inner', on='Konto_Nr_Händler')
        df_opening_balance = df_opening_balance[(df_opening_balance['Konto_Nr_Händler'].str.contains(r'-[013]\d\d+-'))]
        df_opening_balance['amount'] = (df_opening_balance['Debit Amount'] + df_opening_balance['Credit Amount']).round(2)
        # df_opening_balance.drop(columns=['Debit Amount', 'Credit Amount', 'Debit Quantity', 'Credit Quantity'], inplace=True)
        # df_opening_balance = df_opening_balance.groupby(['Marke', 'Standort']).sum()
        opening_balance = df_opening_balance['amount'].sum().round(2)
        logging.info('Gewinn/Verlustvortrag')
        logging.info(opening_balance)
        if not prev_year_closed:
            row = {
                'Konto_Nr_Händler': '01-01-0861-00-00-00',
                'Bookkeep Period': filter_opening,
                'Debit Amount': opening_balance * -1,
                'Credit Amount': 0,
                'Debit Quantity': 0,
                'Credit Quantity': 0,
                'amount': opening_balance * -1
            }
            df_opening_balance = pd.concat([df_opening_balance, pd.DataFrame.from_records([row])])

        df_bookings = df_bookings[(df_bookings['Bookkeep Period'] >= filter_from) & (df_bookings['Bookkeep Period'] <= filter_to)]
        df_bookings['amount'] = (df_bookings['Debit Amount'] + df_bookings['Credit Amount']).round(2)
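        # Quantity statistics: duplicate the bookings with a '_STK' suffix on the account
        # number and use the debit/credit quantities instead of the amounts.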
        df_stats = df_bookings.copy()
        # df_stats = df_stats[df_stats['Konto_Nr_Händler'].str.match(r'-[24578]\d\d\d-')]
        df_stats['Konto_Nr_Händler'] = df_stats['Konto_Nr_Händler'].str.replace(r'-(\d\d\d+)-', r'-\1_STK-', regex=True)
        df_stats['amount'] = (df_bookings['Debit Quantity'] + df_bookings['Credit Quantity']).round(2)

        df_bookings = pd.concat([df_opening_balance, df_bookings, df_stats])
        df_bookings = df_bookings[df_bookings['amount'] != 0.00]
        if df_bookings.shape[0] == 0:
            logging.error('ABBRUCH!!! Keine Daten vorhanden!')
            return False

        bk_filter = self.bookkeep_filter()
        period_no = list(bk_filter.keys()).index(filter_to) + 1
        # Map the booking periods to the export column names
        df_bookings['period'] = df_bookings['Bookkeep Period'].apply(lambda x: bk_filter[x])
        logging.info('df_bookings: ' + str(df_bookings.shape))
        # Join against the translation table
        df_combined = df_bookings.merge(df_translate, how='inner', on='Konto_Nr_Händler')
        logging.info('df_combined: ' + str(df_combined.shape))

        # Workaround for missing make assignment
        df_combined['Fremdmarke'] = (df_combined['Marke_HBV'].str.match(r'^0000'))
        df_combined['Marke'] = np.where(df_combined['Fremdmarke'], '99', df_combined['Marke'])
        df_combined['Standort_egal'] = (df_combined['Standort_HBV'].str.match(r'^\d\d_'))
        df_combined['Standort_HBV'] = np.where(df_combined['Fremdmarke'] | df_combined['Standort_egal'],
                                               '0000', df_combined['Standort_HBV'])

        makes = df_combined[['Marke', 'Marke_HBV']].drop_duplicates().sort_values(by=['Marke'])
        sites = df_combined[['Marke', 'Standort', 'Standort_HBV']].drop_duplicates().sort_values(by=['Marke', 'Standort'])
        # df_combined.to_csv(account_invalid, decimal=',', sep=';', encoding='latin-1', index=False)

        # Group
        # df_grouped = df_combined.groupby(['Konto_Nr_SKR51', 'period']).sum()
        df = df_combined.pivot_table(index=['Konto_Nr_SKR51'], columns=['period'], values='amount',
                                     aggfunc=np.sum, margins=True, margins_name='CumulatedYear')
        logging.info('df_pivot: ' + str(df.shape))
        df = self.extract_acct_info(df)
        # df = df_translate.reset_index(drop=True).drop(columns=['Kostenträger_Ebene']).drop_duplicates()
        logging.info(df.shape)
        logging.info(df.columns)
        logging.info(df.head())
        # df = df.merge(df_translate, how='inner', on='Konto_Nr_SKR51')
        logging.info('df: ' + str(df.shape))
        df['Bilanz'] = (df['Konto_Nr'].str.match(r'^[013]'))
        df['Kontoart'] = np.where(df['Bilanz'], '1', '2')
        df['Kontoart'] = np.where(df['Konto_Nr'].str.contains('_STK'), '3', df['Kontoart'])
        df['Kontoart'] = np.where(df['Konto_Nr'].str.match(r'^[9]'), '3', df['Kontoart'])
        df['Konto_1'] = (df['Konto_Nr'].str.slice(0, 1))

        # Workaround for missing make assignment
        df = df.merge(makes, how='left', on='Marke')
        df['Marke'] = np.where(df['Marke_HBV'].isna(), '99', df['Marke'])

        df_debug = df.drop(columns=['Bilanz'])
        logging.info(df_debug.groupby(['Kontoart']).sum())
        logging.info(df_debug.groupby(['Kontoart', 'Konto_1']).sum())
        logging.info(df_debug.groupby(['Konto_Nr']).sum())
        df_debug.groupby(['Konto_Nr']).sum().to_csv(self.debug_file, decimal=',', sep=';', encoding='latin-1')
        # Clean-up of GW (used-vehicle) cost centres and cost carriers
        df['GW_Verkauf_1'] = (df['Konto_Nr'].str.match(r'^[78]0')) & (df['Kostenstelle'].str.match(r'^[^1]\d'))
        df['Kostenstelle'] = np.where(df['GW_Verkauf_1'] == True, '11', df['Kostenstelle'])
        df['GW_Verkauf_2'] = (df['Konto_Nr'].str.match(r'^[78]1')) & (df['Kostenstelle'].str.match(r'^[^2]\d'))
        df['Kostenstelle'] = np.where(df['GW_Verkauf_2'] == True, '21', df['Kostenstelle'])
        df['GW_Verkauf_3'] = (df['Konto_Nr'].str.match(r'^[78]3')) & (df['Kostenstelle'].str.match(r'^[^3]\d'))
        df['Kostenstelle'] = np.where(df['GW_Verkauf_3'] == True, '31', df['Kostenstelle'])
        df['GW_Verkauf_4'] = (df['Konto_Nr'].str.match(r'^[78]4')) & (df['Kostenstelle'].str.match(r'^[^4]\d'))
        df['Kostenstelle'] = np.where(df['GW_Verkauf_4'] == True, '41', df['Kostenstelle'])
        df['GW_Verkauf_x420'] = (df['Konto_Nr'].str.match(r'^[78]420'))
        df['Kostenstelle'] = np.where(df['GW_Verkauf_x420'] == True, '42', df['Kostenstelle'])
        df['GW_Verkauf_5'] = (df['Konto_Nr'].str.match(r'^[78]5')) & (df['Kostenstelle'].str.match(r'^[^5]\d'))
        df['Kostenstelle'] = np.where(df['GW_Verkauf_5'] == True, '51', df['Kostenstelle'])
        df['GW_Verkauf_50'] = (df['Konto_Nr'].str.match(r'^[78]')) & (df['Kostenstelle'].str.match(r'^2'))
        df['Kostenträger'] = np.where(df['GW_Verkauf_50'] == True, '52', df['Kostenträger'])
        df['Kostenträger'] = np.where((df['GW_Verkauf_50'] == True) & (df['Marke'] == '01'), '50', df['Kostenträger'])
        df['GW_Stk_50'] = (df['Konto_Nr'].str.match(r'^9130')) & (df['Kostenstelle'].str.match(r'^2'))
        df['Kostenträger'] = np.where(df['GW_Stk_50'] == True, '52', df['Kostenträger'])
        df['Kostenträger'] = np.where((df['GW_Stk_50'] == True) & (df['Marke'] == '01'), '50', df['Kostenträger'])
        df['Kostenträger'] = np.where(df['Bilanz'] == True, '00', df['Kostenträger'])
        df['Konto_5er'] = (df['Konto_Nr'].str.match('^5')) | (df['Konto_Nr'].str.match('^9143'))
        df['Absatzkanal'] = np.where(df['Konto_5er'] == True, '99', df['Absatzkanal'])
        df['Teile_30_60'] = (df['Konto_Nr'].str.match(r'^[578]')) & \
            (df['Kostenstelle'].str.match(r'^[3]')) & \
            (df['Kostenträger'].str.match(r'^[^6]'))
        df['Kostenträger'] = np.where(df['Teile_30_60'] == True, '60', df['Kostenträger'])
        df['Service_40_70'] = (df['Konto_Nr'].str.match(r'^[578]')) & \
            (df['Kostenstelle'].str.match(r'^[4]')) & \
            (df['Kostenträger'].str.match(r'^[^7]'))
        df['Kostenträger'] = np.where(df['Service_40_70'] == True, '70', df['Kostenträger'])
        from_label = ['Marke', 'Standort', 'Konto_Nr', 'Kostenstelle', 'Absatzkanal', 'Kostenträger']
        to_label = ['Make', 'Site', 'Account', 'Origin', 'SalesChannel', 'CostCarrier']
        df = df.rename(columns=dict(zip(from_label, to_label)))
        makes = makes.rename(columns=dict(zip(from_label, to_label))).to_dict(orient='records')
        sites = sites.rename(columns=dict(zip(from_label, to_label))).to_dict(orient='records')
        df['CostAccountingString'] = df['Make'] + df['Site'] + df['Origin'] + \
            df['SalesChannel'] + df['CostCarrier']
        df['IsNumeric'] = (df['CostAccountingString'].str.isdigit()) & (df['Account'].str.isdigit()) & (df['Account'].str.len() == 4)
        df_invalid = df[df['IsNumeric'] == False]
        df_invalid.to_csv(self.account_invalid, decimal=',', sep=';', encoding='latin-1', index=False)
        export_csv = self.export_filename[:-4] + '.csv'
        df.to_csv(export_csv, decimal=',', sep=';', encoding='latin-1', index=False)
        df = df[df['IsNumeric'] != False].groupby(ACCOUNT_INFO, as_index=False).sum()

        # Add the remaining record info
        df['Decimals'] = 2
        # df['OpeningBalance'] = 0.0
        logging.info(df.shape)
        self.export_xml(df.to_dict(orient='records'), bk_filter, period_no, makes, sites)

        # Join against the translation table - unassigned accounts
        df_ignored = df_bookings.merge(df_translate, how='left', on='Konto_Nr_Händler')
        df_ignored = df_ignored[df_ignored['Konto_Nr_SKR51'].isna()]  # [['Konto_Nr_Händler', 'Bookkeep Period', 'amount', 'quantity']]
        if not df_ignored.empty:
            df_ignored = df_ignored.pivot_table(index=['Konto_Nr_Händler'], columns=['period'], values='amount',
                                                aggfunc=np.sum, margins=True, margins_name='CumulatedYear')
            df_ignored.to_csv(self.account_ignored, decimal=',', sep=';', encoding='latin-1')
        return self.export_filename

    @property
    def export_filename(self):
        return self.export_filename_for_period(self.current_year, self.current_month)

    @property
    def export_info_dir(self):
        return f"{self.base_dir}/Export/{self.current_year}/info/"

    def export_filename_for_period(self, year, month):
        return f"{self.base_dir}/Export/{year}/export_{year}-{month}.xml"

    def export_xml(self, records, bk_filter, period_no, makes, sites, xmlfile=None):
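        """Write the records as an 'HbvData' XML document.

        Monetary amounts are written in cents (value * 100, no decimals). If
        xmlfile is not given, the export filename of the current period is used.
        """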
        record_elements = ACCOUNT_INFO + ['Decimals'] + \
            list(bk_filter.values())[:period_no] + ['CumulatedYear']
        root = ET.Element('HbvData')
        h = ET.SubElement(root, 'Header')
        for k, v in self.header(makes, sites).items():
            ET.SubElement(h, k).text = str(v)

        make_list = ET.SubElement(root, 'MakeList')
        for m in makes:
            e = ET.SubElement(make_list, 'MakeListEntry')
            ET.SubElement(e, 'Make').text = m['Make']
            ET.SubElement(e, 'MakeCode').text = m['Marke_HBV']

        bm_code_list = ET.SubElement(root, 'BmCodeList')
        for s in sites:
            e = ET.SubElement(bm_code_list, 'BmCodeEntry')
            ET.SubElement(e, 'Make').text = s['Make']
            ET.SubElement(e, 'Site').text = s['Site']
            ET.SubElement(e, 'BmCode').text = s['Standort_HBV']

        record_list = ET.SubElement(root, 'RecordList')
        for row in records:
            record = ET.SubElement(record_list, 'Record')
            for e in record_elements:
                child = ET.SubElement(record, e)
                field = row.get(e, 0.0)
                if str(field) == 'nan':
                    field = '0'
                elif type(field) is float:
                    field = '{:.0f}'.format(field * 100)
                child.text = str(field)

        if xmlfile is None:
            xmlfile = self.export_filename
        with open(xmlfile, 'w', encoding='utf-8') as fwh:
            fwh.write(minidom.parseString(ET.tostring(root)).toprettyxml(indent=' '))

    def convert_to_row(self, node):
        return [child.text for child in node]

    def convert_xml_to_csv(self, xmlfile, csvfile):
        record_list = ET.parse(xmlfile).getroot().find('RecordList')
        header = [child.tag for child in record_list.find('Record')]
        bookings = [self.convert_to_row(node) for node in record_list.findall('Record')]
        with open(csvfile, 'w', newline='') as fwh:
            cwh = csv.writer(fwh, delimiter=';')
            cwh.writerow(header)
            cwh.writerows(bookings)
        return True

    def convert_csv_to_xml(self, csvfile, xmlfile):
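        """Rebuild an XML export from a previously exported CSV.

        Note: the make/site metadata is hard-coded below and only covers a single
        make ('01') and site ('01').
        """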
        makes = [{'Make': '01', 'Marke_HBV': '1844'}]
        sites = [{'Make': '01', 'Site': '01', 'Marke_HBV': '1844'}]
        with open(csvfile, 'r', encoding='latin-1') as frh:
            csv_reader = csv.DictReader(frh, delimiter=';')
            self.export_xml(csv_reader, self.bookkeep_filter(), 1, makes, sites, xmlfile)


class Kunden(Enum):
    Altermann = auto()
    Barth_und_Frey = auto()
    Hannuschka = auto()
    Koenig_und_Partner = auto()
    Luchtenberg = auto()
    Russig_Neustadt_deop01 = auto()
    Russig_Neustadt_deop02 = auto()
    Siebrecht = auto()


def gchr_local(base_dir):
    for path in Path(base_dir).glob('*'):
        if path.is_dir():
            print(path.name)
            gchr_export(str(path))


def gchr_export(base_dir):
    gchr = GCHR(base_dir)
    gchr.export_all_periods()


if __name__ == '__main__':
    base_dir = os.getcwd() + '/gcstruct/Kunden'
    if Path(base_dir).exists():
        gchr_local(base_dir)
    else:
        gchr_export(os.getcwd())
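

# Example usage (a minimal sketch; the customer directory below is hypothetical):
#
#   gchr = GCHR('/path/to/gcstruct/Kunden/MusterKunde')   # expects data/Kontenrahmen_uebersetzt.csv
#   gchr.export_all_periods()                             # writes Export/<year>/export_<year>-<month>.xml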