1 год назад · 8fa843d748
--- a/gcstruct/gchr.py
+++ b/gcstruct/gchr.py
@@ -6,20 +6,16 @@ from dataclasses import dataclass
 
				 from datetime import datetime

			
 
				 from pathlib import Path

			
 
				 from typing import Callable

			
 
				-from xml.dom import minidom

			
 
				 

			
 
				 import numpy as np

			
 
				 import pandas as pd

			
 
				 

			
 
				-ACCOUNT_INFO = [

			
 
				-    "Account",

			
 
				-    "Make",

			
 
				-    "Site",

			
 
				-    "Origin",

			
 
				-    "SalesChannel",

			
 
				-    "CostCarrier",

			
 
				-    "CostAccountingString",

			
 
				-]

			
 
				+from gcstruct.gchr_export import (

			
 
				+    ACCOUNT_INFO,

			
 
				+    GchrExportConfig,

			
 
				+    export_skr51_xml,

			
 
				+    header,

			
 
				+)

			
 
				 

			
 
				 TRANSLATE = [

			
 
				     "Konto_Nr_Händler",

			
@@ -37,22 +33,6 @@ TRANSLATE = [
 
				 ]

			
 
				 

			
 
				 

			
 
				-@dataclass

			
 
				-class GchrExportConfig:

			
 
				-    main_site: str

			
 
				-    current_year: str

			
 
				-    current_month: str

			
 
				-    makes_used: dict[str, str]

			
 
				-    sites_used: dict[str, str]

			
 
				-    first_month: str

			
 
				-    period_no: str

			
 
				-    bookkeep_filter: dict[str, str]

			
 
				-    extraction_date: datetime

			
 
				-    export_file: str

			
 
				-    bookkeep_records = dict[str, list[str]]

			
 
				-    header: dict[str, str] | None = None

			
 
				-

			
 
				-

			
 
				 @dataclass

			
 
				 class GchrConfig:

			
 
				     first_month_of_financial_year: str

			
@@ -71,14 +51,20 @@ class GCHR:
 
				     sites: dict[str, str] = None

			
 
				     current_year: str

			
 
				     current_month: str

			
 
				+    timestamp: str

			
 
				 

			
 
				     def __init__(self, base_dir: str) -> None:

			
 
				         self.base_dir = base_dir

			
 
				+        os.makedirs(self.base_dir + "/data", exist_ok=True)

			
 
				+        os.makedirs(self.base_dir + "/export/temp", exist_ok=True)

			
 
				+        os.makedirs(self.base_dir + "/logs", exist_ok=True)

			
 
				 

			
 
				         self.account_translation = f"{self.base_dir}/data/Kontenrahmen_uebersetzt.csv"

			
 
				         self.account_bookings = list(Path(self.base_dir).joinpath("data").glob("GuV_Bilanz_Salden*.csv"))

			
 
				         self.first_month_of_financial_year = "10"

			
 
				 

			
 
				+        self.timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

			
 
				+

			
 
				         pd.set_option("display.max_rows", 500)

			
 
				         pd.set_option("display.float_format", lambda x: "%.2f" % x)

			
 
				 

			
@@ -97,7 +83,7 @@ class GCHR:
 
				 

			
 
				     @property

			
 
				     def debug_file(self) -> str:

			
 
				-        return f"{self.export_info_dir}/debug_{self.period}.csv"

			
 
				+        return f"{self.logs_dir}/debug_{self.timestamp}.csv"

			
 
				 

			
 
				     @property

			
 
				     def account_ignored(self) -> str:

			
@@ -117,21 +103,6 @@ class GCHR:
 
				     def next_year(self) -> str:

			
 
				         return str(int(self.current_year) + 1)

			
 
				 

			
 
				-    @staticmethod

			
 
				-    def header(export_cfg: GchrExportConfig) -> dict[str, str]:

			
 
				-        return {

			
 
				-            "Country": "DE",

			
 
				-            "MainBmCode": export_cfg.main_site,

			
 
				-            "Month": export_cfg.current_month,

			
 
				-            "Year": export_cfg.current_year,

			
 
				-            "Currency": "EUR",

			
 
				-            "NumberOfMakes": len(export_cfg.makes_used),

			
 
				-            "NumberOfSites": len(export_cfg.sites_used),

			
 
				-            "ExtractionDate": export_cfg.extraction_date.strftime("%d.%m.%Y"),

			
 
				-            "ExtractionTime": export_cfg.extraction_date.strftime("%H:%M:%S"),

			
 
				-            "BeginFiscalYear": export_cfg.first_month,

			
 
				-        }

			
 
				-

			
 
				     @property

			
 
				     def bookkeep_filter(self) -> dict[str, str]:

			
 
				         period = [self.current_year + str(i).zfill(2) for i in range(1, 13)]

			
@@ -256,8 +227,8 @@ class GCHR:
 
				                 filename,

			
 
				                 df.to_dict(orient="records"),

			
 
				             )

			
 
				-            export_cfg.header = self.header(export_cfg)

			
 
				-            GCHR.export_skr51_xml(export_cfg)

			
 
				+            export_cfg.header = header(export_cfg)

			
 
				+            export_skr51_xml(export_cfg)

			
 
				 

			
 
				         # Join auf Übersetzung - nicht zugeordnet

			
 
				         df_ignored = df_bookings.merge(self.df_translate, how="left", on="Konto_Nr_Händler")

			
@@ -277,27 +248,33 @@ class GCHR:
 
				     @property

			
 
				     def df_translate(self) -> pd.DataFrame:

			
 
				         if self._df_translate is None:

			
 
				-            df_translate_import = pd.read_csv(

			
 
				-                self.account_translation,

			
 
				-                decimal=",",

			
 
				-                sep=";",

			
 
				-                encoding="latin-1",

			
 
				-                converters={i: str for i in range(0, 200)},

			
 
				-            ).reset_index()

			
 
				-

			
 
				-            self.makes = GCHR.get_makes_from_translation(df_translate_import)

			
 
				-            self.sites = GCHR.get_sites_from_translation(df_translate_import)

			
 
				-

			
 
				-            df_prepared = GCHR.prepare_translation(df_translate_import)

			
 
				-            self._df_translate = self.special_translation(df_prepared)

			
 
				-            self.df_translate2 = (

			
 
				-                self._df_translate.copy()

			
 
				-                .drop(columns=["Konto_Nr_Händler"])

			
 
				-                .drop_duplicates()

			
 
				-                .set_index("Konto_Nr_SKR51")

			
 
				+            self.makes, self.sites, self._df_translate, self.df_translate2 = GCHR.load_translation(

			
 
				+                self.account_translation, self.debug_file, self.export_invalid_filename

			
 
				             )

			
 
				         return self._df_translate

			
 
				 

			
 
    @staticmethod
    def load_translation(
        account_translation: str, debug_file: str, export_invalid_filename: str
    ) -> tuple[dict, dict, pd.DataFrame, pd.DataFrame]:
        """Read the account translation CSV and derive the lookup structures.

        Args:
            account_translation: Path of the semicolon-separated, latin-1 encoded
                translation CSV. The first 200 columns are read as strings.
            debug_file: Passed through to ``special_translation``.
            export_invalid_filename: Passed through to ``special_translation``.

        Returns:
            A tuple ``(makes, sites, df_translate, df_translate2)`` where
            ``df_translate2`` is ``df_translate`` without the
            ``Konto_Nr_Händler`` column, de-duplicated and indexed by
            ``Konto_Nr_SKR51``.
        """
        read_options = {
            "decimal": ",",
            "sep": ";",
            "encoding": "latin-1",
            "converters": {i: str for i in range(0, 200)},
        }
        df_translate_import = pd.read_csv(account_translation, **read_options).reset_index()

        makes = GCHR.get_makes_from_translation(df_translate_import)
        sites = GCHR.get_sites_from_translation(df_translate_import)

        df_prepared = GCHR.prepare_translation(df_translate_import)
        df_translate = GCHR.special_translation(df_prepared, makes, sites, debug_file, export_invalid_filename)

        # Second view of the same data, keyed by the SKR51 account number.
        df_by_skr51 = df_translate.copy()
        df_by_skr51 = df_by_skr51.drop(columns=["Konto_Nr_Händler"])
        df_by_skr51 = df_by_skr51.drop_duplicates()
        df_by_skr51 = df_by_skr51.set_index("Konto_Nr_SKR51")
        return (makes, sites, df_translate, df_by_skr51)

			
 
				+

			
 
				     @staticmethod

			
 
				     def get_makes_from_translation(df_translate_import: pd.DataFrame) -> dict[str, str]:

			
 
				         df_makes = df_translate_import[["Marke", "Marke_HBV"]].copy().drop_duplicates()

			
@@ -333,7 +310,10 @@ class GCHR:
 
				         df.set_index("Konto_Nr_Händler")

			
 
				         return df

			
 
				 

			
 
				-    def special_translation(self, df: pd.DataFrame) -> pd.DataFrame:

			
 
				+    @staticmethod

			
 
				+    def special_translation(

			
 
				+        df: pd.DataFrame, makes: dict[str, str], sites: dict[str, str], debug_file: str, export_invalid_filename: str

			
 
				+    ) -> pd.DataFrame:

			
 
				         df["Konto_Nr_Händler"] = df["Konto_Nr_Händler"].str.upper()

			
 
				         df["Konto_Nr_SKR51"] = df["Konto_Nr_SKR51"].str.upper()

			
 
				         df = GCHR.extract_acct_info(df)

			
@@ -350,17 +330,15 @@ class GCHR:
 
				         df["Konto_1"] = df["Konto_Nr"].str.slice(0, 1)

			
 
				 

			
 
				         # fehlende Marken- und Standortzuordnung

			
 
				-        df["Marke"] = np.where(df["Marke"].isin(self.makes.keys()), df["Marke"], "99")

			
 
				+        df["Marke"] = np.where(df["Marke"].isin(makes.keys()), df["Marke"], "99")

			
 
				         df["Marke_Standort"] = df["Marke"] + "-" + df["Standort"]

			
 
				-        df["Standort"] = np.where(df["Marke_Standort"].isin(self.sites.keys()), df["Standort"], "01")

			
 
				+        df["Standort"] = np.where(df["Marke_Standort"].isin(sites.keys()), df["Standort"], "01")

			
 
				 

			
 
				         df_debug = df.drop(columns=["Bilanz"])

			
 
				         logging.info(df_debug.groupby(["Kontoart"]).aggregate("sum"))

			
 
				         logging.info(df_debug.groupby(["Kontoart", "Konto_1"]).aggregate("sum"))

			
 
				         logging.info(df_debug.groupby(["Konto_Nr"]).aggregate("sum"))

			
 
				-        df_debug.groupby(["Konto_Nr"]).aggregate("sum").to_csv(

			
 
				-            self.debug_file, decimal=",", sep=";", encoding="latin-1"

			
 
				-        )

			
 
				+        df_debug.groupby(["Konto_Nr"]).aggregate("sum").to_csv(debug_file, decimal=",", sep=";", encoding="latin-1")

			
 
				 

			
 
				         # Bereinigung GW-Kostenträger

			
 
				         df["NW_Verkauf_1"] = (df["Konto_Nr"].str.match(r"^[78]0")) & (df["Kostenstelle"].str.match(r"^[^1]\d"))

			
@@ -446,7 +424,7 @@ class GCHR:
 
				             # & (df["Konto_Nr_SKR51"].str.len() == 19)

			
 
				         )

			
 
				         df_invalid = df[df["IsNumeric"] == False]

			
 
				-        df_invalid.to_csv(self.export_invalid_filename, decimal=",", sep=";", encoding="latin-1", index=False)

			
 
				+        df_invalid.to_csv(export_invalid_filename, decimal=",", sep=";", encoding="latin-1", index=False)

			
 
				         return df[df["IsNumeric"] == True][TRANSLATE]

			
 
				 

			
 
				     def load_bookings_from_file(self) -> None:

			
@@ -541,6 +519,10 @@ class GCHR:
 
				     def export_info_dir(self) -> str:

			
 
				         return f"{self.base_dir}/Export/{self.current_year}/info/"

			
 
				 

			
 
				+    @property

			
 
				+    def logs_dir(self) -> str:

			
 
				+        return f"{self.base_dir}/Logs/"

			
 
				+

			
 
				     @property

			
 
				     def export_invalid_filename(self) -> str:

			
 
				         return f"{self.base_dir}/Export/ungueltig.csv"

			
@@ -548,48 +530,6 @@ class GCHR:
 
				     def export_filename_for_period(self, year: str, month: str) -> str:

			
 
				         return f"{self.base_dir}/Export/{year}/export_{year}-{month}.xml"

			
 
				 

			
 
				-    @staticmethod

			
 
				-    def export_skr51_xml(export_cfg: GchrExportConfig):

			
 
				-        record_elements = (

			
 
				-            ACCOUNT_INFO

			
 
				-            + ["Decimals"]

			
 
				-            + list(export_cfg.bookkeep_filter.values())[: export_cfg.period_no]

			
 
				-            + ["CumulatedYear"]

			
 
				-        )

			
 
				-        root = ET.Element("HbvData")

			
 
				-        h = ET.SubElement(root, "Header")

			
 
				-        for k, v in export_cfg.header.items():

			
 
				-            ET.SubElement(h, k).text = str(v)

			
 
				-

			
 
				-        make_list = ET.SubElement(root, "MakeList")

			
 
				-        for make, make_code in export_cfg.makes_used.items():

			
 
				-            e = ET.SubElement(make_list, "MakeListEntry")

			
 
				-            ET.SubElement(e, "Make").text = make

			
 
				-            ET.SubElement(e, "MakeCode").text = make_code

			
 
				-

			
 
				-        bm_code_list = ET.SubElement(root, "BmCodeList")

			
 
				-        for s, bmcode in export_cfg.sites_used.items():

			
 
				-            make, site = s.split("-")

			
 
				-            e = ET.SubElement(bm_code_list, "BmCodeEntry")

			
 
				-            ET.SubElement(e, "Make").text = make

			
 
				-            ET.SubElement(e, "Site").text = site

			
 
				-            ET.SubElement(e, "BmCode").text = bmcode

			
 
				-

			
 
				-        record_list = ET.SubElement(root, "RecordList")

			
 
				-        for row in export_cfg.bookkeep_records:

			
 
				-            record = ET.SubElement(record_list, "Record")

			
 
				-            for e in record_elements:

			
 
				-                child = ET.SubElement(record, e)

			
 
				-                field = row.get(e, 0.0)

			
 
				-                if str(field) == "nan":

			
 
				-                    field = "0"

			
 
				-                elif type(field) is float:

			
 
				-                    field = "{:.0f}".format(field * 100)

			
 
				-                child.text = str(field)

			
 
				-

			
 
				-        with open(export_cfg.export_file, "w", encoding="utf-8") as fwh:

			
 
				-            fwh.write(minidom.parseString(ET.tostring(root)).toprettyxml(indent="  "))

			
 
				-

			
 
				     @staticmethod

			
 
				     def convert_to_row(node: list[ET.Element]) -> list[str]:

			
 
				         return [child.text for child in node]

			
--- a/gcstruct/gchr_export.py
+++ b/gcstruct/gchr_export.py
@@ -0,0 +1,87 @@
 
import xml.etree.ElementTree as ET
from dataclasses import dataclass, field
from datetime import datetime
from xml.dom import minidom
			
 
				+
			
 
				+ACCOUNT_INFO = [
			
 
				+    "Account",
			
 
				+    "Make",
			
 
				+    "Site",
			
 
				+    "Origin",
			
 
				+    "SalesChannel",
			
 
				+    "CostCarrier",
			
 
				+    "CostAccountingString",
			
 
				+]
			
 
				+
			
 
				+
			
 
				+@dataclass
			
 
				+class GchrExportConfig:
			
 
				+    main_site: str
			
 
				+    current_year: str
			
 
				+    current_month: str
			
 
				+    makes_used: dict[str, str]
			
 
				+    sites_used: dict[str, str]
			
 
				+    first_month: str
			
 
				+    period_no: str
			
 
				+    bookkeep_filter: dict[str, str]
			
 
				+    extraction_date: datetime
			
 
				+    export_file: str
			
 
				+    bookkeep_records = dict[str, list[str]]
			
 
				+    header: dict[str, str] | None = None
			
 
				+
			
 
				+
			
 
				+def export_skr51_xml(export_cfg: GchrExportConfig):
			
 
				+    record_elements = (
			
 
				+        ACCOUNT_INFO
			
 
				+        + ["Decimals"]
			
 
				+        + list(export_cfg.bookkeep_filter.values())[: export_cfg.period_no]
			
 
				+        + ["CumulatedYear"]
			
 
				+    )
			
 
				+    root = ET.Element("HbvData")
			
 
				+    h = ET.SubElement(root, "Header")
			
 
				+    for k, v in export_cfg.header.items():
			
 
				+        ET.SubElement(h, k).text = str(v)
			
 
				+
			
 
				+    make_list = ET.SubElement(root, "MakeList")
			
 
				+    for make, make_code in export_cfg.makes_used.items():
			
 
				+        e = ET.SubElement(make_list, "MakeListEntry")
			
 
				+        ET.SubElement(e, "Make").text = make
			
 
				+        ET.SubElement(e, "MakeCode").text = make_code
			
 
				+
			
 
				+    bm_code_list = ET.SubElement(root, "BmCodeList")
			
 
				+    for s, bmcode in export_cfg.sites_used.items():
			
 
				+        make, site = s.split("-")
			
 
				+        e = ET.SubElement(bm_code_list, "BmCodeEntry")
			
 
				+        ET.SubElement(e, "Make").text = make
			
 
				+        ET.SubElement(e, "Site").text = site
			
 
				+        ET.SubElement(e, "BmCode").text = bmcode
			
 
				+
			
 
				+    record_list = ET.SubElement(root, "RecordList")
			
 
				+    for row in export_cfg.bookkeep_records:
			
 
				+        record = ET.SubElement(record_list, "Record")
			
 
				+        for e in record_elements:
			
 
				+            child = ET.SubElement(record, e)
			
 
				+            field = row.get(e, 0.0)
			
 
				+            if str(field) == "nan":
			
 
				+                field = "0"
			
 
				+            elif type(field) is float:
			
 
				+                field = "{:.0f}".format(field * 100)
			
 
				+            child.text = str(field)
			
 
				+
			
 
				+    with open(export_cfg.export_file, "w", encoding="utf-8") as fwh:
			
 
				+        fwh.write(minidom.parseString(ET.tostring(root)).toprettyxml(indent="  "))
			
 
				+
			
 
				+
			
 
				+def header(export_cfg: GchrExportConfig) -> dict[str, str]:
			
 
				+    return {
			
 
				+        "Country": "DE",
			
 
				+        "MainBmCode": export_cfg.main_site,
			
 
				+        "Month": export_cfg.current_month,
			
 
				+        "Year": export_cfg.current_year,
			
 
				+        "Currency": "EUR",
			
 
				+        "NumberOfMakes": len(export_cfg.makes_used),
			
 
				+        "NumberOfSites": len(export_cfg.sites_used),
			
 
				+        "ExtractionDate": export_cfg.extraction_date.strftime("%d.%m.%Y"),
			
 
				+        "ExtractionTime": export_cfg.extraction_date.strftime("%H:%M:%S"),
			
 
				+        "BeginFiscalYear": export_cfg.first_month,
			
 
				+    }
			
--- a/gcstruct/tests/test_gchr.py
+++ b/gcstruct/tests/test_gchr.py
@@ -1,6 +1,8 @@
 
				 import unittest
			
 
				 
			
 
				-from gcstruct.gchr import GCHR
			
 
				+import pandas as pd
			
 
				+
			
 
				+from gcstruct.gchr import GCHR, TRANSLATE
			
 
				 
			
 
				 
			
 
				 class TestGchr(unittest.TestCase):
			
@@ -17,3 +19,11 @@ class TestGchr(unittest.TestCase):
 
				         self.assertEqual(len(gchr.account_bookings), 2)
			
 
				         self.assertEqual(gchr.account_bookings[0].name, "GuV_Bilanz_Salden.csv")
			
 
				         self.assertEqual(gchr.account_bookings[1].name, "GuV_Bilanz_Salden_deop03.csv")
			
 
				+
			
 
    def test_translation_existing(self):
        """The translation of the first fixture directory loads as a non-empty frame with the TRANSLATE columns."""
        translation = GCHR(self.base_dir_1).df_translate
        self.assertIsInstance(translation, pd.DataFrame)

        row_count, column_count = translation.shape
        self.assertEqual(column_count, 12)
        self.assertListEqual(list(translation.columns), TRANSLATE)
        self.assertGreater(row_count, 0, "Translation not empty")