Переглянути джерело

Translation und Convert ausgelagert

gc-server3 3 місяців тому
батько
коміт
60cc306dc5
4 змінених файлів з 268 додано та 245 видалено
  1. 16 244
      gcstruct/gchr.py
  2. 28 0
      gcstruct/gchr_convert.py
  3. 216 0
      gcstruct/gchr_translate.py
  4. 8 1
      gcstruct/tests/test_gchr.py

+ 16 - 244
gcstruct/gchr.py

@@ -1,13 +1,10 @@
-import csv
 import logging
 import logging
 import os
 import os
-import xml.etree.ElementTree as ET
 from dataclasses import dataclass
 from dataclasses import dataclass
 from datetime import datetime
 from datetime import datetime
 from pathlib import Path
 from pathlib import Path
 from typing import Callable
 from typing import Callable
 
 
-import numpy as np
 import pandas as pd
 import pandas as pd
 
 
 from gcstruct.gchr_export import (
 from gcstruct.gchr_export import (
@@ -16,29 +13,15 @@ from gcstruct.gchr_export import (
     export_skr51_xml,
     export_skr51_xml,
     header,
     header,
 )
 )
-
-TRANSLATE = [
-    "Konto_Nr_Händler",
-    "Konto_Nr_SKR51",
-    "Marke",
-    "Standort",
-    "Konto_Nr",
-    "Kostenstelle",
-    "Absatzkanal",
-    "Kostenträger",
-    "Kontoart",
-    "Konto_1",
-    "KRM",
-    "IsNumeric",
-]
+from gcstruct.gchr_translate import load_translation
 
 
 
 
 @dataclass
 @dataclass
 class GchrConfig:
 class GchrConfig:
     first_month_of_financial_year: str
     first_month_of_financial_year: str
-    data_path: str
-    gcstruct_path: str
-    export_path: str
+    data_dir: str
+    gcstruct_dir: str
+    export_dir: str
     export_fn = Callable[[GchrExportConfig], None]
     export_fn = Callable[[GchrExportConfig], None]
 
 
 
 
@@ -117,27 +100,17 @@ class GCHR:
         rename_to = ["OpeningBalance"] + ["Period" + str(i).zfill(2) for i in range(1, 13)]
         rename_to = ["OpeningBalance"] + ["Period" + str(i).zfill(2) for i in range(1, 13)]
         return dict(zip(period, rename_to))
         return dict(zip(period, rename_to))
 
 
-    @staticmethod
-    def extract_acct_info(df: pd.DataFrame) -> pd.DataFrame:
-        acct_info = [
-            "Marke",
-            "Standort",
-            "Konto_Nr",
-            "Kostenstelle",
-            "Absatzkanal",
-            "Kostenträger",
-        ]
-        df["HasFiveDashes"] = df["Konto_Nr_SKR51"].str.count("-") == 5
-        df["Invalid"] = "XX-XX-XXXX-XX-XX-XX"
-        df["Konto_Nr_SKR51"] = np.where(
-            df["HasFiveDashes"],
-            df["Konto_Nr_SKR51"],
-            df["Invalid"],
-        )
-        df[acct_info] = df["Konto_Nr_SKR51"].str.split(pat="-", n=6, expand=True)
-        return df
-
     def export_all_periods(self, overwrite=False, today=None) -> None:
     def export_all_periods(self, overwrite=False, today=None) -> None:
+        periods = GCHR.get_all_periods(today)
+
+        for year, month in periods:
+            filename = self.export_filename_for_period(year, month)
+            if overwrite or not Path(filename).exists():
+                os.makedirs(Path(filename).parent.joinpath("info"), exist_ok=True)
+                self.export_period(year, month)
+
+    @staticmethod
+    def get_all_periods(today=None) -> list[tuple[str, str]]:
         dt = datetime.now()
         dt = datetime.now()
         if today is not None:
         if today is not None:
             dt = datetime.fromisoformat(today)
             dt = datetime.fromisoformat(today)
@@ -145,11 +118,7 @@ class GCHR:
         periods = [(prev, str(x).zfill(2)) for x in range(dt.month, 13)] + [
         periods = [(prev, str(x).zfill(2)) for x in range(dt.month, 13)] + [
             (str(dt.year), str(x).zfill(2)) for x in range(1, dt.month)
             (str(dt.year), str(x).zfill(2)) for x in range(1, dt.month)
         ]
         ]
-        for year, month in periods:
-            filename = self.export_filename_for_period(year, month)
-            if overwrite or not Path(filename).exists():
-                os.makedirs(Path(filename).parent.joinpath("info"), exist_ok=True)
-                self.export_period(year, month)
+        return periods
 
 
     def export_period(self, year: str, month: str) -> str:
     def export_period(self, year: str, month: str) -> str:
         self.set_bookkeep_period(year, month)
         self.set_bookkeep_period(year, month)
@@ -248,185 +217,11 @@ class GCHR:
     @property
     @property
     def df_translate(self) -> pd.DataFrame:
     def df_translate(self) -> pd.DataFrame:
         if self._df_translate is None:
         if self._df_translate is None:
-            self.makes, self.sites, self._df_translate, self.df_translate2 = GCHR.load_translation(
+            self.makes, self.sites, self._df_translate, self.df_translate2 = load_translation(
                 self.account_translation, self.debug_file, self.export_invalid_filename
                 self.account_translation, self.debug_file, self.export_invalid_filename
             )
             )
         return self._df_translate
         return self._df_translate
 
 
-    @staticmethod
-    def load_translation(
-        account_translation: str, debug_file: str, export_invalid_filename: str
-    ) -> tuple[dict, dict, pd.DataFrame, pd.DataFrame]:
-        df_translate_import = pd.read_csv(
-            account_translation,
-            decimal=",",
-            sep=";",
-            encoding="latin-1",
-            converters={i: str for i in range(0, 200)},
-        ).reset_index()
-
-        makes = GCHR.get_makes_from_translation(df_translate_import)
-        sites = GCHR.get_sites_from_translation(df_translate_import)
-
-        df_prepared = GCHR.prepare_translation(df_translate_import)
-        df_translate = GCHR.special_translation(df_prepared, makes, sites, debug_file, export_invalid_filename)
-        df_translate2 = (
-            df_translate.copy().drop(columns=["Konto_Nr_Händler"]).drop_duplicates().set_index("Konto_Nr_SKR51")
-        )
-        return (makes, sites, df_translate, df_translate2)
-
-    @staticmethod
-    def get_makes_from_translation(df_translate_import: pd.DataFrame) -> dict[str, str]:
-        df_makes = df_translate_import[["Marke", "Marke_HBV"]].copy().drop_duplicates()
-        df_makes = df_makes[df_makes["Marke_HBV"] != "0000"]
-        makes = dict([(e["Marke"], e["Marke_HBV"]) for e in df_makes.to_dict(orient="records")])
-        makes["99"] = "0000"
-        return makes
-
-    @staticmethod
-    def get_sites_from_translation(df_translate_import: pd.DataFrame) -> dict[str, str]:
-        df_sites = df_translate_import[["Marke", "Standort", "Standort_HBV"]].copy().drop_duplicates()
-        df_sites["Standort_HBV"] = np.where(df_sites["Standort_HBV"].str.len() != 6, "0000", df_sites["Standort_HBV"])
-        sites = dict(
-            [(e["Marke"] + "-" + e["Standort"], e["Standort_HBV"]) for e in df_sites.to_dict(orient="records")]
-        )
-        return sites
-
-    @staticmethod
-    def prepare_translation(df_translate_import: pd.DataFrame) -> pd.DataFrame:
-        df = df_translate_import[
-            [
-                "Konto_Nr_Händler",
-                "Konto_Nr_SKR51",
-            ]
-        ].drop_duplicates()
-        logging.info(df.shape)
-
-        row = {
-            "Konto_Nr_Händler": "01-01-0861-00-00-00",
-            "Konto_Nr_SKR51": "01-01-0861-00-00-00",
-        }
-        df = pd.concat([df, pd.DataFrame.from_records([row])])
-        df.set_index("Konto_Nr_Händler")
-        return df
-
-    @staticmethod
-    def special_translation(
-        df: pd.DataFrame, makes: dict[str, str], sites: dict[str, str], debug_file: str, export_invalid_filename: str
-    ) -> pd.DataFrame:
-        df["Konto_Nr_Händler"] = df["Konto_Nr_Händler"].str.upper()
-        df["Konto_Nr_SKR51"] = df["Konto_Nr_SKR51"].str.upper()
-        df = GCHR.extract_acct_info(df)
-        df["Konto_Nr"] = df["Konto_Nr"].str.upper()
-        logging.info(df.shape)
-        logging.info(df.columns)
-        logging.info(df.head())
-
-        logging.info("df: " + str(df.shape))
-        df["Bilanz"] = df["Konto_Nr"].str.match(r"^[013]")
-        df["Kontoart"] = np.where(df["Bilanz"], "1", "2")
-        df["Kontoart"] = np.where(df["Konto_Nr"].str.contains("_STK"), "3", df["Kontoart"])
-        df["Kontoart"] = np.where(df["Konto_Nr"].str.match(r"^[9]"), "3", df["Kontoart"])
-        df["Konto_1"] = df["Konto_Nr"].str.slice(0, 1)
-
-        # fehlende Marken- und Standortzuordnung
-        df["Marke"] = np.where(df["Marke"].isin(makes.keys()), df["Marke"], "99")
-        df["Marke_Standort"] = df["Marke"] + "-" + df["Standort"]
-        df["Standort"] = np.where(df["Marke_Standort"].isin(sites.keys()), df["Standort"], "01")
-
-        df_debug = df.drop(columns=["Bilanz"])
-        logging.info(df_debug.groupby(["Kontoart"]).aggregate("sum"))
-        logging.info(df_debug.groupby(["Kontoart", "Konto_1"]).aggregate("sum"))
-        logging.info(df_debug.groupby(["Konto_Nr"]).aggregate("sum"))
-        df_debug.groupby(["Konto_Nr"]).aggregate("sum").to_csv(debug_file, decimal=",", sep=";", encoding="latin-1")
-
-        # Bereinigung GW-Kostenträger
-        df["NW_Verkauf_1"] = (df["Konto_Nr"].str.match(r"^[78]0")) & (df["Kostenstelle"].str.match(r"^[^1]\d"))
-        df["Kostenstelle"] = np.where(df["NW_Verkauf_1"] == True, "11", df["Kostenstelle"])
-
-        df["Konto_7010"] = df["Konto_Nr"].str.match(r"^[78]01[01]")
-        df["Kostenstelle"] = np.where(df["Konto_7010"] == True, "14", df["Kostenstelle"])
-
-        df["GW_Verkauf_2"] = (df["Konto_Nr"].str.match(r"^[78]1")) & (df["Kostenstelle"].str.match(r"^[^2]\d"))
-        df["Kostenstelle"] = np.where(df["GW_Verkauf_2"] == True, "21", df["Kostenstelle"])
-
-        df["GW_Verkauf_3"] = (df["Konto_Nr"].str.match(r"^[78]3")) & (df["Kostenstelle"].str.match(r"^[^3]\d"))
-        df["Kostenstelle"] = np.where(df["GW_Verkauf_3"] == True, "31", df["Kostenstelle"])
-
-        df["GW_Verkauf_4"] = (df["Konto_Nr"].str.match(r"^[78]4")) & (df["Kostenstelle"].str.match(r"^[^4]\d"))
-        df["Kostenstelle"] = np.where(df["GW_Verkauf_4"] == True, "41", df["Kostenstelle"])
-
-        df["GW_Verkauf_x420"] = df["Konto_Nr"].str.match(r"^[78]420")
-        df["Kostenstelle"] = np.where(df["GW_Verkauf_x420"] == True, "42", df["Kostenstelle"])
-
-        df["GW_Verkauf_5"] = (df["Konto_Nr"].str.match(r"^[78]5")) & (df["Kostenstelle"].str.match(r"^[^5]\d"))
-        df["Kostenstelle"] = np.where(df["GW_Verkauf_5"] == True, "51", df["Kostenstelle"])
-
-        df["GW_Verkauf_50"] = (df["Konto_Nr"].str.match(r"^[78]")) & (df["Kostenstelle"].str.match(r"^2"))
-        df["Kostenträger"] = np.where(df["GW_Verkauf_50"] == True, "52", df["Kostenträger"])
-        df["Kostenträger"] = np.where(
-            (df["GW_Verkauf_50"] == True) & (df["Marke"] == "01"),
-            "50",
-            df["Kostenträger"],
-        )
-
-        df["NW_Verkauf_00"] = (
-            (df["Konto_Nr"].str.match(r"^[78]2"))
-            & (df["Kostenstelle"].str.match(r"^1"))
-            & (df["Kostenträger"].str.match(r"^[^01234]"))
-        )
-        df["Kostenträger"] = np.where(df["NW_Verkauf_00"] == True, "00", df["Kostenträger"])
-
-        df["GW_Stk_50"] = (df["Konto_Nr"].str.match(r"^9130")) & (df["Kostenstelle"].str.match(r"^2"))
-        df["Kostenträger"] = np.where(df["GW_Stk_50"] == True, "52", df["Kostenträger"])
-        df["Kostenträger"] = np.where((df["GW_Stk_50"] == True) & (df["Marke"] == "01"), "50", df["Kostenträger"])
-
-        df["Kostenträger"] = np.where(df["Bilanz"] == True, "00", df["Kostenträger"])
-
-        df["Konto_5er"] = (df["Konto_Nr"].str.match("^5")) | (df["Konto_Nr"].str.match("^9143"))
-        df["Absatzkanal"] = np.where(df["Konto_5er"] == True, "99", df["Absatzkanal"])
-
-        df["Konto_5005"] = (df["Konto_Nr"].str.match("^5005")) & (df["Kostenstelle"].str.match(r"^[^12]"))
-        df["Kostenstelle"] = np.where(df["Konto_5005"] == True, "20", df["Kostenstelle"])
-        df["Kostenträger"] = np.where(df["Konto_5005"] == True, "50", df["Kostenträger"])
-
-        df["Konto_5007"] = (df["Konto_Nr"].str.match("^5007")) & (df["Kostenstelle"].str.match(r"^([^4]|42)"))
-        df["Kostenstelle"] = np.where(df["Konto_5007"] == True, "41", df["Kostenstelle"])
-        df["Kostenträger"] = np.where(df["Konto_5007"] == True, "70", df["Kostenträger"])
-
-        df["Konto_914er"] = (df["Konto_Nr"].str.match("^914[34]")) & (df["Kostenträger"].str.match(r"^[^7]"))
-        df["Kostenträger"] = np.where(df["Konto_914er"] == True, "70", df["Kostenträger"])
-
-        df["Teile_30_60"] = (
-            (df["Konto_Nr"].str.match(r"^[578]"))
-            & (df["Kostenstelle"].str.match(r"^[3]"))
-            & (df["Kostenträger"].str.match(r"^[^6]"))
-        )
-        df["Kostenträger"] = np.where(df["Teile_30_60"] == True, "60", df["Kostenträger"])
-
-        df["Service_40_70"] = (
-            (df["Konto_Nr"].str.match(r"^[578]"))
-            & (df["Kostenstelle"].str.match(r"^[4]"))
-            & (df["Kostenträger"].str.match(r"^[^7]"))
-        )
-        df["Kostenträger"] = np.where(df["Service_40_70"] == True, "70", df["Kostenträger"])
-
-        df["KRM"] = df["Marke"] + df["Standort"] + df["Kostenstelle"] + df["Absatzkanal"] + df["Kostenträger"]
-        df["Konto_Nr_SKR51"] = (
-            (df["Marke"] + "-" + df["Standort"] + "-" + df["Konto_Nr"])
-            + "-"
-            + (df["Kostenstelle"] + "-" + df["Absatzkanal"] + "-" + df["Kostenträger"])
-        )
-        df["IsNumeric"] = (
-            (df["KRM"].str.isdigit())
-            & (df["Konto_Nr"].str.isdigit())
-            & (df["Konto_Nr"].str.len() == 4)
-            # & (df["Konto_Nr_SKR51"].str.len() == 19)
-        )
-        df_invalid = df[df["IsNumeric"] == False]
-        df_invalid.to_csv(export_invalid_filename, decimal=",", sep=";", encoding="latin-1", index=False)
-        return df[df["IsNumeric"] == True][TRANSLATE]
-
     def load_bookings_from_file(self) -> None:
     def load_bookings_from_file(self) -> None:
         df_list: list[pd.DataFrame] = []
         df_list: list[pd.DataFrame] = []
         timestamps: list[float] = []
         timestamps: list[float] = []
@@ -530,29 +325,6 @@ class GCHR:
     def export_filename_for_period(self, year: str, month: str) -> str:
     def export_filename_for_period(self, year: str, month: str) -> str:
         return f"{self.base_dir}/Export/{year}/export_{year}-{month}.xml"
         return f"{self.base_dir}/Export/{year}/export_{year}-{month}.xml"
 
 
-    @staticmethod
-    def convert_to_row(node: list[ET.Element]) -> list[str]:
-        return [child.text for child in node]
-
-    @staticmethod
-    def convert_xml_to_csv(xmlfile: str, csvfile: str) -> bool:
-        with open(xmlfile) as frh:
-            record_list = ET.parse(frh).getroot().find("RecordList")
-        header = [child.tag for child in record_list.find("Record")]
-        bookings = [GCHR.convert_to_row(node) for node in record_list.findall("Record")]
-        with open(csvfile, "w") as fwh:
-            cwh = csv.writer(fwh, delimiter=";")
-            cwh.writerow(header)
-            cwh.writerows(bookings)
-        return True
-
-    def convert_csv_to_xml(self, csvfile: str, xmlfile: str) -> None:
-        self.makes = {"01": "1844"}
-        self.sites = {"01-01": "1844"}
-        with open(csvfile, "r", encoding="latin-1") as frh:
-            csv_reader = csv.DictReader(frh, delimiter=";")
-            GCHR.export_skr51_xml(csv_reader, self.bookkeep_filter(), 1, list(self.sites.values())[0], xmlfile)
-
 
 
 def gchr_local() -> None:
 def gchr_local() -> None:
     base_dir = os.getcwd() + "/../GCHR2_Testdaten/Kunden"
     base_dir = os.getcwd() + "/../GCHR2_Testdaten/Kunden"

+ 28 - 0
gcstruct/gchr_convert.py

@@ -0,0 +1,28 @@
+import csv
+import xml.etree.ElementTree as ET
+
+from gcstruct.gchr import GCHR
+
+
+def convert_to_row(node: ET.Element) -> "list[str | None]":
+    """Return the text of every direct child of *node*, in document order.
+
+    Args:
+        node: The XML element whose children form one row.
+
+    Returns:
+        One entry per child element; an entry is ``None`` when that child has no text.
+    """
+    return [child.text for child in node]
+
+
+def convert_xml_to_csv(xmlfile: str, csvfile: str) -> bool:
+    """Write the ``Record`` elements of an XML file as a semicolon-separated CSV file.
+
+    The header row is built from the child tags of the first ``Record`` below
+    ``RecordList``; each ``Record`` becomes one CSV row holding the text of its children.
+
+    Args:
+        xmlfile: Path of the XML file to read.
+        csvfile: Path of the CSV file to write; an existing file is overwritten.
+
+    Returns:
+        Always ``True``.
+    """
+    with open(xmlfile) as frh:
+        record_list = ET.parse(frh).getroot().find("RecordList")
+    header = [child.tag for child in record_list.find("Record")]
+    # convert_to_row is the module-level function of this file, not an attribute of GCHR.
+    bookings = [convert_to_row(node) for node in record_list.findall("Record")]
+    # The csv module asks for newline="" so the writer controls line endings itself.
+    with open(csvfile, "w", newline="") as fwh:
+        cwh = csv.writer(fwh, delimiter=";")
+        cwh.writerow(header)
+        cwh.writerows(bookings)
+    return True
+
+
+def convert_csv_to_xml(self, csvfile: str, xmlfile: str) -> None:
+    """Read a semicolon-separated, latin-1 encoded CSV file and pass its rows to ``GCHR.export_skr51_xml``.
+
+    NOTE(review): this is a module-level function that still takes ``self``. It assigns
+    ``self.makes`` / ``self.sites`` and calls ``self.bookkeep_filter()``, so it presumably
+    expects a GCHR instance as first argument - confirm against the callers.
+    """
+    # Overwrites makes/sites on the passed object with fixed single-entry mappings.
+    self.makes = {"01": "1844"}
+    self.sites = {"01-01": "1844"}
+    with open(csvfile, "r", encoding="latin-1") as frh:
+        csv_reader = csv.DictReader(frh, delimiter=";")
+        # The first value of self.sites is passed along with the filter, the literal 1 and the target path.
+        # NOTE(review): verify that GCHR really exposes export_skr51_xml as an attribute.
+        GCHR.export_skr51_xml(csv_reader, self.bookkeep_filter(), 1, list(self.sites.values())[0], xmlfile)

+ 216 - 0
gcstruct/gchr_translate.py

@@ -0,0 +1,216 @@
+import logging
+
+import numpy as np
+import pandas as pd
+
+TRANSLATE = [
+    "Konto_Nr_Händler",
+    "Konto_Nr_SKR51",
+    "Marke",
+    "Standort",
+    "Konto_Nr",
+    "Kostenstelle",
+    "Absatzkanal",
+    "Kostenträger",
+    "Kontoart",
+    "Konto_1",
+    "KRM",
+    "IsNumeric",
+]
+
+
+def load_translation(
+    account_translation: str, debug_file: str, export_invalid_filename: str
+) -> tuple[dict[str, str], dict[str, str], pd.DataFrame, pd.DataFrame]:
+    """Load the account translation CSV and derive the lookup structures from it.
+
+    The file is read as semicolon-separated latin-1 text, with the first 200 column
+    positions converted to ``str``. Besides returning the results, an overview of
+    ``Kontoart`` / ``Konto_Nr_SKR51`` / ``Konto_Nr_Händler`` is written next to the input
+    file: the last four characters of ``account_translation`` are replaced by ``_GCHR.csv``.
+
+    Args:
+        account_translation: Path of the translation CSV file.
+        debug_file: Path handed to ``special_translation`` for its debug output.
+        export_invalid_filename: Path handed to ``special_translation`` for invalid rows.
+
+    Returns:
+        ``(makes, sites, df_translate, df_translate2)`` where ``df_translate2`` is
+        ``df_translate`` without ``Konto_Nr_Händler``, de-duplicated and indexed by
+        ``Konto_Nr_SKR51``.
+    """
+    raw = pd.read_csv(
+        account_translation,
+        sep=";",
+        decimal=",",
+        encoding="latin-1",
+        converters={position: str for position in range(200)},
+    )
+    raw = raw.reset_index()
+
+    makes = get_makes_from_translation(raw)
+    sites = get_sites_from_translation(raw)
+
+    prepared = prepare_translation(raw)
+    df_translate = special_translation(prepared, makes, sites, debug_file, export_invalid_filename)
+
+    by_skr51 = df_translate.copy().drop(columns=["Konto_Nr_Händler"])
+    by_skr51 = by_skr51.drop_duplicates().set_index("Konto_Nr_SKR51")
+
+    overview = df_translate[["Kontoart", "Konto_Nr_SKR51", "Konto_Nr_Händler"]].copy()
+    overview = overview.sort_values(by=["Kontoart", "Konto_Nr_SKR51"])
+    overview_file = account_translation[:-4] + "_GCHR.csv"
+    overview.to_csv(overview_file, decimal=",", sep=";", encoding="latin-1", index=False)
+    return (makes, sites, df_translate, by_skr51)
+
+
+def get_makes_from_translation(df_translate_import: pd.DataFrame) -> dict[str, str]:
+    """Map each ``Marke`` to its ``Marke_HBV`` value.
+
+    Rows whose ``Marke_HBV`` equals ``"0000"`` are left out; the key ``"99"`` is always
+    present and mapped to ``"0000"``.
+    """
+    pairs = df_translate_import[["Marke", "Marke_HBV"]].drop_duplicates()
+    pairs = pairs[pairs["Marke_HBV"] != "0000"]
+    makes = dict(zip(pairs["Marke"], pairs["Marke_HBV"]))
+    makes["99"] = "0000"
+    return makes
+
+
+def get_sites_from_translation(df_translate_import: pd.DataFrame) -> dict[str, str]:
+    """Map ``"<Marke>-<Standort>"`` to the matching ``Standort_HBV`` value.
+
+    A ``Standort_HBV`` whose length is not exactly 6 is replaced by ``"0000"``.
+    """
+    site_rows = df_translate_import[["Marke", "Standort", "Standort_HBV"]].drop_duplicates()
+    has_six_chars = site_rows["Standort_HBV"].str.len() == 6
+    codes = site_rows["Standort_HBV"].where(has_six_chars, "0000")
+    keys = site_rows["Marke"] + "-" + site_rows["Standort"]
+    return dict(zip(keys, codes))
+
+
+def prepare_translation(df_translate_import: pd.DataFrame) -> pd.DataFrame:
+    """Return the distinct account pairs of the import plus one fixed extra row.
+
+    Args:
+        df_translate_import: Imported translation table; must contain the columns
+            ``Konto_Nr_Händler`` and ``Konto_Nr_SKR51``.
+
+    Returns:
+        A new frame with exactly those two columns, de-duplicated, followed by the
+        row ``01-01-0861-00-00-00`` -> ``01-01-0861-00-00-00``. The original row labels
+        are kept, so the appended row carries the label 0.
+    """
+    df = df_translate_import[["Konto_Nr_Händler", "Konto_Nr_SKR51"]].drop_duplicates()
+    logging.info(df.shape)
+
+    row = {
+        "Konto_Nr_Händler": "01-01-0861-00-00-00",
+        "Konto_Nr_SKR51": "01-01-0861-00-00-00",
+    }
+    # No set_index here: its result used to be thrown away, so it never had an effect,
+    # and "Konto_Nr_Händler" has to stay a regular column for special_translation.
+    return pd.concat([df, pd.DataFrame.from_records([row])])
+
+
+def special_translation(
+    df: pd.DataFrame, makes: dict[str, str], sites: dict[str, str], debug_file: str, export_invalid_filename: str
+) -> pd.DataFrame:
+    """Apply the fixed account rules to the prepared translation table.
+
+    The account numbers are upper-cased and split into their parts, ``Kontoart`` and
+    ``Konto_1`` are derived, unknown makes/sites are replaced, and a sequence of
+    pattern-based overrides rewrites ``Kostenstelle``, ``Absatzkanal`` and
+    ``Kostenträger``. The rules are applied in order and later rules read the values
+    written by earlier ones. Finally ``KRM`` and ``Konto_Nr_SKR51`` are rebuilt from the parts.
+
+    The passed frame is modified in place (helper columns are added to it).
+
+    Args:
+        df: Frame with the columns ``Konto_Nr_Händler`` and ``Konto_Nr_SKR51``.
+        makes: Known makes; only the keys are used here.
+        sites: Known ``"<Marke>-<Standort>"`` combinations; only the keys are used here.
+        debug_file: CSV path for the aggregation grouped by ``Konto_Nr``.
+        export_invalid_filename: CSV path for the rows where ``IsNumeric`` is false.
+
+    Returns:
+        The rows where ``IsNumeric`` is true, restricted to the ``TRANSLATE`` columns.
+    """
+    df["Konto_Nr_Händler"] = df["Konto_Nr_Händler"].str.upper()
+    df["Konto_Nr_SKR51"] = df["Konto_Nr_SKR51"].str.upper()
+    df = extract_acct_info(df)
+    df["Konto_Nr"] = df["Konto_Nr"].str.upper()
+    logging.info(df.shape)
+    logging.info(df.columns)
+    logging.info(df.head())
+
+    logging.info("df: " + str(df.shape))
+    # Kontoart: "1" for Konto_Nr starting with 0, 1 or 3, otherwise "2";
+    # "3" when Konto_Nr contains "_STK" or starts with 9.
+    df["Bilanz"] = df["Konto_Nr"].str.match(r"^[013]")
+    df["Kontoart"] = np.where(df["Bilanz"], "1", "2")
+    df["Kontoart"] = np.where(df["Konto_Nr"].str.contains("_STK"), "3", df["Kontoart"])
+    df["Kontoart"] = np.where(df["Konto_Nr"].str.match(r"^[9]"), "3", df["Kontoart"])
+    df["Konto_1"] = df["Konto_Nr"].str.slice(0, 1)
+
+    # Missing make and site assignment: unknown makes become "99", unknown make/site pairs get site "01".
+    df["Marke"] = np.where(df["Marke"].isin(makes.keys()), df["Marke"], "99")
+    df["Marke_Standort"] = df["Marke"] + "-" + df["Standort"]
+    df["Standort"] = np.where(df["Marke_Standort"].isin(sites.keys()), df["Standort"], "01")
+
+    # Log grouped aggregations and write the one grouped by Konto_Nr to debug_file.
+    df_debug = df.drop(columns=["Bilanz"])
+    logging.info(df_debug.groupby(["Kontoart"]).aggregate("sum"))
+    logging.info(df_debug.groupby(["Kontoart", "Konto_1"]).aggregate("sum"))
+    logging.info(df_debug.groupby(["Konto_Nr"]).aggregate("sum"))
+    df_debug.groupby(["Konto_Nr"]).aggregate("sum").to_csv(debug_file, decimal=",", sep=";", encoding="latin-1")
+
+    # Clean-up of the GW cost objects (Kostenträger).
+    # Konto_Nr 70../80.. whose Kostenstelle matches ^[^1]\d -> Kostenstelle "11".
+    df["NW_Verkauf_1"] = (df["Konto_Nr"].str.match(r"^[78]0")) & (df["Kostenstelle"].str.match(r"^[^1]\d"))
+    df["Kostenstelle"] = np.where(df["NW_Verkauf_1"] == True, "11", df["Kostenstelle"])
+
+    # Konto_Nr 7010/7011/8010/8011 -> Kostenstelle "14" (overrides the rule above).
+    df["Konto_7010"] = df["Konto_Nr"].str.match(r"^[78]01[01]")
+    df["Kostenstelle"] = np.where(df["Konto_7010"] == True, "14", df["Kostenstelle"])
+
+    # Konto_Nr 71../81.., 73../83.., 74../84.., 75../85..: a Kostenstelle outside the matching
+    # group is set to "21", "31", "41" or "51"; Konto_Nr 7420/8420 always gets "42".
+    df["GW_Verkauf_2"] = (df["Konto_Nr"].str.match(r"^[78]1")) & (df["Kostenstelle"].str.match(r"^[^2]\d"))
+    df["Kostenstelle"] = np.where(df["GW_Verkauf_2"] == True, "21", df["Kostenstelle"])
+
+    df["GW_Verkauf_3"] = (df["Konto_Nr"].str.match(r"^[78]3")) & (df["Kostenstelle"].str.match(r"^[^3]\d"))
+    df["Kostenstelle"] = np.where(df["GW_Verkauf_3"] == True, "31", df["Kostenstelle"])
+
+    df["GW_Verkauf_4"] = (df["Konto_Nr"].str.match(r"^[78]4")) & (df["Kostenstelle"].str.match(r"^[^4]\d"))
+    df["Kostenstelle"] = np.where(df["GW_Verkauf_4"] == True, "41", df["Kostenstelle"])
+
+    df["GW_Verkauf_x420"] = df["Konto_Nr"].str.match(r"^[78]420")
+    df["Kostenstelle"] = np.where(df["GW_Verkauf_x420"] == True, "42", df["Kostenstelle"])
+
+    df["GW_Verkauf_5"] = (df["Konto_Nr"].str.match(r"^[78]5")) & (df["Kostenstelle"].str.match(r"^[^5]\d"))
+    df["Kostenstelle"] = np.where(df["GW_Verkauf_5"] == True, "51", df["Kostenstelle"])
+
+    # Konto_Nr 7.../8... with Kostenstelle 2x -> Kostenträger "52", or "50" for Marke "01".
+    df["GW_Verkauf_50"] = (df["Konto_Nr"].str.match(r"^[78]")) & (df["Kostenstelle"].str.match(r"^2"))
+    df["Kostenträger"] = np.where(df["GW_Verkauf_50"] == True, "52", df["Kostenträger"])
+    df["Kostenträger"] = np.where(
+        (df["GW_Verkauf_50"] == True) & (df["Marke"] == "01"),
+        "50",
+        df["Kostenträger"],
+    )
+
+    # Konto_Nr 72../82.. with Kostenstelle 1x and a Kostenträger not starting with 0-4 -> "00".
+    df["NW_Verkauf_00"] = (
+        (df["Konto_Nr"].str.match(r"^[78]2"))
+        & (df["Kostenstelle"].str.match(r"^1"))
+        & (df["Kostenträger"].str.match(r"^[^01234]"))
+    )
+    df["Kostenträger"] = np.where(df["NW_Verkauf_00"] == True, "00", df["Kostenträger"])
+
+    # Konto_Nr 9130 with Kostenstelle 2x -> Kostenträger "52", or "50" for Marke "01".
+    df["GW_Stk_50"] = (df["Konto_Nr"].str.match(r"^9130")) & (df["Kostenstelle"].str.match(r"^2"))
+    df["Kostenträger"] = np.where(df["GW_Stk_50"] == True, "52", df["Kostenträger"])
+    df["Kostenträger"] = np.where((df["GW_Stk_50"] == True) & (df["Marke"] == "01"), "50", df["Kostenträger"])
+
+    # Rows flagged as Bilanz always get Kostenträger "00".
+    df["Kostenträger"] = np.where(df["Bilanz"] == True, "00", df["Kostenträger"])
+
+    # Konto_Nr starting with 5 or with 9143 -> Absatzkanal "99".
+    df["Konto_5er"] = (df["Konto_Nr"].str.match("^5")) | (df["Konto_Nr"].str.match("^9143"))
+    df["Absatzkanal"] = np.where(df["Konto_5er"] == True, "99", df["Absatzkanal"])
+
+    # Konto_Nr 5005 with a Kostenstelle not starting with 1 or 2 -> Kostenstelle "20", Kostenträger "50".
+    df["Konto_5005"] = (df["Konto_Nr"].str.match("^5005")) & (df["Kostenstelle"].str.match(r"^[^12]"))
+    df["Kostenstelle"] = np.where(df["Konto_5005"] == True, "20", df["Kostenstelle"])
+    df["Kostenträger"] = np.where(df["Konto_5005"] == True, "50", df["Kostenträger"])
+
+    # Konto_Nr 5007 with a Kostenstelle matching ^([^4]|42) -> Kostenstelle "41", Kostenträger "70".
+    df["Konto_5007"] = (df["Konto_Nr"].str.match("^5007")) & (df["Kostenstelle"].str.match(r"^([^4]|42)"))
+    df["Kostenstelle"] = np.where(df["Konto_5007"] == True, "41", df["Kostenstelle"])
+    df["Kostenträger"] = np.where(df["Konto_5007"] == True, "70", df["Kostenträger"])
+
+    # Konto_Nr 9143/9144 with a Kostenträger not starting with 7 -> Kostenträger "70".
+    df["Konto_914er"] = (df["Konto_Nr"].str.match("^914[34]")) & (df["Kostenträger"].str.match(r"^[^7]"))
+    df["Kostenträger"] = np.where(df["Konto_914er"] == True, "70", df["Kostenträger"])
+
+    # Konto_Nr 5/7/8... with Kostenstelle 3x and a Kostenträger not starting with 6 -> "60".
+    df["Teile_30_60"] = (
+        (df["Konto_Nr"].str.match(r"^[578]"))
+        & (df["Kostenstelle"].str.match(r"^[3]"))
+        & (df["Kostenträger"].str.match(r"^[^6]"))
+    )
+    df["Kostenträger"] = np.where(df["Teile_30_60"] == True, "60", df["Kostenträger"])
+
+    # Konto_Nr 5/7/8... with Kostenstelle 4x and a Kostenträger not starting with 7 -> "70".
+    df["Service_40_70"] = (
+        (df["Konto_Nr"].str.match(r"^[578]"))
+        & (df["Kostenstelle"].str.match(r"^[4]"))
+        & (df["Kostenträger"].str.match(r"^[^7]"))
+    )
+    df["Kostenträger"] = np.where(df["Service_40_70"] == True, "70", df["Kostenträger"])
+
+    # Rebuild the combined keys from the (possibly overridden) parts.
+    df["KRM"] = df["Marke"] + df["Standort"] + df["Kostenstelle"] + df["Absatzkanal"] + df["Kostenträger"]
+    df["Konto_Nr_SKR51"] = (
+        (df["Marke"] + "-" + df["Standort"] + "-" + df["Konto_Nr"])
+        + "-"
+        + (df["Kostenstelle"] + "-" + df["Absatzkanal"] + "-" + df["Kostenträger"])
+    )
+    # A row counts as valid when KRM and Konto_Nr consist of digits only and Konto_Nr has 4 characters.
+    df["IsNumeric"] = (
+        (df["KRM"].str.isdigit())
+        & (df["Konto_Nr"].str.isdigit())
+        & (df["Konto_Nr"].str.len() == 4)
+        # & (df["Konto_Nr_SKR51"].str.len() == 19)
+    )
+    # Invalid rows are exported with all helper columns; valid rows are returned.
+    df_invalid = df[df["IsNumeric"] == False]
+    df_invalid.to_csv(export_invalid_filename, decimal=",", sep=";", encoding="latin-1", index=False)
+    return df[df["IsNumeric"] == True][TRANSLATE]
+
+
+def extract_acct_info(df: pd.DataFrame) -> pd.DataFrame:
+    """Split ``Konto_Nr_SKR51`` into its six dash-separated parts, in place.
+
+    Values that do not contain exactly five dashes are first replaced by the
+    placeholder ``XX-XX-XXXX-XX-XX-XX``. The helper columns ``HasFiveDashes`` and
+    ``Invalid`` are added to ``df`` as well.
+
+    Returns:
+        The same frame object that was passed in.
+    """
+    five_dashes = df["Konto_Nr_SKR51"].str.count("-") == 5
+    df["HasFiveDashes"] = five_dashes
+    df["Invalid"] = "XX-XX-XXXX-XX-XX-XX"
+    df["Konto_Nr_SKR51"] = np.where(five_dashes, df["Konto_Nr_SKR51"], df["Invalid"])
+
+    segments = df["Konto_Nr_SKR51"].str.split(pat="-", n=6, expand=True)
+    df[["Marke", "Standort", "Konto_Nr", "Kostenstelle", "Absatzkanal", "Kostenträger"]] = segments
+    return df

+ 8 - 1
gcstruct/tests/test_gchr.py

@@ -2,7 +2,8 @@ import unittest
 
 
 import pandas as pd
 import pandas as pd
 
 
-from gcstruct.gchr import GCHR, TRANSLATE
+from gcstruct.gchr import GCHR
+from gcstruct.gchr_translate import TRANSLATE
 
 
 
 
 class TestGchr(unittest.TestCase):
 class TestGchr(unittest.TestCase):
@@ -27,3 +28,9 @@ class TestGchr(unittest.TestCase):
         self.assertEqual(df.shape[1], 12)
         self.assertEqual(df.shape[1], 12)
         self.assertListEqual(list(df.columns), TRANSLATE)
         self.assertListEqual(list(df.columns), TRANSLATE)
         self.assertGreater(df.shape[0], 0, "Translation not empty")
         self.assertGreater(df.shape[0], 0, "Translation not empty")
+
+    def test_all_periods(self):
+        """For 2024-12-23 there are twelve periods, from 2023-12 up to 2024-11."""
+        all_periods = GCHR.get_all_periods("2024-12-23")
+        expected_first = ("2023", "12")
+        expected_last = ("2024", "11")
+        self.assertEqual(len(all_periods), 12)
+        self.assertEqual(all_periods[0], expected_first)
+        self.assertEqual(all_periods[-1], expected_last)