Browse Source

Translation und Convert ausgelagert

gc-server3 3 months ago
parent
commit
60cc306dc5
4 changed files with 268 additions and 245 deletions
  1. 16 244
      gcstruct/gchr.py
  2. 28 0
      gcstruct/gchr_convert.py
  3. 216 0
      gcstruct/gchr_translate.py
  4. 8 1
      gcstruct/tests/test_gchr.py

+ 16 - 244
gcstruct/gchr.py

@@ -1,13 +1,10 @@
-import csv
 import logging
 import os
-import xml.etree.ElementTree as ET
 from dataclasses import dataclass
 from datetime import datetime
 from pathlib import Path
 from typing import Callable
 
-import numpy as np
 import pandas as pd
 
 from gcstruct.gchr_export import (
@@ -16,29 +13,15 @@ from gcstruct.gchr_export import (
     export_skr51_xml,
     header,
 )
-
-TRANSLATE = [
-    "Konto_Nr_Händler",
-    "Konto_Nr_SKR51",
-    "Marke",
-    "Standort",
-    "Konto_Nr",
-    "Kostenstelle",
-    "Absatzkanal",
-    "Kostenträger",
-    "Kontoart",
-    "Konto_1",
-    "KRM",
-    "IsNumeric",
-]
+from gcstruct.gchr_translate import load_translation
 
 
 @dataclass
 class GchrConfig:
     first_month_of_financial_year: str
-    data_path: str
-    gcstruct_path: str
-    export_path: str
+    data_dir: str
+    gcstruct_dir: str
+    export_dir: str
     export_fn = Callable[[GchrExportConfig], None]
 
 
@@ -117,27 +100,17 @@ class GCHR:
         rename_to = ["OpeningBalance"] + ["Period" + str(i).zfill(2) for i in range(1, 13)]
         return dict(zip(period, rename_to))
 
-    @staticmethod
-    def extract_acct_info(df: pd.DataFrame) -> pd.DataFrame:
-        acct_info = [
-            "Marke",
-            "Standort",
-            "Konto_Nr",
-            "Kostenstelle",
-            "Absatzkanal",
-            "Kostenträger",
-        ]
-        df["HasFiveDashes"] = df["Konto_Nr_SKR51"].str.count("-") == 5
-        df["Invalid"] = "XX-XX-XXXX-XX-XX-XX"
-        df["Konto_Nr_SKR51"] = np.where(
-            df["HasFiveDashes"],
-            df["Konto_Nr_SKR51"],
-            df["Invalid"],
-        )
-        df[acct_info] = df["Konto_Nr_SKR51"].str.split(pat="-", n=6, expand=True)
-        return df
-
     def export_all_periods(self, overwrite=False, today=None) -> None:
+        periods = GCHR.get_all_periods(today)
+
+        for year, month in periods:
+            filename = self.export_filename_for_period(year, month)
+            if overwrite or not Path(filename).exists():
+                os.makedirs(Path(filename).parent.joinpath("info"), exist_ok=True)
+                self.export_period(year, month)
+
+    @staticmethod
+    def get_all_periods(today=None) -> list[tuple[str, str]]:
         dt = datetime.now()
         if today is not None:
             dt = datetime.fromisoformat(today)
@@ -145,11 +118,7 @@ class GCHR:
         periods = [(prev, str(x).zfill(2)) for x in range(dt.month, 13)] + [
             (str(dt.year), str(x).zfill(2)) for x in range(1, dt.month)
         ]
-        for year, month in periods:
-            filename = self.export_filename_for_period(year, month)
-            if overwrite or not Path(filename).exists():
-                os.makedirs(Path(filename).parent.joinpath("info"), exist_ok=True)
-                self.export_period(year, month)
+        return periods
 
     def export_period(self, year: str, month: str) -> str:
         self.set_bookkeep_period(year, month)
@@ -248,185 +217,11 @@ class GCHR:
     @property
     def df_translate(self) -> pd.DataFrame:
         if self._df_translate is None:
-            self.makes, self.sites, self._df_translate, self.df_translate2 = GCHR.load_translation(
+            self.makes, self.sites, self._df_translate, self.df_translate2 = load_translation(
                 self.account_translation, self.debug_file, self.export_invalid_filename
             )
         return self._df_translate
 
-    @staticmethod
-    def load_translation(
-        account_translation: str, debug_file: str, export_invalid_filename: str
-    ) -> tuple[dict, dict, pd.DataFrame, pd.DataFrame]:
-        df_translate_import = pd.read_csv(
-            account_translation,
-            decimal=",",
-            sep=";",
-            encoding="latin-1",
-            converters={i: str for i in range(0, 200)},
-        ).reset_index()
-
-        makes = GCHR.get_makes_from_translation(df_translate_import)
-        sites = GCHR.get_sites_from_translation(df_translate_import)
-
-        df_prepared = GCHR.prepare_translation(df_translate_import)
-        df_translate = GCHR.special_translation(df_prepared, makes, sites, debug_file, export_invalid_filename)
-        df_translate2 = (
-            df_translate.copy().drop(columns=["Konto_Nr_Händler"]).drop_duplicates().set_index("Konto_Nr_SKR51")
-        )
-        return (makes, sites, df_translate, df_translate2)
-
-    @staticmethod
-    def get_makes_from_translation(df_translate_import: pd.DataFrame) -> dict[str, str]:
-        df_makes = df_translate_import[["Marke", "Marke_HBV"]].copy().drop_duplicates()
-        df_makes = df_makes[df_makes["Marke_HBV"] != "0000"]
-        makes = dict([(e["Marke"], e["Marke_HBV"]) for e in df_makes.to_dict(orient="records")])
-        makes["99"] = "0000"
-        return makes
-
-    @staticmethod
-    def get_sites_from_translation(df_translate_import: pd.DataFrame) -> dict[str, str]:
-        df_sites = df_translate_import[["Marke", "Standort", "Standort_HBV"]].copy().drop_duplicates()
-        df_sites["Standort_HBV"] = np.where(df_sites["Standort_HBV"].str.len() != 6, "0000", df_sites["Standort_HBV"])
-        sites = dict(
-            [(e["Marke"] + "-" + e["Standort"], e["Standort_HBV"]) for e in df_sites.to_dict(orient="records")]
-        )
-        return sites
-
-    @staticmethod
-    def prepare_translation(df_translate_import: pd.DataFrame) -> pd.DataFrame:
-        df = df_translate_import[
-            [
-                "Konto_Nr_Händler",
-                "Konto_Nr_SKR51",
-            ]
-        ].drop_duplicates()
-        logging.info(df.shape)
-
-        row = {
-            "Konto_Nr_Händler": "01-01-0861-00-00-00",
-            "Konto_Nr_SKR51": "01-01-0861-00-00-00",
-        }
-        df = pd.concat([df, pd.DataFrame.from_records([row])])
-        df.set_index("Konto_Nr_Händler")
-        return df
-
-    @staticmethod
-    def special_translation(
-        df: pd.DataFrame, makes: dict[str, str], sites: dict[str, str], debug_file: str, export_invalid_filename: str
-    ) -> pd.DataFrame:
-        df["Konto_Nr_Händler"] = df["Konto_Nr_Händler"].str.upper()
-        df["Konto_Nr_SKR51"] = df["Konto_Nr_SKR51"].str.upper()
-        df = GCHR.extract_acct_info(df)
-        df["Konto_Nr"] = df["Konto_Nr"].str.upper()
-        logging.info(df.shape)
-        logging.info(df.columns)
-        logging.info(df.head())
-
-        logging.info("df: " + str(df.shape))
-        df["Bilanz"] = df["Konto_Nr"].str.match(r"^[013]")
-        df["Kontoart"] = np.where(df["Bilanz"], "1", "2")
-        df["Kontoart"] = np.where(df["Konto_Nr"].str.contains("_STK"), "3", df["Kontoart"])
-        df["Kontoart"] = np.where(df["Konto_Nr"].str.match(r"^[9]"), "3", df["Kontoart"])
-        df["Konto_1"] = df["Konto_Nr"].str.slice(0, 1)
-
-        # fehlende Marken- und Standortzuordnung
-        df["Marke"] = np.where(df["Marke"].isin(makes.keys()), df["Marke"], "99")
-        df["Marke_Standort"] = df["Marke"] + "-" + df["Standort"]
-        df["Standort"] = np.where(df["Marke_Standort"].isin(sites.keys()), df["Standort"], "01")
-
-        df_debug = df.drop(columns=["Bilanz"])
-        logging.info(df_debug.groupby(["Kontoart"]).aggregate("sum"))
-        logging.info(df_debug.groupby(["Kontoart", "Konto_1"]).aggregate("sum"))
-        logging.info(df_debug.groupby(["Konto_Nr"]).aggregate("sum"))
-        df_debug.groupby(["Konto_Nr"]).aggregate("sum").to_csv(debug_file, decimal=",", sep=";", encoding="latin-1")
-
-        # Bereinigung GW-Kostenträger
-        df["NW_Verkauf_1"] = (df["Konto_Nr"].str.match(r"^[78]0")) & (df["Kostenstelle"].str.match(r"^[^1]\d"))
-        df["Kostenstelle"] = np.where(df["NW_Verkauf_1"] == True, "11", df["Kostenstelle"])
-
-        df["Konto_7010"] = df["Konto_Nr"].str.match(r"^[78]01[01]")
-        df["Kostenstelle"] = np.where(df["Konto_7010"] == True, "14", df["Kostenstelle"])
-
-        df["GW_Verkauf_2"] = (df["Konto_Nr"].str.match(r"^[78]1")) & (df["Kostenstelle"].str.match(r"^[^2]\d"))
-        df["Kostenstelle"] = np.where(df["GW_Verkauf_2"] == True, "21", df["Kostenstelle"])
-
-        df["GW_Verkauf_3"] = (df["Konto_Nr"].str.match(r"^[78]3")) & (df["Kostenstelle"].str.match(r"^[^3]\d"))
-        df["Kostenstelle"] = np.where(df["GW_Verkauf_3"] == True, "31", df["Kostenstelle"])
-
-        df["GW_Verkauf_4"] = (df["Konto_Nr"].str.match(r"^[78]4")) & (df["Kostenstelle"].str.match(r"^[^4]\d"))
-        df["Kostenstelle"] = np.where(df["GW_Verkauf_4"] == True, "41", df["Kostenstelle"])
-
-        df["GW_Verkauf_x420"] = df["Konto_Nr"].str.match(r"^[78]420")
-        df["Kostenstelle"] = np.where(df["GW_Verkauf_x420"] == True, "42", df["Kostenstelle"])
-
-        df["GW_Verkauf_5"] = (df["Konto_Nr"].str.match(r"^[78]5")) & (df["Kostenstelle"].str.match(r"^[^5]\d"))
-        df["Kostenstelle"] = np.where(df["GW_Verkauf_5"] == True, "51", df["Kostenstelle"])
-
-        df["GW_Verkauf_50"] = (df["Konto_Nr"].str.match(r"^[78]")) & (df["Kostenstelle"].str.match(r"^2"))
-        df["Kostenträger"] = np.where(df["GW_Verkauf_50"] == True, "52", df["Kostenträger"])
-        df["Kostenträger"] = np.where(
-            (df["GW_Verkauf_50"] == True) & (df["Marke"] == "01"),
-            "50",
-            df["Kostenträger"],
-        )
-
-        df["NW_Verkauf_00"] = (
-            (df["Konto_Nr"].str.match(r"^[78]2"))
-            & (df["Kostenstelle"].str.match(r"^1"))
-            & (df["Kostenträger"].str.match(r"^[^01234]"))
-        )
-        df["Kostenträger"] = np.where(df["NW_Verkauf_00"] == True, "00", df["Kostenträger"])
-
-        df["GW_Stk_50"] = (df["Konto_Nr"].str.match(r"^9130")) & (df["Kostenstelle"].str.match(r"^2"))
-        df["Kostenträger"] = np.where(df["GW_Stk_50"] == True, "52", df["Kostenträger"])
-        df["Kostenträger"] = np.where((df["GW_Stk_50"] == True) & (df["Marke"] == "01"), "50", df["Kostenträger"])
-
-        df["Kostenträger"] = np.where(df["Bilanz"] == True, "00", df["Kostenträger"])
-
-        df["Konto_5er"] = (df["Konto_Nr"].str.match("^5")) | (df["Konto_Nr"].str.match("^9143"))
-        df["Absatzkanal"] = np.where(df["Konto_5er"] == True, "99", df["Absatzkanal"])
-
-        df["Konto_5005"] = (df["Konto_Nr"].str.match("^5005")) & (df["Kostenstelle"].str.match(r"^[^12]"))
-        df["Kostenstelle"] = np.where(df["Konto_5005"] == True, "20", df["Kostenstelle"])
-        df["Kostenträger"] = np.where(df["Konto_5005"] == True, "50", df["Kostenträger"])
-
-        df["Konto_5007"] = (df["Konto_Nr"].str.match("^5007")) & (df["Kostenstelle"].str.match(r"^([^4]|42)"))
-        df["Kostenstelle"] = np.where(df["Konto_5007"] == True, "41", df["Kostenstelle"])
-        df["Kostenträger"] = np.where(df["Konto_5007"] == True, "70", df["Kostenträger"])
-
-        df["Konto_914er"] = (df["Konto_Nr"].str.match("^914[34]")) & (df["Kostenträger"].str.match(r"^[^7]"))
-        df["Kostenträger"] = np.where(df["Konto_914er"] == True, "70", df["Kostenträger"])
-
-        df["Teile_30_60"] = (
-            (df["Konto_Nr"].str.match(r"^[578]"))
-            & (df["Kostenstelle"].str.match(r"^[3]"))
-            & (df["Kostenträger"].str.match(r"^[^6]"))
-        )
-        df["Kostenträger"] = np.where(df["Teile_30_60"] == True, "60", df["Kostenträger"])
-
-        df["Service_40_70"] = (
-            (df["Konto_Nr"].str.match(r"^[578]"))
-            & (df["Kostenstelle"].str.match(r"^[4]"))
-            & (df["Kostenträger"].str.match(r"^[^7]"))
-        )
-        df["Kostenträger"] = np.where(df["Service_40_70"] == True, "70", df["Kostenträger"])
-
-        df["KRM"] = df["Marke"] + df["Standort"] + df["Kostenstelle"] + df["Absatzkanal"] + df["Kostenträger"]
-        df["Konto_Nr_SKR51"] = (
-            (df["Marke"] + "-" + df["Standort"] + "-" + df["Konto_Nr"])
-            + "-"
-            + (df["Kostenstelle"] + "-" + df["Absatzkanal"] + "-" + df["Kostenträger"])
-        )
-        df["IsNumeric"] = (
-            (df["KRM"].str.isdigit())
-            & (df["Konto_Nr"].str.isdigit())
-            & (df["Konto_Nr"].str.len() == 4)
-            # & (df["Konto_Nr_SKR51"].str.len() == 19)
-        )
-        df_invalid = df[df["IsNumeric"] == False]
-        df_invalid.to_csv(export_invalid_filename, decimal=",", sep=";", encoding="latin-1", index=False)
-        return df[df["IsNumeric"] == True][TRANSLATE]
-
     def load_bookings_from_file(self) -> None:
         df_list: list[pd.DataFrame] = []
         timestamps: list[float] = []
@@ -530,29 +325,6 @@ class GCHR:
     def export_filename_for_period(self, year: str, month: str) -> str:
         return f"{self.base_dir}/Export/{year}/export_{year}-{month}.xml"
 
-    @staticmethod
-    def convert_to_row(node: list[ET.Element]) -> list[str]:
-        return [child.text for child in node]
-
-    @staticmethod
-    def convert_xml_to_csv(xmlfile: str, csvfile: str) -> bool:
-        with open(xmlfile) as frh:
-            record_list = ET.parse(frh).getroot().find("RecordList")
-        header = [child.tag for child in record_list.find("Record")]
-        bookings = [GCHR.convert_to_row(node) for node in record_list.findall("Record")]
-        with open(csvfile, "w") as fwh:
-            cwh = csv.writer(fwh, delimiter=";")
-            cwh.writerow(header)
-            cwh.writerows(bookings)
-        return True
-
-    def convert_csv_to_xml(self, csvfile: str, xmlfile: str) -> None:
-        self.makes = {"01": "1844"}
-        self.sites = {"01-01": "1844"}
-        with open(csvfile, "r", encoding="latin-1") as frh:
-            csv_reader = csv.DictReader(frh, delimiter=";")
-            GCHR.export_skr51_xml(csv_reader, self.bookkeep_filter(), 1, list(self.sites.values())[0], xmlfile)
-
 
 def gchr_local() -> None:
     base_dir = os.getcwd() + "/../GCHR2_Testdaten/Kunden"

+ 28 - 0
gcstruct/gchr_convert.py

@@ -0,0 +1,28 @@
+import csv
+import xml.etree.ElementTree as ET
+
+from gcstruct.gchr import GCHR
+
+
+def convert_to_row(node: list[ET.Element]) -> list[str]:
+    return [child.text for child in node]
+
+
+def convert_xml_to_csv(xmlfile: str, csvfile: str) -> bool:
+    with open(xmlfile) as frh:
+        record_list = ET.parse(frh).getroot().find("RecordList")
+    header = [child.tag for child in record_list.find("Record")]
+    bookings = [convert_to_row(node) for node in record_list.findall("Record")]  # module-level helper, not on GCHR
+    with open(csvfile, "w", newline="") as fwh:  # newline="" lets csv.writer control line endings
+        cwh = csv.writer(fwh, delimiter=";")
+        cwh.writerow(header)
+        cwh.writerows(bookings)
+    return True
+
+
+def convert_csv_to_xml(self, csvfile: str, xmlfile: str) -> None:
+    self.makes = {"01": "1844"}
+    self.sites = {"01-01": "1844"}
+    with open(csvfile, "r", encoding="latin-1") as frh:
+        csv_reader = csv.DictReader(frh, delimiter=";")
+        GCHR.export_skr51_xml(csv_reader, self.bookkeep_filter(), 1, list(self.sites.values())[0], xmlfile)

+ 216 - 0
gcstruct/gchr_translate.py

@@ -0,0 +1,216 @@
+import logging
+
+import numpy as np
+import pandas as pd
+
+TRANSLATE = [
+    "Konto_Nr_Händler",
+    "Konto_Nr_SKR51",
+    "Marke",
+    "Standort",
+    "Konto_Nr",
+    "Kostenstelle",
+    "Absatzkanal",
+    "Kostenträger",
+    "Kontoart",
+    "Konto_1",
+    "KRM",
+    "IsNumeric",
+]
+
+
+def load_translation(
+    account_translation: str, debug_file: str, export_invalid_filename: str
+) -> tuple[dict[str, str], dict[str, str], pd.DataFrame, pd.DataFrame]:
+    df_translate_import = pd.read_csv(
+        account_translation,
+        decimal=",",
+        sep=";",
+        encoding="latin-1",
+        converters={i: str for i in range(0, 200)},
+    ).reset_index()
+
+    makes = get_makes_from_translation(df_translate_import)
+    sites = get_sites_from_translation(df_translate_import)
+
+    df_prepared = prepare_translation(df_translate_import)
+    df_translate = special_translation(df_prepared, makes, sites, debug_file, export_invalid_filename)
+    df_translate2 = df_translate.copy().drop(columns=["Konto_Nr_Händler"]).drop_duplicates().set_index("Konto_Nr_SKR51")
+
+    df_translate3 = (
+        df_translate[["Kontoart", "Konto_Nr_SKR51", "Konto_Nr_Händler"]]
+        .copy()
+        .sort_values(by=["Kontoart", "Konto_Nr_SKR51"])
+    )
+    df_translate3.to_csv(account_translation[:-4] + "_GCHR.csv", decimal=",", sep=";", encoding="latin-1", index=False)
+    return (makes, sites, df_translate, df_translate2)
+
+
+def get_makes_from_translation(df_translate_import: pd.DataFrame) -> dict[str, str]:
+    df_makes = df_translate_import[["Marke", "Marke_HBV"]].copy().drop_duplicates()
+    df_makes = df_makes[df_makes["Marke_HBV"] != "0000"]
+    makes = dict([(e["Marke"], e["Marke_HBV"]) for e in df_makes.to_dict(orient="records")])
+    makes["99"] = "0000"
+    return makes
+
+
+def get_sites_from_translation(df_translate_import: pd.DataFrame) -> dict[str, str]:
+    df_sites = df_translate_import[["Marke", "Standort", "Standort_HBV"]].copy().drop_duplicates()
+    df_sites["Standort_HBV"] = np.where(df_sites["Standort_HBV"].str.len() != 6, "0000", df_sites["Standort_HBV"])
+    sites = dict([(e["Marke"] + "-" + e["Standort"], e["Standort_HBV"]) for e in df_sites.to_dict(orient="records")])
+    return sites
+
+
+def prepare_translation(df_translate_import: pd.DataFrame) -> pd.DataFrame:
+    df = df_translate_import[
+        [
+            "Konto_Nr_Händler",
+            "Konto_Nr_SKR51",
+        ]
+    ].drop_duplicates()
+    logging.info(df.shape)
+
+    row = {
+        "Konto_Nr_Händler": "01-01-0861-00-00-00",
+        "Konto_Nr_SKR51": "01-01-0861-00-00-00",
+    }
+    df = pd.concat([df, pd.DataFrame.from_records([row])])
+    # No set_index here: its result was discarded, and special_translation reads "Konto_Nr_Händler" as a column.
+    return df
+
+
+def special_translation(
+    df: pd.DataFrame, makes: dict[str, str], sites: dict[str, str], debug_file: str, export_invalid_filename: str
+) -> pd.DataFrame:
+    df["Konto_Nr_Händler"] = df["Konto_Nr_Händler"].str.upper()
+    df["Konto_Nr_SKR51"] = df["Konto_Nr_SKR51"].str.upper()
+    df = extract_acct_info(df)
+    df["Konto_Nr"] = df["Konto_Nr"].str.upper()
+    logging.info(df.shape)
+    logging.info(df.columns)
+    logging.info(df.head())
+
+    logging.info("df: " + str(df.shape))
+    df["Bilanz"] = df["Konto_Nr"].str.match(r"^[013]")
+    df["Kontoart"] = np.where(df["Bilanz"], "1", "2")
+    df["Kontoart"] = np.where(df["Konto_Nr"].str.contains("_STK"), "3", df["Kontoart"])
+    df["Kontoart"] = np.where(df["Konto_Nr"].str.match(r"^[9]"), "3", df["Kontoart"])
+    df["Konto_1"] = df["Konto_Nr"].str.slice(0, 1)
+
+    # fehlende Marken- und Standortzuordnung
+    df["Marke"] = np.where(df["Marke"].isin(makes.keys()), df["Marke"], "99")
+    df["Marke_Standort"] = df["Marke"] + "-" + df["Standort"]
+    df["Standort"] = np.where(df["Marke_Standort"].isin(sites.keys()), df["Standort"], "01")
+
+    df_debug = df.drop(columns=["Bilanz"])
+    logging.info(df_debug.groupby(["Kontoart"]).aggregate("sum"))
+    logging.info(df_debug.groupby(["Kontoart", "Konto_1"]).aggregate("sum"))
+    logging.info(df_debug.groupby(["Konto_Nr"]).aggregate("sum"))
+    df_debug.groupby(["Konto_Nr"]).aggregate("sum").to_csv(debug_file, decimal=",", sep=";", encoding="latin-1")
+
+    # Bereinigung GW-Kostenträger
+    df["NW_Verkauf_1"] = (df["Konto_Nr"].str.match(r"^[78]0")) & (df["Kostenstelle"].str.match(r"^[^1]\d"))
+    df["Kostenstelle"] = np.where(df["NW_Verkauf_1"] == True, "11", df["Kostenstelle"])
+
+    df["Konto_7010"] = df["Konto_Nr"].str.match(r"^[78]01[01]")
+    df["Kostenstelle"] = np.where(df["Konto_7010"] == True, "14", df["Kostenstelle"])
+
+    df["GW_Verkauf_2"] = (df["Konto_Nr"].str.match(r"^[78]1")) & (df["Kostenstelle"].str.match(r"^[^2]\d"))
+    df["Kostenstelle"] = np.where(df["GW_Verkauf_2"] == True, "21", df["Kostenstelle"])
+
+    df["GW_Verkauf_3"] = (df["Konto_Nr"].str.match(r"^[78]3")) & (df["Kostenstelle"].str.match(r"^[^3]\d"))
+    df["Kostenstelle"] = np.where(df["GW_Verkauf_3"] == True, "31", df["Kostenstelle"])
+
+    df["GW_Verkauf_4"] = (df["Konto_Nr"].str.match(r"^[78]4")) & (df["Kostenstelle"].str.match(r"^[^4]\d"))
+    df["Kostenstelle"] = np.where(df["GW_Verkauf_4"] == True, "41", df["Kostenstelle"])
+
+    df["GW_Verkauf_x420"] = df["Konto_Nr"].str.match(r"^[78]420")
+    df["Kostenstelle"] = np.where(df["GW_Verkauf_x420"] == True, "42", df["Kostenstelle"])
+
+    df["GW_Verkauf_5"] = (df["Konto_Nr"].str.match(r"^[78]5")) & (df["Kostenstelle"].str.match(r"^[^5]\d"))
+    df["Kostenstelle"] = np.where(df["GW_Verkauf_5"] == True, "51", df["Kostenstelle"])
+
+    df["GW_Verkauf_50"] = (df["Konto_Nr"].str.match(r"^[78]")) & (df["Kostenstelle"].str.match(r"^2"))
+    df["Kostenträger"] = np.where(df["GW_Verkauf_50"] == True, "52", df["Kostenträger"])
+    df["Kostenträger"] = np.where(
+        (df["GW_Verkauf_50"] == True) & (df["Marke"] == "01"),
+        "50",
+        df["Kostenträger"],
+    )
+
+    df["NW_Verkauf_00"] = (
+        (df["Konto_Nr"].str.match(r"^[78]2"))
+        & (df["Kostenstelle"].str.match(r"^1"))
+        & (df["Kostenträger"].str.match(r"^[^01234]"))
+    )
+    df["Kostenträger"] = np.where(df["NW_Verkauf_00"] == True, "00", df["Kostenträger"])
+
+    df["GW_Stk_50"] = (df["Konto_Nr"].str.match(r"^9130")) & (df["Kostenstelle"].str.match(r"^2"))
+    df["Kostenträger"] = np.where(df["GW_Stk_50"] == True, "52", df["Kostenträger"])
+    df["Kostenträger"] = np.where((df["GW_Stk_50"] == True) & (df["Marke"] == "01"), "50", df["Kostenträger"])
+
+    df["Kostenträger"] = np.where(df["Bilanz"] == True, "00", df["Kostenträger"])
+
+    df["Konto_5er"] = (df["Konto_Nr"].str.match("^5")) | (df["Konto_Nr"].str.match("^9143"))
+    df["Absatzkanal"] = np.where(df["Konto_5er"] == True, "99", df["Absatzkanal"])
+
+    df["Konto_5005"] = (df["Konto_Nr"].str.match("^5005")) & (df["Kostenstelle"].str.match(r"^[^12]"))
+    df["Kostenstelle"] = np.where(df["Konto_5005"] == True, "20", df["Kostenstelle"])
+    df["Kostenträger"] = np.where(df["Konto_5005"] == True, "50", df["Kostenträger"])
+
+    df["Konto_5007"] = (df["Konto_Nr"].str.match("^5007")) & (df["Kostenstelle"].str.match(r"^([^4]|42)"))
+    df["Kostenstelle"] = np.where(df["Konto_5007"] == True, "41", df["Kostenstelle"])
+    df["Kostenträger"] = np.where(df["Konto_5007"] == True, "70", df["Kostenträger"])
+
+    df["Konto_914er"] = (df["Konto_Nr"].str.match("^914[34]")) & (df["Kostenträger"].str.match(r"^[^7]"))
+    df["Kostenträger"] = np.where(df["Konto_914er"] == True, "70", df["Kostenträger"])
+
+    df["Teile_30_60"] = (
+        (df["Konto_Nr"].str.match(r"^[578]"))
+        & (df["Kostenstelle"].str.match(r"^[3]"))
+        & (df["Kostenträger"].str.match(r"^[^6]"))
+    )
+    df["Kostenträger"] = np.where(df["Teile_30_60"] == True, "60", df["Kostenträger"])
+
+    df["Service_40_70"] = (
+        (df["Konto_Nr"].str.match(r"^[578]"))
+        & (df["Kostenstelle"].str.match(r"^[4]"))
+        & (df["Kostenträger"].str.match(r"^[^7]"))
+    )
+    df["Kostenträger"] = np.where(df["Service_40_70"] == True, "70", df["Kostenträger"])
+
+    df["KRM"] = df["Marke"] + df["Standort"] + df["Kostenstelle"] + df["Absatzkanal"] + df["Kostenträger"]
+    df["Konto_Nr_SKR51"] = (
+        (df["Marke"] + "-" + df["Standort"] + "-" + df["Konto_Nr"])
+        + "-"
+        + (df["Kostenstelle"] + "-" + df["Absatzkanal"] + "-" + df["Kostenträger"])
+    )
+    df["IsNumeric"] = (
+        (df["KRM"].str.isdigit())
+        & (df["Konto_Nr"].str.isdigit())
+        & (df["Konto_Nr"].str.len() == 4)
+        # & (df["Konto_Nr_SKR51"].str.len() == 19)
+    )
+    df_invalid = df[df["IsNumeric"] == False]
+    df_invalid.to_csv(export_invalid_filename, decimal=",", sep=";", encoding="latin-1", index=False)
+    return df[df["IsNumeric"] == True][TRANSLATE]
+
+
+def extract_acct_info(df: pd.DataFrame) -> pd.DataFrame:
+    acct_info = [
+        "Marke",
+        "Standort",
+        "Konto_Nr",
+        "Kostenstelle",
+        "Absatzkanal",
+        "Kostenträger",
+    ]
+    df["HasFiveDashes"] = df["Konto_Nr_SKR51"].str.count("-") == 5
+    df["Invalid"] = "XX-XX-XXXX-XX-XX-XX"
+    df["Konto_Nr_SKR51"] = np.where(
+        df["HasFiveDashes"],
+        df["Konto_Nr_SKR51"],
+        df["Invalid"],
+    )
+    df[acct_info] = df["Konto_Nr_SKR51"].str.split(pat="-", n=6, expand=True)
+    return df

+ 8 - 1
gcstruct/tests/test_gchr.py

@@ -2,7 +2,8 @@ import unittest
 
 import pandas as pd
 
-from gcstruct.gchr import GCHR, TRANSLATE
+from gcstruct.gchr import GCHR
+from gcstruct.gchr_translate import TRANSLATE
 
 
 class TestGchr(unittest.TestCase):
@@ -27,3 +28,9 @@ class TestGchr(unittest.TestCase):
         self.assertEqual(df.shape[1], 12)
         self.assertListEqual(list(df.columns), TRANSLATE)
         self.assertGreater(df.shape[0], 0, "Translation not empty")
+
+    def test_all_periods(self):
+        periods = GCHR.get_all_periods("2024-12-23")
+        self.assertEqual(len(periods), 12)
+        self.assertEqual(periods[0], ("2023", "12"))
+        self.assertEqual(periods[-1], ("2024", "11"))