Prechádzať zdrojové kódy

Filter für tatsächlich benutzte Marken und Standorte

gc-server3 1 rok pred
rodič
commit
ba5a84a467
1 zmenil súbory, kde vykonal 137 pridanie a 92 odobranie
  1. gcstruct/gchr.py (+137 / −92)

+ 137 - 92
gcstruct/gchr.py

@@ -19,10 +19,29 @@ ACCOUNT_INFO = [
     "CostAccountingString",
 ]
 
+TRANSLATE = [
+    "Konto_Nr_Händler",
+    "Konto_Nr_SKR51",
+    "Marke",
+    "Standort",
+    "Konto_Nr",
+    "Kostenstelle",
+    "Absatzkanal",
+    "Kostenträger",
+    "Kontoart",
+    "Konto_1",
+    "KRM",
+    "IsNumeric",
+]
+
 
 class GCHR:
     booking_date: datetime
     df_bookings: pd.DataFrame = None
+    df_translate: pd.DataFrame = None
+    df_translate2: pd.DataFrame = None
+    makes: dict[str, str] = None
+    sites: dict[str, str] = None
 
     def __init__(self, base_dir) -> None:
         self.base_dir = base_dir
@@ -48,21 +67,21 @@ class GCHR:
         )
         self.debug_file = f"{self.export_info_dir}/debug_{period}.csv"
         self.account_ignored = f"{self.export_info_dir}/ignoriert_{period}.csv"
-        self.account_invalid = f"{self.export_info_dir}/ungueltig_{period}.csv"
+        # self.account_invalid = f"{self.export_info_dir}/ungueltig_{period}.csv"
 
         self.last_year = str(int(self.current_year) - 1)
         self.last_year2 = str(int(self.current_year) - 2)
         self.next_year = str(int(self.current_year) + 1)
 
-    def header(self, makes, sites, main_site):
+    def header(self, makes_used, sites_used, main_site):
         return {
             "Country": "DE",
             "MainBmCode": main_site,
             "Month": self.current_month,
             "Year": self.current_year,
             "Currency": "EUR",
-            "NumberOfMakes": len(makes),
-            "NumberOfSites": len(sites),
+            "NumberOfMakes": len(makes_used),
+            "NumberOfSites": len(sites_used),
             "ExtractionDate": self.booking_date.strftime("%d.%m.%Y"),
             "ExtractionTime": self.booking_date.strftime("%H:%M:%S"),
             "BeginFiscalYear": self.first_month_of_financial_year,
@@ -91,7 +110,13 @@ class GCHR:
             "Absatzkanal",
             "Kostenträger",
         ]
-        df["Konto_Nr_SKR51"] = df.index
+        df["HasFiveDashes"] = df["Konto_Nr_SKR51"].str.count("-") == 5
+        df["Invalid"] = "XX-XX-XXXX-XX-XX-XX"
+        df["Konto_Nr_SKR51"] = np.where(
+            df["HasFiveDashes"],
+            df["Konto_Nr_SKR51"],
+            df["Invalid"],
+        )
         df[acct_info] = df["Konto_Nr_SKR51"].str.split(pat="-", n=6, expand=True)
         return df
 
@@ -112,14 +137,7 @@ class GCHR:
     def export_period(self, year, month):
         self.set_bookkeep_period(year, month)
         # Übersetzungstabelle laden
-        df_translate_import = pd.read_csv(
-            self.account_translation,
-            decimal=",",
-            sep=";",
-            encoding="latin-1",
-            converters={i: str for i in range(0, 200)},
-        )
-        df_translate = self.prepare_translation(df_translate_import)
+        self.get_translation()
 
         # Kontensalden laden
         df_bookings = self.filter_bookings()
@@ -135,28 +153,9 @@ class GCHR:
 
         logging.info("df_bookings: " + str(df_bookings.shape))
         # Join auf Übersetzung
-        df_combined = df_bookings.merge(df_translate, how="inner", on="Konto_Nr_Händler")
+        df_combined = df_bookings.merge(self.df_translate, how="inner", on="Konto_Nr_Händler")
         logging.info(f"df_combined: {df_combined.shape}")
 
-        # Hack für fehlende Markenzuordnung
-        df_combined["Fremdmarke"] = df_combined["Marke_HBV"].str.match(r"^0000")
-        df_combined["Marke"] = np.where(df_combined["Fremdmarke"], "99", df_combined["Marke"])
-
-        df_combined["Standort_egal"] = df_combined["Standort_HBV"].str.match(r"^\d\d_")
-        df_combined["Standort_HBV"] = np.where(
-            df_combined["Fremdmarke"] | df_combined["Standort_egal"],
-            "0000",
-            df_combined["Standort_HBV"],
-        )
-
-        makes = df_combined[["Marke", "Marke_HBV"]].drop_duplicates().sort_values(by=["Marke"])
-        sites = (
-            df_combined[["Marke", "Standort", "Standort_HBV"]].drop_duplicates().sort_values(by=["Marke", "Standort"])
-        )
-
-        # df_combined.to_csv(account_invalid, decimal=',', sep=';', encoding='latin-1', index=False)
-        # Gruppieren
-        # df_grouped = df_combined.groupby(['Konto_Nr_SKR51', 'period']).sum()
         df_pivot = df_combined.pivot_table(
             index=["Konto_Nr_SKR51"],
             columns=["period"],
@@ -169,85 +168,109 @@ class GCHR:
 
         logging.info("df_pivot: " + str(df_pivot.shape))
 
-        df = self.special_translation(df_pivot, makes)
+        df = df_pivot.merge(self.df_translate2, how="inner", on="Konto_Nr_SKR51")
+
+        makes_used = sorted(list(set(df["Marke"].to_list())))
+        sites_used = sorted(list(set((df["Marke"] + "-" + df["Standort"]).to_list())))
 
         from_label = ["Marke", "Standort", "Konto_Nr", "Kostenstelle", "Absatzkanal", "Kostenträger", "KRM"]
         to_label = ["Make", "Site", "Account", "Origin", "SalesChannel", "CostCarrier", "CostAccountingString"]
         col_dict = dict(zip(from_label, to_label))
         df = df.rename(columns=col_dict)
-        makes = makes.rename(columns=col_dict).to_dict(orient="records")
-        sites = sites.rename(columns=col_dict).to_dict(orient="records")
 
-        df_invalid = df[df["IsNumeric"] == False]
-        df_invalid.to_csv(self.account_invalid, decimal=",", sep=";", encoding="latin-1", index=False)
         export_csv = self.export_filename[:-4] + ".csv"
-
         df.to_csv(export_csv, decimal=",", sep=";", encoding="latin-1", index=False)
         df = df[df["IsNumeric"] != False].groupby(ACCOUNT_INFO, as_index=False).aggregate("sum")
+
         # Infos ergänzen
         df["Decimals"] = 2
-        # df['OpeningBalance'] = 0.0
+        # df.sort_values(by=["Konto_Nr_SKR51"], inplace=True)
         logging.info(df.shape)
-        self.export_xml(
-            df.to_dict(orient="records"), self.bookkeep_filter, period_no, makes, sites, sites[0]["Standort_HBV"]
-        )
+        main_sites = [self.sites[s] for s in sites_used if s in self.sites and self.sites[s] != "0000"]
+
+        for i, main_site in enumerate(main_sites):
+            filename = self.export_filename
+            if i > 0:
+                filename = f"{filename[:-4]}_{main_site}.xml"
+            self.export_xml(
+                df.to_dict(orient="records"),
+                self.bookkeep_filter,
+                period_no,
+                makes_used,
+                sites_used,
+                main_site,
+                filename,
+            )
 
         # Join auf Übersetzung - nicht zugeordnet
-        df_ignored = df_bookings.merge(df_translate, how="left", on="Konto_Nr_Händler")
-        df_ignored = df_ignored[
-            df_ignored["Konto_Nr_SKR51"].isna()
-        ]  # [['Konto_Nr_Händler', 'Bookkeep Period', 'amount', 'quantity']]
+        df_ignored = df_bookings.merge(self.df_translate, how="left", on="Konto_Nr_Händler")
+        df_ignored = df_ignored[df_ignored["Konto_Nr_SKR51"].isna()]
         if not df_ignored.empty:
             df_ignored = df_ignored.pivot_table(
                 index=["Konto_Nr_Händler"],
                 columns=["period"],
                 values="amount",
-                aggfunc=np.sum,
+                aggfunc="sum",
                 margins=True,
                 margins_name="CumulatedYear",
             )
             df_ignored.to_csv(self.account_ignored, decimal=",", sep=";", encoding="latin-1")
         return self.export_filename
 
-    def prepare_translation(self, df_translate: pd.DataFrame):
-        logging.info(df_translate.shape)
-        df_translate["duplicated"] = df_translate.duplicated()
-        logging.info(df_translate[df_translate["duplicated"]])
-        df_translate = df_translate[
+    def get_translation(self):
+        if self.df_translate is None:
+            df_translate_import = pd.read_csv(
+                self.account_translation,
+                decimal=",",
+                sep=";",
+                encoding="latin-1",
+                converters={i: str for i in range(0, 200)},
+            ).reset_index()
+
+            df_makes = df_translate_import[["Marke", "Marke_HBV"]].copy().drop_duplicates()
+            df_makes = df_makes[df_makes["Marke_HBV"] != "0000"]
+            self.makes = dict([(e["Marke"], e["Marke_HBV"]) for e in df_makes.to_dict(orient="records")])
+            self.makes["99"] = "0000"
+            df_sites = df_translate_import[["Marke", "Standort", "Standort_HBV"]].copy().drop_duplicates()
+            df_sites["Standort_HBV"] = np.where(
+                df_sites["Standort_HBV"].str.len() != 6, "0000", df_sites["Standort_HBV"]
+            )
+            self.sites = dict(
+                [(e["Marke"] + "-" + e["Standort"], e["Standort_HBV"]) for e in df_sites.to_dict(orient="records")]
+            )
+
+            df_prepared = self.prepare_translation(df_translate_import)
+            self.df_translate = self.special_translation(df_prepared)
+            self.df_translate2 = (
+                self.df_translate.drop(columns=["Konto_Nr_Händler"])
+                .copy()
+                .drop_duplicates()
+                .set_index("Konto_Nr_SKR51")
+            )
+        return self.df_translate
+
+    def prepare_translation(self, df_translate_import: pd.DataFrame):
+        df_translate = df_translate_import[
             [
                 "Konto_Nr_Händler",
                 "Konto_Nr_SKR51",
-                "Marke",
-                "Marke_HBV",
-                "Standort",
-                "Standort_HBV",
             ]
-        ]
-
-        row = (
-            df_translate[["Marke", "Marke_HBV", "Standort", "Standort_HBV"]]
-            .drop_duplicates()
-            .sort_values(by=["Marke", "Standort"])
-            .iloc[:1]
-            .to_dict(orient="records")[0]
-        )
-        row["Konto_Nr_Händler"] = "01-01-0861-00-00-00"
-        row["Konto_Nr_SKR51"] = "01-01-0861-00-00-00"
+        ].drop_duplicates()
+        logging.info(df_translate.shape)
 
+        row = {
+            "Konto_Nr_Händler": "01-01-0861-00-00-00",
+            "Konto_Nr_SKR51": "01-01-0861-00-00-00",
+        }
         df_translate = pd.concat([df_translate, pd.DataFrame.from_records([row])])
-        # print(df_translate.tail())
-        # df_translate.drop(columns=['duplicated'], inplace=True)
-        df_translate.drop_duplicates(inplace=True)
         df_translate.set_index("Konto_Nr_Händler")
         return df_translate
 
-    def special_translation(self, df: pd.DataFrame, makes: pd.DataFrame):
+    def special_translation(self, df: pd.DataFrame):
         df = self.extract_acct_info(df)
-        # df = df_translate.reset_index(drop=True).drop(columns=['Kostenträger_Ebene']).drop_duplicates()
         logging.info(df.shape)
         logging.info(df.columns)
         logging.info(df.head())
-        # df = df.merge(df_translate, how='inner', on='Konto_Nr_SKR51')
 
         logging.info("df: " + str(df.shape))
         df["Bilanz"] = df["Konto_Nr"].str.match(r"^[013]")
@@ -256,9 +279,10 @@ class GCHR:
         df["Kontoart"] = np.where(df["Konto_Nr"].str.match(r"^[9]"), "3", df["Kontoart"])
         df["Konto_1"] = df["Konto_Nr"].str.slice(0, 1)
 
-        # Hack für fehlende Markenzuordnung
-        df = df.merge(makes, how="left", on="Marke")
-        df["Marke"] = np.where(df["Marke_HBV"].isna(), "99", df["Marke"])
+        # fehlende Marken- und Standortzuordnung
+        df["Marke"] = np.where(df["Marke"].isin(self.makes.keys()), df["Marke"], "99")
+        df["Marke_Standort"] = df["Marke"] + "-" + df["Standort"]
+        df["Standort"] = np.where(df["Marke_Standort"].isin(self.sites.keys()), df["Standort"], "01")
 
         df_debug = df.drop(columns=["Bilanz"])
         logging.info(df_debug.groupby(["Kontoart"]).aggregate("sum"))
@@ -340,8 +364,20 @@ class GCHR:
         df["Kostenträger"] = np.where(df["Service_40_70"] == True, "70", df["Kostenträger"])
 
         df["KRM"] = df["Marke"] + df["Standort"] + df["Kostenstelle"] + df["Absatzkanal"] + df["Kostenträger"]
-        df["IsNumeric"] = (df["KRM"].str.isdigit()) & (df["Konto_Nr"].str.isdigit()) & (df["Konto_Nr"].str.len() == 4)
-        return df
+        df["Konto_Nr_SKR51"] = (
+            (df["Marke"] + "-" + df["Standort"] + "-" + df["Konto_Nr"])
+            + "-"
+            + (df["Kostenstelle"] + "-" + df["Absatzkanal"] + "-" + df["Kostenträger"])
+        )
+        df["IsNumeric"] = (
+            (df["KRM"].str.isdigit())
+            & (df["Konto_Nr"].str.isdigit())
+            & (df["Konto_Nr"].str.len() == 4)
+            # & (df["Konto_Nr_SKR51"].str.len() == 19)
+        )
+        df_invalid = df[df["IsNumeric"] == False]
+        df_invalid.to_csv(self.export_invalid_filename, decimal=",", sep=";", encoding="latin-1", index=False)
+        return df[df["IsNumeric"] == True][TRANSLATE]
 
     def load_bookings_from_file(self):
         df2 = []
@@ -430,28 +466,37 @@ class GCHR:
     def export_info_dir(self):
         return f"{self.base_dir}/Export/{self.current_year}/info/"
 
+    @property
+    def export_invalid_filename(self):
+        return f"{self.base_dir}/Export/ungueltig.csv"
+
     def export_filename_for_period(self, year, month):
         return f"{self.base_dir}/Export/{year}/export_{year}-{month}.xml"
 
-    def export_xml(self, records, bk_filter, period_no, makes, sites, main_site):
+    def export_xml(self, records, bk_filter, period_no, makes_used, sites_used, main_site, filename):
         record_elements = ACCOUNT_INFO + ["Decimals"] + list(bk_filter.values())[:period_no] + ["CumulatedYear"]
         root = ET.Element("HbvData")
         h = ET.SubElement(root, "Header")
-        for k, v in self.header(makes, sites, main_site).items():
+        for k, v in self.header(makes_used, sites_used, main_site).items():
             ET.SubElement(h, k).text = str(v)
 
         make_list = ET.SubElement(root, "MakeList")
-        for m in makes:
+        for make in makes_used:
+            if make not in self.makes:
+                continue
             e = ET.SubElement(make_list, "MakeListEntry")
-            ET.SubElement(e, "Make").text = m["Make"]
-            ET.SubElement(e, "MakeCode").text = m["Marke_HBV"]
+            ET.SubElement(e, "Make").text = make
+            ET.SubElement(e, "MakeCode").text = self.makes[make]
 
         bm_code_list = ET.SubElement(root, "BmCodeList")
-        for s in sites:
+        for s in sites_used:
+            make, site = s.split("-")
+            if s not in self.sites:
+                continue
             e = ET.SubElement(bm_code_list, "BmCodeEntry")
-            ET.SubElement(e, "Make").text = s["Make"]
-            ET.SubElement(e, "Site").text = s["Site"]
-            ET.SubElement(e, "BmCode").text = s["Standort_HBV"]
+            ET.SubElement(e, "Make").text = make
+            ET.SubElement(e, "Site").text = site
+            ET.SubElement(e, "BmCode").text = self.sites[s]
 
         record_list = ET.SubElement(root, "RecordList")
         for row in records:
@@ -465,7 +510,7 @@ class GCHR:
                     field = "{:.0f}".format(field * 100)
                 child.text = str(field)
 
-        with open(self.export_filename, "w", encoding="utf-8") as fwh:
+        with open(filename, "w", encoding="utf-8") as fwh:
             fwh.write(minidom.parseString(ET.tostring(root)).toprettyxml(indent="  "))
 
     def convert_to_row(self, node):
@@ -482,11 +527,11 @@ class GCHR:
         return True
 
     def convert_csv_to_xml(self, csvfile, xmlfile):
-        makes = [{"Make": "01", "Marke_HBV": "1844"}]
-        sites = [{"Make": "01", "Site": "01", "Marke_HBV": "1844"}]
+        self.makes = {"01": "1844"}
+        self.sites = {"01-01": "1844"}
         with open(csvfile, "r", encoding="latin-1") as frh:
             csv_reader = csv.DictReader(frh, delimiter=";")
-            self.export_xml(csv_reader, self.bookkeep_filter(), 1, makes, sites, sites[0]["Standort_HBV"], xmlfile)
+            self.export_xml(csv_reader, self.bookkeep_filter(), 1, list(self.sites.values())[0], xmlfile)
 
 
 def gchr_local():