import numpy as np import pandas as pd from datetime import datetime filter = 9 project_id = 10 first_seller_id = 1 submission_header = ["sub_field_id", "submission_id", "field_id", "form_id", "value"] fields_header = [ "field_id", "form_id", "page_no", "field_label", "field_type", "field_value", "field_order", "field_show_on_user_page", "is_field_primary", "field_is_editable", "field_options", "is_deletion_allowed", ] def multi_select(entry): if entry is np.nan: return "" entries = entry.split(":", 2)[-1][1:-1].split(";") return ",".join([e.split(":")[-1][1:-1] for e in entries if not e.startswith("i")]) submission_df = pd.read_csv( "sandbox/flohmarkt/qCBshrQfrm_submission_fields.csv", names=submission_header, index_col=["sub_field_id"], ) fields_df = pd.read_csv( "sandbox/flohmarkt/qCBshrQfrm_fields.csv", names=fields_header, index_col=["field_id"], ) # print(fields_df.head()) submission_df = submission_df[submission_df["form_id"] == filter] submission_df = pd.merge(submission_df, fields_df, how="left", on=["field_id"]) # print(submission_df.head()) submission_df = submission_df.pivot( index="submission_id", columns=["field_label"], values="value" ) # submission_df.columns = [108,109,110,111,113,114,115,117,118,120,121,122,123,125,126,128,129] submission_df["Artikel"] = submission_df["Artikel"].apply(multi_select) submission_df["Mithilfe"] = submission_df["Mithilfe"].apply(multi_select) submission_df["Freiwillige Mithilfe"] = submission_df["Freiwillige Mithilfe"].apply( multi_select ) submission_df["PayPal"] = submission_df["PayPal"].apply(multi_select) fields_df = fields_df[ (fields_df["form_id"] == filter) & (fields_df["field_label"] != "Divider") ] labels = fields_df["field_label"].to_dict().values() submission_df.drop_duplicates(inplace=True) submission_df[labels].to_csv(f"sandbox/flohmarkt/submission_{filter}.csv") submission_df[["E-Mail-Adresse", "Nachname", "Vorname"]].to_csv( f"sandbox/flohmarkt/newsletter_{filter}.csv", index=False, sep=";" ) # with open(f'sandbox/flohmarkt/emails_{filter}.txt', 'w') as fwh: # fwh.write(';'.join(emails)) submission_df.sort_values(by=["Nachname", "Vorname"], ignore_index=True, inplace=True) submission_df.reset_index(inplace=True) submission_df["project_id"] = project_id submission_df["seller_id"] = submission_df.index + first_seller_id submission_df["cdate"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") export_fields = [ "project_id", "seller_id", "Nachname", "Vorname", "Adresse", "PLZ", "Wohnort", "E-Mail-Adresse", "Telefon", "cdate", "cdate", ] submission_df[export_fields].to_csv( f"sandbox/flohmarkt/upload_{filter}.csv", header=False, index=False ) # project_id seller_id name first_name address zip_code place mail phone mdate cdate # a:8:{i:0;s:14:""Größen 50/56"";i:1;s:14:""Größen 62/68"";i:2;s:14:""Größen 74/80"";i:3;s:10:""Schlafsack""; # i:4;s:39:""Säuglingsbedarf (Flaschenwärmer etc.)"";i:5;s:9:""Spielzeug"";i:6;s:7:""Bücher"";i:7;s:15:""Winter-Fußsack"";}