2 years ago · 05e25f3df6
--- a/cognos7/iqd_convert.py
+++ b/cognos7/iqd_convert.py
@@ -1,102 +1,132 @@
 
				 from pathlib import Path
			
 
				 import re
			
 
				 import plac
			
 
				+from collections import defaultdict
			
 
				 
			
 
				 
			
 
				 class IqdConverter:
			
 
				-    commands = ['convert']
			
 
				-    target_schema_by_type = {
			
 
				-        'csv': 'staging',
			
 
				-        'ims': 'ims'
			
 
				-    }
			
 
				-    output_dir = 'C:\\GlobalCube_LOCOSOFT\\System\\LOCOSOFT\\SQL\\schema\\LOCOSOFT\\views'
			
 
				-
			
 
				-    def convert(self, iqd_file, target_type='csv'):
			
 
				+    commands = ["convert"]
			
 
				+    target_schema_by_type = {"csv": "staging", "ims": "ims"}
			
 
				+    output_dir = "C:\\GlobalCube\\System\\LOCOSOFT\\SQL\\schema\\LOCOSOFT\\views_imr"
			
 
				+
			
 
				+    def convert(self, iqd_file, target_type="csv"):
			
 
				+        if not Path(iqd_file).exists():
			
 
				+            print(f"File {iqd_file} does not exist!")
			
 
				+            return
			
 
				         query = self.get_query_from_iqdfile(iqd_file)
			
 
				         query = self.cleanup_query(query)
			
 
				 
			
 
				-        output_file = iqd_file[:-4] + '.sql'
			
 
				-        with open(output_file, 'w', encoding='latin-1') as fwh:
			
 
				+        output_file = iqd_file[:-4] + ".sql"
			
 
				+        with open(output_file, "w", encoding="latin-1") as fwh:
			
 
				             fwh.write(query)
			
 
				 
			
 
				-        table = Path(iqd_file).stem
			
 
				+        table = Path(iqd_file).stem.lower()
			
 
				+        if target_type == "csv":
			
 
				+            table += "_imr"
			
 
				         schema = self.target_schema_by_type[target_type]
			
 
				 
			
 
				-        create_view = "SET QUOTED_IDENTIFIER ON \nGO\nSET ANSI_NULLS ON \nGO\n" + \
			
 
				-            f"CREATE VIEW {schema}.{table} AS\n\n{query}\n" + \
			
 
				-            "GO\nSET QUOTED_IDENTIFIER OFF \nGO\nSET ANSI_NULLS OFF \nGO\n\nGO"
			
 
				+        create_view = (
			
 
				+            "SET QUOTED_IDENTIFIER ON \nGO\nSET ANSI_NULLS ON \nGO\n"
			
 
				+            + f"CREATE VIEW {schema}.{table} AS\n\n{query}\n"
			
 
				+            + "GO\nSET QUOTED_IDENTIFIER OFF \nGO\nSET ANSI_NULLS OFF \nGO\n\nGO"
			
 
				+        )
			
 
				 
			
 
				         view_file = f"{self.output_dir}\\{schema}.{table}.sql"
			
 
				-        with open(view_file, 'w', encoding='latin-1') as fwh:
			
 
				+        with open(view_file, "w", encoding="latin-1") as fwh:
			
 
				             fwh.write(create_view)
			
 
				 
			
 
				     def get_query_from_iqdfile(self, iqd_file):
			
 
				-        if iqd_file[-4:].lower() == '.imr':
			
 
				-            iqd_file = iqd_file[:-4] + '.iqd'
			
 
				-        iqd = open(iqd_file, 'r', encoding='latin-1').read()
			
 
				-        res = re.findall(r'BEGIN SQL\n(.*)\n\nEND SQL', iqd, re.MULTILINE | re.DOTALL)
			
 
				-        query = ''
			
 
				+        if iqd_file[-4:].lower() == ".imr":
			
 
				+            iqd_file = iqd_file[:-4] + ".iqd"
			
 
				+        iqd = open(iqd_file, "r", encoding="latin-1").read()
			
 
				+        res = re.findall(r"BEGIN SQL\n(.*)\n\nEND SQL", iqd, re.MULTILINE | re.DOTALL)
			
 
				+        query = ""
			
 
				         if len(res) > 0:
			
 
				             query = res[0]
			
 
				 
			
 
				-        columns = re.findall(r'COLUMN,\d+,(.*)', iqd)
			
 
				-        col_keys = [f'c{i}' for i in range(len(columns), 0, -1)]
			
 
				+        columns = re.findall(r"COLUMN,\d+,(.*)", iqd)
			
 
				+        col_keys = [f"c{i}" for i in range(len(columns), 0, -1)]
			
 
				         col_names = [f'"{c}"' for c in reversed(columns)]
			
 
				 
			
 
				+        used_cols = defaultdict(int)
			
 
				         for col_key, col_name in zip(col_keys, col_names):
			
 
				-            query = re.sub(col_key + r'([^\d])', col_name + r'\1', query)
			
 
				+            used_cols[col_name] += 1
			
 
				+            if used_cols[col_name] > 1:
			
 
				+                col_name = col_name[:-1] + "_" + str(used_cols[col_name]) + '"'
			
 
				+            query = re.sub(col_key + r"([^\d])", col_name + r"\1", query)
			
 
				 
			
 
				         columns2 = re.findall(r'\s+(c\d+) as (".*")', query)
			
 
				+        used_cols = defaultdict(int)
			
 
				         for col in columns2:
			
 
				-            query = re.sub(col[0] + r' as ' + col[1], col[1], query)
			
 
				-            query = re.sub(col[0] + r'([^\d])', col[1] + r'\1', query)
			
 
				+            used_cols[col[1]] += 1
			
 
				+            col_name = col[1]
			
 
				+            if used_cols[col[1]] > 1:
			
 
				+                col_name = col[1][:-1] + "_" + str(used_cols[col[1]]) + '"'
			
 
				+            query = re.sub(col[0] + r" as " + col[1], col_name, query)
			
 
				+            query = re.sub(col[0] + r"([^\d])", col_name + r"\1", query)
			
 
				         return query
			
 
				 
			
 
				     def cleanup_query(self, query):
			
 
				-        query = re.sub(r' from (\d+) for (\d+)\)', r', \1, \2)', query)
			
 
				-        query = query.replace(' || ', ' + ')
			
 
				-        query = query.replace('truncate(', 'rtrim(')
			
 
				-        query = query.replace('char_length(', 'len(')
			
 
				-        query = query.replace('ascii(', 'convert(varchar(50), ')
			
 
				-        query = query.replace('extract(DAY FROM ', 'day(')
			
 
				-        query = query.replace('extract(MONTH FROM ', 'month(')
			
 
				-        query = query.replace('extract(YEAR FROM ', 'year(')
			
 
				-        query = query.replace('cdatetime(', 'convert(datetime, ')
			
 
				-        query = query.replace('cast_float(', 'convert(float, ')
			
 
				-        query = query.replace('sy_right(', 'right(')
			
 
				-        query = query.replace('od_left(', 'left(')
			
 
				-        query = query.replace('length(', 'len(')
			
 
				-        query = query.replace('{hour}', 'hour')
			
 
				-        query = query.replace('{minute}', 'minute')
			
 
				-        query = query.replace('{weekday}', 'weekday')
			
 
				-        query = query.replace('@CURRENT_DATE', 'getdate()')
			
 
				-        query = query.replace('cdate(', '(')
			
 
				-
			
 
				-        query = re.sub(r'intdiv\(([^\)]+)\,1\)', r'\1', query)
			
 
				-        query = re.sub(r'XCOUNT\(([^\)]+) for ', r'COUNT(\1) OVER (partition by ', query)
			
 
				-        query = re.sub(r'XSUM\(([^\)]+) for ', r'SUM(\1) OVER (partition by ', query)
			
 
				-        query = re.sub(r'XMIN\(([^\)]+) for ', r'MIN(\1) OVER (partition by ', query)
			
 
				-        query = re.sub(r'XMAX\(([^\)]+) for ', r'MAX(\1) OVER (partition by ', query)
			
 
				-
			
 
				-        query = re.sub(r'QSS\.\"[^\"]+\\([^\\]+)\.ims\"', r'"ims"."\1"', query)
			
 
				+        query = re.sub(r" from (\d+) for (\d+)\)", r", \1, \2)", query)
			
 
				+        query = query.replace(" || ", " + ")
			
 
				+        query = query.replace("truncate(", "rtrim(")
			
 
				+        query = query.replace("char_length(", "len(")
			
 
				+        query = query.replace("database(", "db_name(")
			
 
				+        query = query.replace("ascii(", "convert(varchar(50), ")
			
 
				+        query = query.replace("extract(DAY FROM ", "day(")
			
 
				+        query = query.replace("extract(MONTH FROM ", "month(")
			
 
				+        query = query.replace("extract(YEAR FROM ", "year(")
			
 
				+        query = query.replace("od_year(", "year(")
			
 
				+        query = query.replace("od_month(", "month(")
			
 
				+        query = query.replace("lastday(", "eomonth(")
			
 
				+        query = query.replace("cdatetime(", "convert(datetime, ")
			
 
				+        query = query.replace("cast_float(", "convert(float, ")
			
 
				+        query = query.replace("sy_right(", "right(")
			
 
				+        query = query.replace("od_left(", "left(")
			
 
				+        query = query.replace("od_right(", "right(")
			
 
				+        query = query.replace("length(", "len(")
			
 
				+        query = query.replace("{hour}", "hour")
			
 
				+        query = query.replace("{minute}", "minute")
			
 
				+        query = query.replace("{weekday}", "weekday")
			
 
				+        query = query.replace("dayofweek(", "datepart(weekday, ")
			
 
				+        query = query.replace("cast_numberToString(cast_integer(", "((")
			
 
				+        query = query.replace("@CURRENT_DATE", "getdate()")
			
 
				+        query = query.replace("now()", "getdate()")
			
 
				+        query = query.replace("INTERVAL '001 10:00:00.000'", "1")
			
 
				+        query = query.replace("INTERVAL '001 00:00:00.000'", "1")
			
 
				+
			
 
				+        query = query.replace("cdate(", "(")
			
 
				+
			
 
				+        query = re.sub(r"intdiv\(([^\)]+)\,1\)", r"\1", query)
			
 
				+        query = re.sub(r"XCOUNT\(([^\)]+) for ", r"COUNT(\1) OVER (partition by ", query)
			
 
				+        query = re.sub(r"XSUM\(([^\)]+) for ", r"SUM(\1) OVER (partition by ", query)
			
 
				+        query = re.sub(r"RSUM\(([^\)]+) for ", r"SUM(\1) OVER (partition by ", query)
			
 
				+        query = re.sub(r"XMIN\(([^\)]+) for ", r"MIN(\1) OVER (partition by ", query)
			
 
				+        query = re.sub(r"XMAX\(([^\)]+) for ", r"MAX(\1) OVER (partition by ", query)
			
 
				+        query = re.sub(r"XRANK\(([^\)]+) for ([^\)]+)", r"RANK() OVER (partition by \2 order by \1", query)
			
 
				+
			
 
				+        query = re.sub(r"QSS\.\"[^\"]+\\([^\\]+)\.ims\"", r'"ims"."\1"', query)
			
 
				         query = re.sub(r"DATE '([\d-]+)'", r"convert(date, '\1')", query)
			
 
				         query = re.sub(r"TIMESTAMP '([\d\s\-\:\.]+)'", r"convert(datetime, '\1')", query)
			
 
				 
			
 
				-        query = re.sub(r'asciiz\(([^\,]*)\,\d+\)', r'convert(varchar(50), \1)', query)
			
 
				-        query = re.sub(r'asciiz\(([^\+]*)\)', r'convert(varchar(50), \1)', query)
			
 
				-        query = re.sub(r'convert\(varchar\(50\)\, ([^,]+)\,\d+\)', r'convert(varchar(50), \1)', query)
			
 
				-        query = query.replace('cdate((convert(float, ', 'convert(datetime, ((')
			
 
				+        query = re.sub(r"asciiz\(([^\,]*)\,\d+\)", r"convert(varchar(50), \1)", query)
			
 
				+        query = re.sub(r"asciiz\(([^\+]*)\)", r"convert(varchar(50), \1)", query)
			
 
				+        # query = re.sub(r'day\(([^\-\<\>\=]*) \- ([^\-\<\>\=]*)\)\) as', r'datediff(day, \2, \1)) as', query)
			
 
				+        # query = re.sub(r'day\(([^\-\<\>\=]*) \- ([^\-\<\>\=]*)\)\)\)', r'datediff(day, \2, \1)))', query)
			
 
				+        query = re.sub(r"day\(([^\-\,\']*) \- ", r"-1 * datediff(day, \1, ", query)
			
 
				+        query = re.sub(r"convert\(varchar\(50\)\, ([^,]+)\,\d+\)", r"convert(varchar(50), \1)", query)
			
 
				+        query = query.replace("cdate((convert(float, ", "convert(datetime, ((")
			
 
				 
			
 
				-        query = re.sub(r'(order by .*)', r'-- \1', query)
			
 
				+        query = re.sub(r"[^ ](order by .*)", r"\n-- \1", query)
			
 
				         return query
			
 
				 
			
 
				     def run_folder(self, base_dir: str):
			
 
				-        files = sorted([(f.stat().st_mtime, f) for f in Path(base_dir).rglob('*.iqd')])
			
 
				+        files = sorted([(f.stat().st_mtime, f) for f in Path(base_dir).rglob("*.iqd")])
			
 
				         for timestamp, iqd in files:
			
 
				             self.convert(str(iqd))
			
 
				 
			
 
				 
			
 
				-if __name__ == '__main__':
			
 
				+if __name__ == "__main__":
			
 
				     # IqdConverter().convert('C:\\GlobalCube_LOCOSOFT\\System\\LOCOSOFT\\IQD\\Serv_Teile\\offene_Auftraege_Ums_ben_AW.iqd')
			
 
				     # IqdConverter().run_folder('C:\\Projekte\\DWH\\CARLO\\IQD')
			
 
				     plac.Interpreter.call(IqdConverter)