123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175 |
- import re
- from collections import defaultdict
- from pathlib import Path
- class IqdConverter:
- commands = ["convert", "run_folder"]
- target_schema_by_type = {"csv": "staging", "ims": "ims"}
- output_dir = "C:\\GlobalCube\\System\\OPTIMA\\SQL\\schema\\OPTIMA\\views_imr"
- def convert(self, iqd_file, target_type="csv", force=True):
- if not Path(iqd_file).exists():
- print(f"File {iqd_file} does not exist!")
- return
- output_file = iqd_file[:-4] + ".sql"
- if (
- not force
- and Path(output_file).exists()
- and Path(output_file).stat().st_mtime >= Path(iqd_file).stat().st_mtime
- ):
- # no update needed
- return
- query = self.get_query_from_iqdfile(iqd_file)
- query = self.cleanup_query(query)
- with open(output_file, "w", encoding="latin-1") as fwh:
- fwh.write(query)
- create_view, view_file = self.get_create_view(iqd_file, target_type, query)
- with open(view_file, "w", encoding="latin-1") as fwh:
- fwh.write(create_view)
- # create_view_90 = self.get_create_view_90(query)
- # with open(view_file.replace("_imr.sql", "_90.sql"), "w", encoding="latin-1") as fwh:
- # fwh.write(create_view_90)
- def get_create_view(self, iqd_file, target_type, query):
- table = Path(iqd_file).stem.lower()
- if target_type == "csv":
- table += "_imr"
- schema = self.target_schema_by_type[target_type]
- create_view = (
- "SET QUOTED_IDENTIFIER ON \nGO\nSET ANSI_NULLS ON \nGO\n"
- + f"CREATE VIEW {schema}.{table} AS\n\n{query}\n"
- + "GO\nSET QUOTED_IDENTIFIER OFF \nGO\nSET ANSI_NULLS OFF \nGO\n\nGO"
- )
- view_file = f"{self.output_dir}\\{schema}.{table}.sql"
- return create_view, view_file
- def get_create_view_90(self, query):
- match = re.findall(r"(\"([^\"]+)\"\.)?\"([^\"]+)\"\ (T\d+)", query)
- tables = dict([(t[3], t[2]) for t in match])
- query_split = query.split("select")[-1]
- query_from_ori = re.search(r"from ([^\s].*)where", query_split, re.DOTALL)
- if not query_from_ori:
- return ""
- query_from = query_from_ori.group(1).replace("(", "").replace(")", "")
- query_where_ori = re.search(r"where (.*)", query_split, re.DOTALL).group(1)
- query_where = re.sub(r"-- order by.*", "", query_where_ori)
- # query_where = query_where_ori.replace("(", "").replace(")", "")
- match = re.findall(r"T\d+\.\"[^\"]+\"", query)
- columns = list(sorted(list(set(match))))
- cols_alias = []
- for col in columns:
- t_name, col_name = re.search(r"(T\d+)\.\"([^\"]+)\"", col).group(1, 2)
- if True or col_name in cols_alias:
- table_name = tables.get(t_name, t_name)
- cols_alias.append(f"{col_name}__{table_name}")
- else:
- cols_alias.append(col_name)
- columns_combined = [f'{c} AS "{a}"' for c, a in zip(columns, cols_alias)]
- return "SELECT " + ", ".join(columns_combined) + " FROM " + query_from + " WHERE " + query_where
- def get_query_from_iqdfile(self, iqd_file):
- if iqd_file[-4:].lower() == ".imr":
- iqd_file = iqd_file[:-4] + ".iqd"
- iqd = open(iqd_file, "r", encoding="latin-1").read()
- res = re.findall(r"BEGIN SQL\n(.*)\n\nEND SQL", iqd, re.MULTILINE | re.DOTALL)
- query = ""
- if len(res) > 0:
- query = res[0]
- columns = re.findall(r"COLUMN,\d+,(.*)", iqd)
- col_keys = [f"c{i}" for i in range(len(columns), 0, -1)]
- col_names = [f'"{c}"' for c in reversed(columns)]
- used_cols = defaultdict(int)
- for col_key, col_name in zip(col_keys, col_names):
- used_cols[col_name] += 1
- if used_cols[col_name] > 1:
- col_name = col_name[:-1] + "_" + str(used_cols[col_name]) + '"'
- query = re.sub(col_key + r"([^\d])", col_name + r"\1", query)
- columns2 = re.findall(r'\s+(c\d+) as (".*")', query)
- used_cols = defaultdict(int)
- for col in columns2:
- used_cols[col[1]] += 1
- col_name = col[1]
- if used_cols[col[1]] > 1:
- col_name = col[1][:-1] + "_" + str(used_cols[col[1]]) + '"'
- query = re.sub(col[0] + r" as " + col[1], col_name, query)
- query = re.sub(col[0] + r"([^\d])", col_name + r"\1", query)
- return query
- def cleanup_query(self, query):
- query = re.sub(r" from (\d+) for (\d+)\)", r", \1, \2)", query)
- query = query.replace(" || ", " + ")
- query = query.replace("truncate(", "rtrim(")
- query = query.replace("char_length(", "len(")
- query = query.replace("database(", "db_name(")
- query = query.replace("ascii(", "convert(varchar(50), ")
- query = query.replace("extract(DAY FROM ", "day(")
- query = query.replace("extract(MONTH FROM ", "month(")
- query = query.replace("extract(YEAR FROM ", "year(")
- query = query.replace("od_year(", "year(")
- query = query.replace("od_month(", "month(")
- query = query.replace("lastday(", "eomonth(")
- query = query.replace("cdatetime(", "convert(datetime, ")
- query = query.replace("cast_float(", "convert(float, ")
- query = query.replace("sy_right(", "right(")
- query = query.replace("od_left(", "left(")
- query = query.replace("od_right(", "right(")
- query = query.replace("length(", "len(")
- query = query.replace("{hour}", "hour")
- query = query.replace("{minute}", "minute")
- query = query.replace("{weekday}", "weekday")
- query = query.replace("dayofweek(", "datepart(weekday, ")
- query = query.replace("cast_numberToString(cast_integer(", "((")
- query = query.replace("@CURRENT_DATE", "getdate()")
- query = query.replace("now()", "getdate()")
- query = query.replace("INTERVAL '001 10:00:00.000'", "1")
- query = query.replace("INTERVAL '001 00:00:00.000'", "1")
- query = query.replace("cdate(", "(")
- query = re.sub(r"intdiv\(([^\)]+)\,1\)", r"\1", query)
- query = re.sub(r"XCOUNT\(([^\)]+) for ", r"COUNT(\1) OVER (partition by ", query)
- query = re.sub(r"XSUM\(([^\)]+) for ", r"SUM(\1) OVER (partition by ", query)
- query = re.sub(r"RSUM\(([^\)]+) for ", r"SUM(\1) OVER (partition by ", query)
- query = re.sub(r"XMIN\(([^\)]+) for ", r"MIN(\1) OVER (partition by ", query)
- query = re.sub(r"XMAX\(([^\)]+) for ", r"MAX(\1) OVER (partition by ", query)
- query = re.sub(r"XRANK\(([^\)]+) for ([^\)]+)", r"RANK() OVER (partition by \2 order by \1", query)
- query = re.sub(r"QSS\.\"[^\"]+\\([^\\]+)\.ims\"", r'"ims"."\1"', query)
- query = re.sub(r"DATE '([\d-]+)'", r"convert(date, '\1')", query)
- query = re.sub(r"TIMESTAMP '([\d\s\-\:\.]+)'", r"convert(datetime, '\1')", query)
- query = re.sub(r"asciiz\(([^\,]*)\,\d+\)", r"convert(varchar(50), \1)", query)
- query = re.sub(r"asciiz\(([^\+]*)\)", r"convert(varchar(50), \1)", query)
- # query = re.sub(r'day\(([^\-\<\>\=]*) \- ([^\-\<\>\=]*)\)\) as', r'datediff(day, \2, \1)) as', query)
- # query = re.sub(r'day\(([^\-\<\>\=]*) \- ([^\-\<\>\=]*)\)\)\)', r'datediff(day, \2, \1)))', query)
- query = re.sub(r"day\(([^\-\,\']*) \- ", r"-1 * datediff(day, \1, ", query)
- query = re.sub(r"convert\(varchar\(50\)\, ([^,]+)\,\d+\)", r"convert(varchar(50), \1)", query)
- query = query.replace("cdate((convert(float, ", "convert(datetime, ((")
- query = re.sub(r"[^ ](order by .*)", r"\n-- \1", query)
- return query
- def run_folder(self, base_dir):
- files = sorted([(f.stat().st_mtime, f) for f in Path(base_dir).rglob("*.iqd")])
- for timestamp, iqd in files:
- self.convert(str(iqd))
- if __name__ == "__main__":
- iqdconv = IqdConverter()
- iqdconv.output_dir = "C:\\GlobalCube_LOCOSOFT\\System\\LOCOSOFT\\SQL\\schema\\LOCOSOFT\\views_imr"
- iqdconv.run_folder("C:\\GlobalCube_LOCOSOFT\\System\\LOCOSOFT\\IQD")
|