Browse Source

Tests am Beispiel von csv_cleanup

gc-server3 2 months ago
parent
commit
2e6f8c6fed
4 changed files with 63 additions and 30 deletions
  1. 9 1
      .vscode/settings.json
  2. 0 0
      database/tests/__init__.py
  3. 17 29
      misc/csv_cleanup.py
  4. 37 0
      tests/test_csv_cleanup.py

+ 9 - 1
.vscode/settings.json

@@ -12,5 +12,13 @@
     "python.testing.unittestEnabled": true,
     "files.associations": {
         "*.mac": "vbs"
-    }
+    },
+    "python.testing.unittestArgs": [
+        "-v",
+        "-s",
+        "./tests",
+        "-p",
+        "test_*.py"
+    ],
+    "python.testing.autoTestDiscoverOnSaveEnabled": true    
 }

+ 0 - 0
database/tests/__init__.py


+ 17 - 29
misc/csv_cleanup.py

@@ -3,8 +3,6 @@ import re
 from datetime import datetime
 from pathlib import Path
 
-import plac
-
 MIN_AGE = datetime.now().timestamp() - 12 * 60 * 60
 
 
@@ -23,7 +21,7 @@ def csv_cleanup_file(csv_file: Path):
     if file_mtime < MIN_AGE:
         return
     print(csv_file.name)
-    with open(csv_file, "r", encoding="latin-1") as frh:
+    with open(csv_file, "r", encoding="latin-1", errors="ignore") as frh:
         with open(temp_file, "w", encoding="latin-1") as fwh:
             with open(error_file, "w", encoding="latin-1") as ewh:
                 header = frh.readline()
@@ -63,10 +61,15 @@ def csv_cleanup_file(csv_file: Path):
                         buffer_count = 0
                 if error_count + solved_count > 0:
                     print("")
-                if error_count > 0:
-                    print(f"!! Anzahl Fehler: {error_count} !!")
 
     os.utime(temp_file, (file_mtime, file_mtime))
+    if solved_count > 0:
+        print(f"Zeilenverschiebungen behoben: {solved_count}")
+
+    if error_count == 0:
+        error_file.unlink()
+    else:
+        print(f"!! Anzahl Fehler in '{csv_file.name}': {error_count} !!")
     csv_file.unlink()
     temp_file.rename(csv_file)
 
@@ -75,32 +78,17 @@ def cleanup_line(line):
     res = []
     buffer = ""
 
-    for col in line.strip("\r\n").split(";"):
+    line_iter = iter(line.strip("\r\n").split(";"))
+
+    for col in line_iter:
         if '"' not in col:
+            # no string
             res.append(col)
             continue
-        if col == '"':
-            # special case
-            if buffer == "":
-                buffer = col
-            else:
-                res.append(buffer + col)
-                buffer = ""
-            continue
-        if col[0] == '"' and col[-1] == '"':
-            res.append(col)
-            continue
-        if col[0] == '"':
-            buffer = col
-            continue
-        if col[-1] == '"':
-            res.append(buffer + col)
-            buffer = ""
-            continue
-
-    return ";".join(res) + "\n"
 
+        buffer = col
+        while buffer == '"' or buffer[-1] != '"':
+            buffer += next(line_iter)
+        res.append(buffer)
 
-if __name__ == "__main__":
-    plac.call(csv_cleanup)
-    # csv_cleanup("C:\\Users\\GAPS\\Desktop\\GuV_8_O21_csv.csv")
+    return ";".join(res) + "\n"

+ 37 - 0
tests/test_csv_cleanup.py

@@ -0,0 +1,37 @@
+import unittest
+
+from misc.csv_cleanup import cleanup_line
+
+
+class TestCsvCleanup(unittest.TestCase):
+
+    def test_cleanup_line_ident(self):
+        simple_int = "1;2;3\n"
+        simple_float = "1,20;2,322;3,434\n"
+        simple_string = '"Test";"Default";"Simple Test"\n'
+
+        self.assertEqual(cleanup_line(simple_int), simple_int)
+        self.assertEqual(cleanup_line(simple_float), simple_float)
+        self.assertEqual(cleanup_line(simple_string), simple_string)
+
+    def test_cleanup_line_semicolon(self):
+        string_1_in = '"String 1";"String;2";"Test 3"\n'
+        string_1_out = '"String 1";"String2";"Test 3"\n'
+
+        string_2_in = '"1;2;3"\n'
+        string_2_out = '"123"\n'
+
+        string_3_in = '"1";";";3\n'
+        string_3_out = '"1";"";3\n'
+
+        string_4_in = '";;;;;;;"\n'
+        string_4_out = '""\n'
+
+        string_5_in = '"";"";"";"";";;;"\n'
+        string_5_out = '"";"";"";"";""\n'
+
+        self.assertEqual(cleanup_line(string_1_in), string_1_out)
+        self.assertEqual(cleanup_line(string_2_in), string_2_out)
+        self.assertEqual(cleanup_line(string_3_in), string_3_out)
+        self.assertEqual(cleanup_line(string_4_in), string_4_out)
+        self.assertEqual(cleanup_line(string_5_in), string_5_out)