update to new CdE CSV format

2022-10-28 16:38:33 +02:00 · 2022-10-28 16:38:33 +02:00 · 524a2e5787
commit 524a2e5787
parent c576c54ed3
1 changed file with 29 additions and 97 deletions
--- a/beancount_cde_importer/__init__.py
+++ b/beancount_cde_importer/__init__.py
@@ -11,44 +11,20 @@ from beancount.core.number import Decimal  # type: ignore
 from beancount.ingest import cache, importer  # type: ignore


-INDEX_DATE = 0
-INDEX_TRANSACTION_NUMBER = 1
-INDEX_LABEL = 2
-INDEX_DEBIT = 3
-INDEX_CREDIT = 4
-INDEX_DETAIL = 5
+COL_DATE = "Date de comptabilisation"
+COL_LABEL = "Libelle operation"
+COL_DEBIT = "Debit"
+COL_CREDIT = "Credit"
+COL_DETAIL = "Informations complementaires"

 END_DATE_REGEX = "Date de fin de téléchargement : ([0-3][0-9]/[0-1][0-9]/[0-9]{4})"
 START_DATE_REGEX = "Date de début de téléchargement : ([0-3][0-9]/[0-1][0-9]/[0-9]{4})"

-
-def is_valid_header(header: list[str]) -> bool:
-    return (
-        header[INDEX_DATE] == "Date"
-        and header[INDEX_TRANSACTION_NUMBER] == "Numéro d'opération"
-        and header[INDEX_LABEL] == "Libellé"
-        and header[INDEX_DEBIT] == "Débit"
-        and header[INDEX_CREDIT] == "Crédit"
-        and header[INDEX_DETAIL] == "Détail"
-    )
+EXPECTED_HEADER = "Date de comptabilisation;Libelle simplifie;Libelle operation;Reference;Informations complementaires;Type operation;Categorie;Sous categorie;Debit;Credit;Date operation;Date de valeur;Pointage operation"


-def get_date(file: cache._FileMemo, regex: str) -> Optional[date]:
-    match: Optional[re.Match] = re.search(regex, file.head())
-    if match is None:
-        return None
-    date_str: Optional[str] = match.group(1)
-    if date_str is None:
-        return None
-    return datetime.strptime(date_str, "%d/%m/%Y").date()
-
-
-def get_end_date(file: cache._FileMemo) -> Optional[date]:
-    return get_date(file, END_DATE_REGEX)
-
-
-def get_start_date(file: cache._FileMemo) -> Optional[date]:
-    return get_date(file, START_DATE_REGEX)
+def is_valid_header(header: str) -> bool:
+    return header == EXPECTED_HEADER


 class CDEImporter(importer.ImporterProtocol):
@@ -60,16 +36,7 @@ class CDEImporter(importer.ImporterProtocol):
            # NOTE: beancount.ingest.cache._FileMemo handles automatic encoding
            # detection
            lines: list[str] = file.head().splitlines()
-            csv_reader = csv.reader(
-                lines, delimiter=";", strict=True, quoting=csv.QUOTE_NONE
-            )
-
-            # header is actually on the 5th line, the previous ones contain
-            # miscellaneous information
-            header: Optional[list[str]] = next(islice(csv_reader, 4, None))
-
-            if header is None:
-                return False
+            header: str = lines[0]
            return is_valid_header(header)

        except:
@@ -82,73 +49,38 @@ class CDEImporter(importer.ImporterProtocol):
        return "CaisseEpargne_Statement.csv"

    def file_date(self, file: cache._FileMemo) -> Optional[date]:
-        return get_end_date(file)
+        lines: list[str] = file.contents().splitlines()
+        csv_reader: csv.DictReader = csv.DictReader(
+            lines, delimiter=";", strict=True, quoting=csv.QUOTE_NONE
+        )
+
+        row: Optional[dict[str, str]] = next(csv_reader)
+        if row is None:
+            return None
+
+        return datetime.strptime(
+            row[COL_DATE], "%d/%m/%Y"
+        ).date()

    def extract(self, file: cache._FileMemo, existing_entries=None) -> list[Any]:
        directives: list[Any] = []

-        end_date: Optional[date] = get_end_date(file)
-        start_date: Optional[date] = get_start_date(file)
-        if end_date is None or start_date is None:
-            return directives
-
        lines: list[str] = file.contents().splitlines()
-        csv_reader = csv.reader(
+        csv_reader = csv.DictReader(
            lines, delimiter=";", strict=True, quoting=csv.QUOTE_NONE
        )

-        # first 3 lines are useless
-        for _ in range(3):
-            next(csv_reader)
-
-        # 4th line is usually the final balance
-        row: Optional[list[str]] = next(csv_reader)
-        if row is None:
-            return directives
-
-        if row[0] == "Solde en fin de période":
-            meta = data.new_metadata(file.name, 4)
-            balance = Decimal(row[4].replace(",", "."))
-            directives.append(
-                data.Balance(
-                    meta=meta,
-                    date=end_date,
-                    account=self.account,
-                    amount=Amount(balance, "EUR"),
-                    tolerance=None,
-                    diff_amount=None,
-                )
-            )
-
-        # skip headings
-        next(csv_reader)
-
        for index, row in enumerate(csv_reader):
-            lineno: int = index + 6  # entries start at line 6
+            lineno: int = index + 2  # entries start at line 2
            meta = data.new_metadata(file.name, lineno)

-            if row[0] == "Solde en début de période":
-                balance = Decimal(row[4].replace(",", "."))
-                directives.append(
-                    data.Balance(
-                        meta=meta,
-                        date=start_date,
-                        account=self.account,
-                        amount=Amount(balance, "EUR"),
-                        tolerance=None,
-                        diff_amount=None,
-                    )
-                )
-
-                # should be the last line anyway
-                continue
-
            transaction_date: date = datetime.strptime(
-                row[INDEX_DATE], "%d/%m/%y"
+                row[COL_DATE], "%d/%m/%Y"
            ).date()
-            label: str = row[INDEX_LABEL]
-            debit: str = row[INDEX_DEBIT]
-            credit: str = row[INDEX_CREDIT]
+            label: str = row[COL_LABEL]
+            debit: str = row[COL_DEBIT]
+            credit: str = row[COL_CREDIT]
+            detail: str = row[COL_DETAIL] if row[COL_DETAIL] else ""

            postings: list[data.Posting] = []

@@ -174,7 +106,7 @@ class CDEImporter(importer.ImporterProtocol):
                    transaction_date,
                    self.FLAG,
                    label,
-                    "",
+                    detail,
                    data.EMPTY_SET,
                    data.EMPTY_SET,
                    postings,