update to new CdE CSV format

2022-10-28 16:38:33 +02:00 · 2022-10-28 16:38:33 +02:00 · 524a2e5787
commit 524a2e5787
parent c576c54ed3
1 changed file with 29 additions and 97 deletions
--- a/beancount_cde_importer/__init__.py
+++ b/beancount_cde_importer/__init__.py
@@ -11,44 +11,20 @@ from beancount.core.number import Decimal  # type: ignore
 from beancount.ingest import cache, importer  # type: ignore
-INDEX_DATE = 0
+COL_DATE = "Date de comptabilisation"
-INDEX_TRANSACTION_NUMBER = 1
+COL_LABEL = "Libelle operation"
-INDEX_LABEL = 2
+COL_DEBIT = "Debit"
-INDEX_DEBIT = 3
+COL_CREDIT = "Credit"
-INDEX_CREDIT = 4
+COL_DETAIL = "Informations complementaires"
 INDEX_DETAIL = 5
 END_DATE_REGEX = "Date de fin de téléchargement : ([0-3][0-9]/[0-1][0-9]/[0-9]{4})"
 START_DATE_REGEX = "Date de début de téléchargement : ([0-3][0-9]/[0-1][0-9]/[0-9]{4})"
-
+EXPECTED_HEADER = "Date de comptabilisation;Libelle simplifie;Libelle operation;Reference;Informations complementaires;Type operation;Categorie;Sous categorie;Debit;Credit;Date operation;Date de valeur;Pointage operation"
 def is_valid_header(header: list[str]) -> bool:
    return (
        header[INDEX_DATE] == "Date"
        and header[INDEX_TRANSACTION_NUMBER] == "Numéro d'opération"
        and header[INDEX_LABEL] == "Libellé"
        and header[INDEX_DEBIT] == "Débit"
        and header[INDEX_CREDIT] == "Crédit"
        and header[INDEX_DETAIL] == "Détail"
    )
-def get_date(file: cache._FileMemo, regex: str) -> Optional[date]:
+def is_valid_header(header: str) -> bool:
-    match: Optional[re.Match] = re.search(regex, file.head())
+    return header == EXPECTED_HEADER
    if match is None:
        return None
    date_str: Optional[str] = match.group(1)
    if date_str is None:
        return None
    return datetime.strptime(date_str, "%d/%m/%Y").date()
 def get_end_date(file: cache._FileMemo) -> Optional[date]:
    return get_date(file, END_DATE_REGEX)
 def get_start_date(file: cache._FileMemo) -> Optional[date]:
    return get_date(file, START_DATE_REGEX)
 class CDEImporter(importer.ImporterProtocol):
@@ -60,16 +36,7 @@ class CDEImporter(importer.ImporterProtocol):
            # NOTE: beancount.ingest.cache._FileMemo handles automatic encoding
            # detection
            lines: list[str] = file.head().splitlines()
-            csv_reader = csv.reader(
+            header: str = lines[0]
                lines, delimiter=";", strict=True, quoting=csv.QUOTE_NONE
            )
            # header is actually on the 5th line, the previous ones contain
            # miscellaneous information
            header: Optional[list[str]] = next(islice(csv_reader, 4, None))
            if header is None:
                return False
            return is_valid_header(header)
        except:
@@ -82,73 +49,38 @@ class CDEImporter(importer.ImporterProtocol):
        return "CaisseEpargne_Statement.csv"
    def file_date(self, file: cache._FileMemo) -> Optional[date]:
-        return get_end_date(file)
+        lines: list[str] = file.contents().splitlines()
        csv_reader: csv.DictReader = csv.DictReader(
            lines, delimiter=";", strict=True, quoting=csv.QUOTE_NONE
        )
        row: Optional[dict[str, str]] = next(csv_reader)
        if row is None:
            return None
        return datetime.strptime(
            row[COL_DATE], "%d/%m/%Y"
        ).date()
    def extract(self, file: cache._FileMemo, existing_entries=None) -> list[Any]:
        directives: list[Any] = []
        end_date: Optional[date] = get_end_date(file)
        start_date: Optional[date] = get_start_date(file)
        if end_date is None or start_date is None:
            return directives
        lines: list[str] = file.contents().splitlines()
-        csv_reader = csv.reader(
+        csv_reader = csv.DictReader(
            lines, delimiter=";", strict=True, quoting=csv.QUOTE_NONE
        )
        # first 3 lines are useless
        for _ in range(3):
            next(csv_reader)
        # 4th line is usually the final balance
        row: Optional[list[str]] = next(csv_reader)
        if row is None:
            return directives
        if row[0] == "Solde en fin de période":
            meta = data.new_metadata(file.name, 4)
            balance = Decimal(row[4].replace(",", "."))
            directives.append(
                data.Balance(
                    meta=meta,
                    date=end_date,
                    account=self.account,
                    amount=Amount(balance, "EUR"),
                    tolerance=None,
                    diff_amount=None,
                )
            )
        # skip headings
        next(csv_reader)
        for index, row in enumerate(csv_reader):
-            lineno: int = index + 6  # entries start at line 6
+            lineno: int = index + 2  # entries start at line 2
            meta = data.new_metadata(file.name, lineno)
            if row[0] == "Solde en début de période":
                balance = Decimal(row[4].replace(",", "."))
                directives.append(
                    data.Balance(
                        meta=meta,
                        date=start_date,
                        account=self.account,
                        amount=Amount(balance, "EUR"),
                        tolerance=None,
                        diff_amount=None,
                    )
                )
                # should be the last line anyway
                continue
            transaction_date: date = datetime.strptime(
-                row[INDEX_DATE], "%d/%m/%y"
+                row[COL_DATE], "%d/%m/%Y"
            ).date()
-            label: str = row[INDEX_LABEL]
+            label: str = row[COL_LABEL]
-            debit: str = row[INDEX_DEBIT]
+            debit: str = row[COL_DEBIT]
-            credit: str = row[INDEX_CREDIT]
+            credit: str = row[COL_CREDIT]
            detail: str = row[COL_DETAIL] if row[COL_DETAIL] else ""
            postings: list[data.Posting] = []
@@ -174,7 +106,7 @@ class CDEImporter(importer.ImporterProtocol):
                    transaction_date,
                    self.FLAG,
                    label,
-                    "",
+                    detail,
                    data.EMPTY_SET,
                    data.EMPTY_SET,
                    postings,