beancount-cde-importer/beancount_cde_importer/__init__.py

import csv
import re

from datetime import datetime, date
from itertools import islice
from typing import Any, Optional

from beancount.core import data  # type: ignore
from beancount.core.amount import Amount  # type: ignore
from beancount.core.number import Decimal  # type: ignore
from beancount.ingest import cache, importer  # type: ignore


# Column names of the CSV export. They are used as keys into the rows yielded
# by csv.DictReader, so each must match the corresponding field of
# EXPECTED_HEADER exactly.
COL_DATE = "Date de comptabilisation"  # used as the transaction date
COL_LABEL = "Libelle simplifie"  # simplified label
COL_DETAILED_LABEL = "Libelle operation"  # stored as "detailed_label" metadata
COL_DEBIT = "Debit"
COL_CREDIT = "Credit"
COL_DETAIL = "Informations complementaires"  # additional information
COL_DATE_OP = "Date operation"  # stored as "op_date" metadata
COL_CATEGORY = "Categorie"
COL_SUBCATEGORY = "Sous categorie"

# Patterns capturing a dd/mm/yyyy-shaped date after a download start/end
# caption.
# NOTE(review): neither pattern is referenced in the visible part of this
# module -- presumably left over from an earlier file format; confirm before
# removing.
END_DATE_REGEX = "Date de fin de téléchargement : ([0-3][0-9]/[0-1][0-9]/[0-9]{4})"
START_DATE_REGEX = "Date de début de téléchargement : ([0-3][0-9]/[0-1][0-9]/[0-9]{4})"

# Exact first line a file must have to be accepted (see is_valid_header).
EXPECTED_HEADER = "Date de comptabilisation;Libelle simplifie;Libelle operation;Reference;Informations complementaires;Type operation;Categorie;Sous categorie;Debit;Credit;Date operation;Date de valeur;Pointage operation"


def is_valid_header(header: str) -> bool:
    """Tell whether *header* is exactly the expected CSV header line.

    The comparison is a plain string equality against ``EXPECTED_HEADER``:
    no trimming or case folding is applied.
    """
    is_expected: bool = header == EXPECTED_HEADER
    return is_expected


class CDEImporter(importer.ImporterProtocol):
    """Beancount importer for semicolon-separated CSV statement exports.

    A file is recognised when its first line equals ``EXPECTED_HEADER``.
    Every data row becomes one ``data.Transaction`` carrying a single-account
    posting per non-empty debit/credit cell.
    """

    # Format shared by every date column read from the CSV (day/month/year).
    _DATE_FORMAT = "%d/%m/%Y"
    # Currency attached to every extracted amount.
    _CURRENCY = "EUR"

    def __init__(self, account: str):
        """Remember the beancount account the extracted postings are booked on."""
        self.account = account

    @staticmethod
    def _open_reader(file: cache._FileMemo) -> csv.DictReader:
        """Return a ``csv.DictReader`` over the whole content of *file*."""
        return csv.DictReader(
            file.contents().splitlines(),
            delimiter=";",
            strict=True,
            quoting=csv.QUOTE_NONE,
        )

    @classmethod
    def _parse_date(cls, text: str) -> date:
        """Parse a ``dd/mm/yyyy`` cell; raises ``ValueError`` when malformed."""
        return datetime.strptime(text, cls._DATE_FORMAT).date()

    def _posting(self, raw_amount: str) -> data.Posting:
        """Build a posting on ``self.account`` from a decimal-comma amount cell.

        The sign found in the cell is kept as-is.
        """
        number = Decimal(raw_amount.replace(",", "."))
        return data.Posting(
            self.account,
            Amount(number, self._CURRENCY),
            None,
            None,
            None,
            None,
        )

    def identify(self, file: cache._FileMemo) -> bool:
        """Return True when the first line of *file* is the expected header.

        Any ordinary error while reading the head of the file is treated as
        "not one of ours" rather than propagated.
        """
        try:
            # NOTE: beancount.ingest.cache._FileMemo handles automatic encoding
            # detection
            lines: list[str] = file.head().splitlines()
        except Exception:
            # Deliberate best effort, but no longer a bare ``except`` so that
            # KeyboardInterrupt / SystemExit are not swallowed.
            return False

        # An empty head has no header line at all.
        return bool(lines) and is_valid_header(lines[0])

    def file_account(self, file: cache._FileMemo) -> Optional[str]:
        """Return the account configured at construction time."""
        return self.account

    def file_name(self, file: cache._FileMemo) -> Optional[str]:
        """Return the fixed name used when filing the statement."""
        return "CaisseEpargne_Statement.csv"

    def file_date(self, file: cache._FileMemo) -> Optional[date]:
        """Return the booking date of the first data row of *file*.

        Returns:
            The parsed ``COL_DATE`` value of the first row, or ``None`` when
            the file contains no data row.

        Raises:
            ValueError: if that cell is not a ``dd/mm/yyyy`` date.
        """
        # Supplying a default is what makes the "no data row" case reachable:
        # a bare next() would raise StopIteration instead of yielding None.
        first_row: Optional[dict[str, str]] = next(self._open_reader(file), None)
        if first_row is None:
            return None

        return self._parse_date(first_row[COL_DATE])

    def extract(self, file: cache._FileMemo, existing_entries=None) -> list[Any]:
        """Convert every data row of *file* into a ``data.Transaction``.

        Args:
            file: the cached file to read.
            existing_entries: accepted for interface compatibility; unused.

        Returns:
            One transaction per CSV row, in file order. Each transaction has
            ``COL_LABEL`` as payee, ``COL_DETAIL`` (or ``""``) as narration,
            and ``op_date``, ``ce_category`` and ``detailed_label`` metadata.

        Raises:
            ValueError: if a date cell is not a ``dd/mm/yyyy`` date.
        """
        directives: list[Any] = []

        # Data rows start at line 2, right after the header.
        for lineno, row in enumerate(self._open_reader(file), start=2):
            meta = data.new_metadata(file.name, lineno)
            meta["op_date"] = self._parse_date(row[COL_DATE_OP])
            meta["ce_category"] = f"{row[COL_CATEGORY]} - {row[COL_SUBCATEGORY]}"
            meta["detailed_label"] = row[COL_DETAILED_LABEL]

            # One posting per non-empty amount cell, debit first.
            postings: list[data.Posting] = [
                self._posting(cell)
                for cell in (row[COL_DEBIT], row[COL_CREDIT])
                if cell
            ]

            directives.append(
                data.Transaction(
                    meta,
                    self._parse_date(row[COL_DATE]),
                    self.FLAG,
                    row[COL_LABEL],
                    row[COL_DETAIL] or "",
                    data.EMPTY_SET,
                    data.EMPTY_SET,
                    postings,
                )
            )

        return directives
implement identify 2022-02-03 00:05:59 +01:00			`import csv`
implement extraction 2022-02-05 19:39:01 +01:00			`import re`

			`from datetime import datetime, date`
implement identify 2022-02-03 00:05:59 +01:00			`from itertools import islice`
implement extraction 2022-02-05 19:39:01 +01:00			`from typing import Any, Optional`
implement identify 2022-02-03 00:05:59 +01:00
implement extraction 2022-02-05 19:39:01 +01:00			`from beancount.core import data # type: ignore`
			`from beancount.core.amount import Amount # type: ignore`
			`from beancount.core.number import Decimal # type: ignore`
implement identify 2022-02-03 00:05:59 +01:00			`from beancount.ingest import cache, importer # type: ignore`


update to new CdE CSV format 2022-10-28 16:38:33 +02:00			`COL_DATE = "Date de comptabilisation"`
use simplified label 2022-10-29 16:35:17 +02:00			`COL_LABEL = "Libelle simplifie"`
add CE detailed label as metadata 2022-11-10 19:34:47 +01:00			`COL_DETAILED_LABEL = "Libelle operation"`
update to new CdE CSV format 2022-10-28 16:38:33 +02:00			`COL_DEBIT = "Debit"`
			`COL_CREDIT = "Credit"`
			`COL_DETAIL = "Informations complementaires"`
add operation real date as metadata 2022-11-01 23:25:59 +01:00			`COL_DATE_OP = "Date operation"`
add CE category as metadata 2022-11-10 19:27:58 +01:00			`COL_CATEGORY = "Categorie"`
			`COL_SUBCATEGORY = "Sous categorie"`
implement identify 2022-02-03 00:05:59 +01:00
implement extraction 2022-02-05 19:39:01 +01:00			`END_DATE_REGEX = "Date de fin de téléchargement : ([0-3][0-9]/[0-1][0-9]/[0-9]{4})"`
			`START_DATE_REGEX = "Date de début de téléchargement : ([0-3][0-9]/[0-1][0-9]/[0-9]{4})"`

update to new CdE CSV format 2022-10-28 16:38:33 +02:00			`EXPECTED_HEADER = "Date de comptabilisation;Libelle simplifie;Libelle operation;Reference;Informations complementaires;Type operation;Categorie;Sous categorie;Debit;Credit;Date operation;Date de valeur;Pointage operation"`
implement identify 2022-02-03 00:05:59 +01:00

update to new CdE CSV format 2022-10-28 16:38:33 +02:00			`def is_valid_header(header: str) -> bool:`
			`return header == EXPECTED_HEADER`
implement extraction 2022-02-05 19:39:01 +01:00

shorter importer name 2022-02-05 20:09:12 +01:00			`class CDEImporter(importer.ImporterProtocol):`
implement extraction 2022-02-05 19:39:01 +01:00			`def __init__(self, account: str):`
			`self.account = account`

implement identify 2022-02-03 00:05:59 +01:00			`def identify(self, file: cache._FileMemo) -> bool:`
			`try:`
			`# NOTE: beancount.ingest.cache._FileMemo handles automatic encoding`
			`# detection`
			`lines: list[str] = file.head().splitlines()`
update to new CdE CSV format 2022-10-28 16:38:33 +02:00			`header: str = lines[0]`
implement identify 2022-02-03 00:05:59 +01:00			`return is_valid_header(header)`
setup package build 2022-02-01 23:17:34 +01:00
implement identify 2022-02-03 00:05:59 +01:00			`except:`
			`return False`
implement extraction 2022-02-05 19:39:01 +01:00
			`def file_account(self, file: cache._FileMemo) -> Optional[str]:`
			`return self.account`

			`def file_name(self, file: cache._FileMemo) -> Optional[str]:`
			`return "CaisseEpargne_Statement.csv"`

			`def file_date(self, file: cache._FileMemo) -> Optional[date]:`
			`lines: list[str] = file.contents().splitlines()`
update to new CdE CSV format 2022-10-28 16:38:33 +02:00			`csv_reader: csv.DictReader = csv.DictReader(`
implement extraction 2022-02-05 19:39:01 +01:00			`lines, delimiter=";", strict=True, quoting=csv.QUOTE_NONE`
			`)`

update to new CdE CSV format 2022-10-28 16:38:33 +02:00			`row: Optional[dict[str, str]] = next(csv_reader)`
implement extraction 2022-02-05 19:39:01 +01:00			`if row is None:`
update to new CdE CSV format 2022-10-28 16:38:33 +02:00			`return None`
implement extraction 2022-02-05 19:39:01 +01:00
add operation real date as metadata 2022-11-01 23:25:59 +01:00			`return datetime.strptime(row[COL_DATE], "%d/%m/%Y").date()`
implement extraction 2022-02-05 19:39:01 +01:00
update to new CdE CSV format 2022-10-28 16:38:33 +02:00			`def extract(self, file: cache._FileMemo, existing_entries=None) -> list[Any]:`
			`directives: list[Any] = []`

			`lines: list[str] = file.contents().splitlines()`
			`csv_reader = csv.DictReader(`
			`lines, delimiter=";", strict=True, quoting=csv.QUOTE_NONE`
			`)`
implement extraction 2022-02-05 19:39:01 +01:00
			`for index, row in enumerate(csv_reader):`
update to new CdE CSV format 2022-10-28 16:38:33 +02:00			`lineno: int = index + 2 # entries start at line 2`
implement extraction 2022-02-05 19:39:01 +01:00			`meta = data.new_metadata(file.name, lineno)`
handle start date balance 2022-02-05 19:50:59 +01:00
add operation real date as metadata 2022-11-01 23:25:59 +01:00			`transaction_date: date = datetime.strptime(row[COL_DATE], "%d/%m/%Y").date()`
			`op_date: date = datetime.strptime(row[COL_DATE_OP], "%d/%m/%Y").date()`
add CE category as metadata 2022-11-10 19:27:58 +01:00			`category: str = row[COL_CATEGORY]`
			`subcategory: str = row[COL_SUBCATEGORY]`
update to new CdE CSV format 2022-10-28 16:38:33 +02:00			`label: str = row[COL_LABEL]`
add CE detailed label as metadata 2022-11-10 19:34:47 +01:00			`detailed_label: str = row[COL_DETAILED_LABEL]`
update to new CdE CSV format 2022-10-28 16:38:33 +02:00			`debit: str = row[COL_DEBIT]`
			`credit: str = row[COL_CREDIT]`
			`detail: str = row[COL_DETAIL] if row[COL_DETAIL] else ""`
implement extraction 2022-02-05 19:39:01 +01:00
			`postings: list[data.Posting] = []`

			`if debit:`
			`amount = Decimal(debit.replace(",", "."))`
			`postings.append(`
			`data.Posting(`
add operation real date as metadata 2022-11-01 23:25:59 +01:00			`self.account,`
			`Amount(amount, "EUR"),`
			`None,`
			`None,`
			`None,`
			`None,`
implement extraction 2022-02-05 19:39:01 +01:00			`)`
			`)`

			`if credit:`
			`amount = Decimal(credit.replace(",", "."))`
			`postings.append(`
			`data.Posting(`
add operation real date as metadata 2022-11-01 23:25:59 +01:00			`self.account,`
			`Amount(amount, "EUR"),`
			`None,`
			`None,`
			`None,`
			`None,`
implement extraction 2022-02-05 19:39:01 +01:00			`)`
			`)`

add operation real date as metadata 2022-11-01 23:25:59 +01:00			`meta["op_date"] = op_date`
add CE category as metadata 2022-11-10 19:27:58 +01:00			`meta["ce_category"] = f"{category} - {subcategory}"`
add CE detailed label as metadata 2022-11-10 19:34:47 +01:00			`meta["detailed_label"] = detailed_label`
add operation real date as metadata 2022-11-01 23:25:59 +01:00
implement extraction 2022-02-05 19:39:01 +01:00			`directives.append(`
			`data.Transaction(`
			`meta,`
			`transaction_date,`
			`self.FLAG,`
			`label,`
update to new CdE CSV format 2022-10-28 16:38:33 +02:00			`detail,`
implement extraction 2022-02-05 19:39:01 +01:00			`data.EMPTY_SET,`
			`data.EMPTY_SET,`
			`postings,`
			`)`
			`)`

			`return directives`