beancount-cde-importer/beancount_cde_importer/__init__.py

117 lines
3.8 KiB
Python
Raw Normal View History

2022-02-03 00:05:59 +01:00
import csv
2022-02-05 19:39:01 +01:00
import re
from datetime import datetime, date
2022-02-03 00:05:59 +01:00
from itertools import islice
2022-02-05 19:39:01 +01:00
from typing import Any, Optional
2022-02-03 00:05:59 +01:00
2022-02-05 19:39:01 +01:00
from beancount.core import data # type: ignore
from beancount.core.amount import Amount # type: ignore
from beancount.core.number import Decimal # type: ignore
2022-02-03 00:05:59 +01:00
from beancount.ingest import cache, importer # type: ignore
2022-10-28 16:38:33 +02:00
# Names of the CSV columns this module reads from each statement row.
COL_DATE = "Date de comptabilisation"
COL_LABEL = "Libelle simplifie"
COL_DEBIT = "Debit"
COL_CREDIT = "Credit"
COL_DETAIL = "Informations complementaires"

# Patterns capturing a dd/mm/yyyy date that follows the download end/start
# labels. They are not referenced by the code visible in this file.
END_DATE_REGEX = r"Date de fin de téléchargement : ([0-3][0-9]/[0-1][0-9]/[0-9]{4})"
START_DATE_REGEX = r"Date de début de téléchargement : ([0-3][0-9]/[0-1][0-9]/[0-9]{4})"

# Exact first line (semicolon-separated column names) a file must start with
# to be recognised. Split across literals only to keep lines short; the
# resulting string is unchanged.
EXPECTED_HEADER = (
    "Date de comptabilisation;Libelle simplifie;Libelle operation;Reference;"
    "Informations complementaires;Type operation;Categorie;Sous categorie;"
    "Debit;Credit;Date operation;Date de valeur;Pointage operation"
)
2022-02-03 00:05:59 +01:00
2022-10-28 16:38:33 +02:00
def is_valid_header(header: str) -> bool:
    """Return True when *header* is exactly equal to ``EXPECTED_HEADER``.

    The comparison is a plain string equality: no trimming or case folding
    is applied to *header*.
    """
    matches: bool = EXPECTED_HEADER == header
    return matches
2022-02-05 19:39:01 +01:00
2022-02-05 20:09:12 +01:00
class CDEImporter(importer.ImporterProtocol):
    """Beancount importer for semicolon-delimited CSV statement exports.

    A file is recognised when its first line equals the expected header.
    Every CSV row is turned into one transaction whose postings are booked
    against the configured account.
    """

    def __init__(self, account: str, currency: str = "EUR"):
        """Store the target account and the currency used for all amounts.

        Args:
            account: Account name used for every generated posting and
                returned by ``file_account``.
            currency: Currency attached to every amount; defaults to
                ``"EUR"``.
        """
        self.account = account
        self.currency = currency

    def identify(self, file: cache._FileMemo) -> bool:
        """Return True when the first line of *file* is the expected header.

        Any error raised while reading the head of the file is treated as
        "not our file" and yields False, as does an empty file.
        """
        try:
            # NOTE: beancount.ingest.cache._FileMemo handles automatic encoding
            # detection
            lines: list[str] = file.head().splitlines()
        except Exception:
            # Best-effort identification: an unreadable file is simply not
            # claimed. ``Exception`` (rather than a bare ``except``) lets
            # KeyboardInterrupt and SystemExit propagate.
            return False
        return bool(lines) and is_valid_header(lines[0])

    def file_account(self, file: cache._FileMemo) -> Optional[str]:
        """Return the account given at construction time."""
        return self.account

    def file_name(self, file: cache._FileMemo) -> Optional[str]:
        """Return the fixed file name ``CaisseEpargne_Statement.csv``."""
        return "CaisseEpargne_Statement.csv"

    def file_date(self, file: cache._FileMemo) -> Optional[date]:
        """Return the date found in the first data row of *file*.

        Returns:
            The ``COL_DATE`` value of the first row parsed as dd/mm/yyyy, or
            None when the file has no data row.

        Raises:
            KeyError: If the first row has no ``COL_DATE`` column.
            ValueError: If the date does not match dd/mm/yyyy.
        """
        lines: list[str] = file.contents().splitlines()
        csv_reader: csv.DictReader = csv.DictReader(
            lines, delimiter=";", strict=True, quoting=csv.QUOTE_NONE
        )
        # A default is required here: without it an empty or header-only
        # file raises StopIteration instead of reaching the None check.
        row: Optional[dict[str, str]] = next(csv_reader, None)
        if row is None:
            return None

        return datetime.strptime(row[COL_DATE], "%d/%m/%Y").date()

    def extract(self, file: cache._FileMemo, existing_entries=None) -> list[Any]:
        """Convert every CSV row of *file* into a transaction.

        Args:
            file: File whose contents are parsed as semicolon-delimited CSV
                with a header line.
            existing_entries: Accepted for interface compatibility; not used.

        Returns:
            One ``data.Transaction`` per row, in file order. Each carries a
            posting for the debit column and/or the credit column when that
            column is non-empty.

        Raises:
            KeyError: If a row lacks one of the columns read here.
            ValueError: If a date does not match dd/mm/yyyy.
        """
        directives: list[Any] = []
        lines: list[str] = file.contents().splitlines()
        csv_reader = csv.DictReader(
            lines, delimiter=";", strict=True, quoting=csv.QUOTE_NONE
        )

        # Line 1 is the header, so entries start at line 2.
        for lineno, row in enumerate(csv_reader, start=2):
            meta = data.new_metadata(file.name, lineno)

            transaction_date: date = datetime.strptime(
                row[COL_DATE], "%d/%m/%Y"
            ).date()
            label: str = row[COL_LABEL]
            detail: str = row[COL_DETAIL] or ""

            # Amounts use a decimal comma in the file; they are otherwise
            # taken as written and no sign is applied here.
            # NOTE(review): presumably the export already signs debits as
            # negative - confirm against a real statement.
            postings: list[data.Posting] = [
                data.Posting(
                    self.account,
                    Amount(Decimal(raw_amount.replace(",", ".")), self.currency),
                    None,
                    None,
                    None,
                    None,
                )
                for raw_amount in (row[COL_DEBIT], row[COL_CREDIT])
                if raw_amount
            ]

            directives.append(
                data.Transaction(
                    meta,
                    transaction_date,
                    # FLAG is not defined in this class; presumably inherited
                    # from ImporterProtocol.
                    self.FLAG,
                    label,
                    detail,
                    data.EMPTY_SET,
                    data.EMPTY_SET,
                    postings,
                )
            )

        return directives