"""Beancount importer for "CaisseEpargne" semicolon-separated CSV statements.

File layout relied upon by this module (1-based line numbers):

* lines 1-3: free-form information, including the download start/end dates
* line 4:    usually the closing balance ("Solde en fin de période")
* line 5:    column headings
* line 6+:   one transaction per line, followed by the opening balance
             ("Solde en début de période")
"""

import csv
import re
from datetime import datetime, date
from itertools import islice
from typing import Any, Optional

from beancount.core import data  # type: ignore
from beancount.core.amount import Amount  # type: ignore
from beancount.core.number import Decimal  # type: ignore
from beancount.ingest import cache, importer  # type: ignore

# Column positions inside a transaction row.
INDEX_DATE = 0
INDEX_TRANSACTION_NUMBER = 1
INDEX_LABEL = 2
INDEX_DEBIT = 3
INDEX_CREDIT = 4
INDEX_DETAIL = 5

# Column holding the amount on the opening/closing balance rows.
INDEX_BALANCE = 4

# 1-based line numbers of the fixed parts of the file.
FINAL_BALANCE_LINENO = 4
HEADER_LINENO = 5
FIRST_ENTRY_LINENO = 6

FINAL_BALANCE_LABEL = "Solde en fin de période"
INITIAL_BALANCE_LABEL = "Solde en début de période"

CURRENCY = "EUR"

END_DATE_REGEX = "Date de fin de téléchargement : ([0-3][0-9]/[0-1][0-9]/[0-9]{4})"
START_DATE_REGEX = "Date de début de téléchargement : ([0-3][0-9]/[0-1][0-9]/[0-9]{4})"


def is_valid_header(header: list[str]) -> bool:
    """Return True when *header* starts with the six expected column names.

    A header with fewer than six columns is reported as invalid instead of
    raising ``IndexError``.
    """
    if len(header) <= INDEX_DETAIL:
        return False
    return (
        header[INDEX_DATE] == "Date"
        and header[INDEX_TRANSACTION_NUMBER] == "Numéro d'opération"
        and header[INDEX_LABEL] == "Libellé"
        and header[INDEX_DEBIT] == "Débit"
        and header[INDEX_CREDIT] == "Crédit"
        and header[INDEX_DETAIL] == "Détail"
    )


def get_date(file: cache._FileMemo, regex: str) -> Optional[date]:
    """Search the head of *file* for *regex* and parse its first group.

    The first capture group must hold a ``dd/mm/YYYY`` date.

    Returns:
        The parsed date, or None when *regex* does not match.

    Raises:
        ValueError: if the captured text is not a valid calendar date.
    """
    match: Optional[re.Match] = re.search(regex, file.head())
    if match is None:
        return None
    return datetime.strptime(match.group(1), "%d/%m/%Y").date()


def get_end_date(file: cache._FileMemo) -> Optional[date]:
    """Return the download end date announced in the file head, if any."""
    return get_date(file, END_DATE_REGEX)


def get_start_date(file: cache._FileMemo) -> Optional[date]:
    """Return the download start date announced in the file head, if any."""
    return get_date(file, START_DATE_REGEX)


def _parse_amount(text: str) -> Amount:
    """Convert a comma-decimal number such as ``"-12,34"`` into an EUR Amount."""
    return Amount(Decimal(text.replace(",", ".")), CURRENCY)


class CDEImporter(importer.ImporterProtocol):
    """Importer turning a CSV statement into beancount directives."""

    def __init__(self, account: str):
        """Remember the beancount *account* every directive is booked on."""
        self.account = account

    def identify(self, file: cache._FileMemo) -> bool:
        """Return True when the 5th line of *file* is the expected header."""
        try:
            # NOTE: beancount.ingest.cache._FileMemo handles automatic encoding
            # detection
            lines: list[str] = file.head().splitlines()
            csv_reader = csv.reader(
                lines, delimiter=";", strict=True, quoting=csv.QUOTE_NONE
            )
            # header is actually on the 5th line, the previous ones contain
            # miscellaneous information
            header: Optional[list[str]] = next(
                islice(csv_reader, HEADER_LINENO - 1, None), None
            )
        except Exception:
            # Any failure to read or parse simply means "not one of ours";
            # KeyboardInterrupt / SystemExit are deliberately not swallowed.
            return False
        return header is not None and is_valid_header(header)

    def file_account(self, file: cache._FileMemo) -> Optional[str]:
        """Return the account configured for this importer."""
        return self.account

    def file_name(self, file: cache._FileMemo) -> Optional[str]:
        """Return the fixed name under which statements are filed."""
        return "CaisseEpargne_Statement.csv"

    def file_date(self, file: cache._FileMemo) -> Optional[date]:
        """Return the download end date found in *file*, if any."""
        return get_end_date(file)

    def _balance(self, meta: Any, when: date, row: list[str]) -> Any:
        """Build a Balance directive dated *when* from a balance *row*."""
        return data.Balance(
            meta=meta,
            date=when,
            account=self.account,
            amount=_parse_amount(row[INDEX_BALANCE]),
            tolerance=None,
            diff_amount=None,
        )

    def _postings(self, row: list[str]) -> list[Any]:
        """Build one posting per non-empty debit/credit cell of *row*."""
        return [
            data.Posting(
                account=self.account,
                units=_parse_amount(cell),
                cost=None,
                price=None,
                flag=None,
                meta=None,
            )
            for cell in (row[INDEX_DEBIT], row[INDEX_CREDIT])
            if cell
        ]

    def extract(self, file: cache._FileMemo, existing_entries=None) -> list[Any]:
        """Extract balance and transaction directives from *file*.

        Returns an empty list when the download start or end date cannot be
        found, or when the file ends before the closing-balance line. Blank
        lines are ignored.

        Raises:
            ValueError: if a date cell cannot be parsed.
            IndexError: if a non-empty row has fewer columns than expected.
            csv.Error: if the CSV content is malformed.
        """
        directives: list[Any] = []

        end_date: Optional[date] = get_end_date(file)
        start_date: Optional[date] = get_start_date(file)
        if end_date is None or start_date is None:
            return directives

        lines: list[str] = file.contents().splitlines()
        csv_reader = csv.reader(
            lines, delimiter=";", strict=True, quoting=csv.QUOTE_NONE
        )

        # first 3 lines are useless
        for _ in range(FINAL_BALANCE_LINENO - 1):
            if next(csv_reader, None) is None:
                return directives

        # 4th line is usually the final balance
        row: Optional[list[str]] = next(csv_reader, None)
        if row is None:
            return directives
        if row and row[0] == FINAL_BALANCE_LABEL:
            # NOTE(review): beancount checks a balance at the start of its
            # date; confirm whether the closing balance should rather be dated
            # the day after end_date.
            meta = data.new_metadata(file.name, FINAL_BALANCE_LINENO)
            directives.append(self._balance(meta, end_date, row))

        # skip headings
        next(csv_reader, None)

        for lineno, row in enumerate(csv_reader, start=FIRST_ENTRY_LINENO):
            if not row:
                # csv.reader yields [] for a blank line; nothing to import
                continue

            meta = data.new_metadata(file.name, lineno)

            if row[0] == INITIAL_BALANCE_LABEL:
                directives.append(self._balance(meta, start_date, row))
                # should be the last line anyway
                continue

            # Transaction rows use a two-digit year, unlike the file head.
            transaction_date: date = datetime.strptime(
                row[INDEX_DATE], "%d/%m/%y"
            ).date()

            directives.append(
                data.Transaction(
                    meta=meta,
                    date=transaction_date,
                    flag=self.FLAG,
                    payee=row[INDEX_LABEL],
                    narration="",
                    tags=data.EMPTY_SET,
                    links=data.EMPTY_SET,
                    postings=self._postings(row),
                )
            )

        return directives