// The types below mirror the subset of the CAMT.053 XML element tree that
// this package reads. Every struct maps to the element named in the field tag
// that refers to it; elements that have no matching field are dropped by
// encoding/xml during decoding.
type (
	// Document is the root <Document> element.
	Document struct {
		XMLName       xml.Name      `xml:"Document"`
		BkToCstmrStmt BkToCstmrStmt `xml:"BkToCstmrStmt"`
	}

	// BkToCstmrStmt wraps the statement; a single <Stmt> is modelled.
	BkToCstmrStmt struct {
		Stmt Stmt `xml:"Stmt"`
	}

	// Stmt carries the account description and the list of <Ntry> elements.
	Stmt struct {
		Acct Acct   `xml:"Acct"`
		Ntry []Ntry `xml:"Ntry"`
	}

	// Acct is the <Acct> element: identifier, currency and owner.
	Acct struct {
		Id   Id     `xml:"Id"`
		Ccy  string `xml:"Ccy"`
		Ownr Ownr   `xml:"Ownr"`
	}

	// Id holds the text of <Id>/<IBAN>.
	Id struct {
		IBAN string `xml:"IBAN"`
	}

	// Ownr holds the text of <Ownr>/<Nm>.
	Ownr struct {
		Nm string `xml:"Nm"`
	}

	// Ntry is one <Ntry> element. NtryDtls stays nil when the entry has no
	// <NtryDtls> child. All values are kept as the raw strings found in the
	// document; no date or number parsing happens here.
	Ntry struct {
		Amt          Amount    `xml:"Amt"`
		CdtDbtInd    string    `xml:"CdtDbtInd"`
		BookgDt      BookgDt   `xml:"BookgDt"`
		BkTxCd       BkTxCd    `xml:"BkTxCd"`
		NtryRef      string    `xml:"NtryRef"`
		AddtlNtryInf string    `xml:"AddtlNtryInf"`
		NtryDtls     *NtryDtls `xml:"NtryDtls"`
	}

	// Amount is the character data of an <Amt> element together with its
	// Ccy attribute.
	Amount struct {
		Value string `xml:",chardata"`
		Ccy   string `xml:"Ccy,attr"`
	}

	// BookgDt holds the text of <BookgDt>/<DtTm>.
	BookgDt struct {
		DtTm string `xml:"DtTm"`
	}

	// BkTxCd wraps the <Prtry> child of <BkTxCd>.
	BkTxCd struct {
		Prtry Prtry `xml:"Prtry"`
	}

	// Prtry holds the text of <Prtry>/<Cd>.
	Prtry struct {
		Cd string `xml:"Cd"`
	}

	// NtryDtls wraps the transaction details; a single <TxDtls> is modelled.
	NtryDtls struct {
		TxDtls TxDtls `xml:"TxDtls"`
	}

	// TxDtls wraps the <RltdPties> element.
	TxDtls struct {
		RltdPties RltdPties `xml:"RltdPties"`
	}

	// RltdPties exposes the <Cdtr> child; Cdtr is nil when it is absent.
	RltdPties struct {
		Cdtr *Cdtr `xml:"Cdtr"`
	}

	// Cdtr wraps the <Pty> child of <Cdtr>.
	Cdtr struct {
		Pty Pty `xml:"Pty"`
	}

	// Pty holds the text of <Pty>/<Nm>.
	Pty struct {
		Nm string `xml:"Nm"`
	}
)

// ParseCamt decodes a single XML document from reader and returns the entries
// found at Document/BkToCstmrStmt/Stmt/Ntry. When decoding fails the error
// from encoding/xml is returned as-is together with a nil slice.
func ParseCamt(reader io.Reader) ([]Ntry, error) {
	parsed := new(Document)
	decoder := xml.NewDecoder(reader)
	if decodeErr := decoder.Decode(parsed); decodeErr != nil {
		return nil, decodeErr
	}

	statement := parsed.BkToCstmrStmt.Stmt
	return statement.Ntry, nil
}
+var camtSample []byte + +func TestParseCamt(t *testing.T) { + entries, err := camt.ParseCamt(bytes.NewBuffer(camtSample)) + if err != nil { + t.Error(err) + } + if len(entries) != 2 { + t.Error("Expected 2 got ", len(entries)) + } +} diff --git a/ledger/camt/sample.xml b/ledger/camt/sample.xml new file mode 100644 index 0000000..11456d1 --- /dev/null +++ b/ledger/camt/sample.xml @@ -0,0 +1,155 @@ + + + + + 1111111-000000 + 2025-07-31T12:37:01.152446900Z + + + 1111111-000000-99999999 + 2025-07-31T12:37:01.152446900Z + + 2025-07-12T00:00:00+01:00 + 2025-07-14T00:00:00+01:00 + + + + BE00000000000 + + EUR + + Sample + + + ADDR + + EU-0000 + Fake + Happy lane + + + + + 0000000001234 + + COID + + + + + + + + Wise Europe SA + + + ADDR + + 1050 + Brussels + Rue du Trône 100, 3rd floor + + + + + + + + CLBD + + + 67.71 + CRDT +
+ 2025-07-14T00:00:00+01:00 +
+
+ + + + OPBD + + + 306.61 + CRDT +
+ 2025-07-12T00:00:00+01:00 +
+
+ + + + Unrealised gains and losses + + + 2.26 + CRDT +
+ 2025-07-14T00:00:00+01:00 +
+
+ + + 2 + -38.90 + + 38.90 + DBIT + + + + 0 + 0 + + + 2 + -38.90 + + + + 3.90 + DBIT + + BOOK + + + 2025-07-13T05:32:45.916737+01:00 + + + + CARD-675 + + + Card transaction of EUR issued + + + 00001/2025 + 35.00 + DBIT + + BOOK + + + 2025-07-12T08:58:01.327701+01:00 + + + + TRANSFER-0000 + + + + + + + + LLC Company + + + + + + Sent money to LLC Company + +
+
+
diff --git a/ledger/cmd/import.go b/ledger/cmd/import.go index eadb1fb..52e50a6 100644 --- a/ledger/cmd/import.go +++ b/ledger/cmd/import.go @@ -2,6 +2,7 @@ package cmd import ( "encoding/csv" + "errors" "fmt" "math" "os" @@ -11,174 +12,287 @@ import ( "github.com/howeyc/ledger" "github.com/howeyc/ledger/decimal" + "github.com/howeyc/ledger/ledger/camt" "github.com/jbrukh/bayesian" "github.com/spf13/cobra" ) +var ( + ErrNoMatchingAccount = errors.New("Unable to find matching account.") +) + var csvDateFormat string var negateAmount bool var allowMatching bool var fieldDelimiter string var scaleFactor float64 -// importCmd represents the import command -var importCmd = &cobra.Command{ - Use: "import ", - Args: cobra.ExactArgs(2), - Short: "Import transactions from csv to ledger format", - Run: func(_ *cobra.Command, args []string) { - var accountSubstring, csvFileName string - accountSubstring = args[0] - csvFileName = args[1] +func trainClassifier(generalLedger []*ledger.Transaction, matchingAccount string) *bayesian.Classifier { + allAccounts := ledger.GetBalances(generalLedger, []string{}) + classes := make([]bayesian.Class, len(allAccounts)) + for i, bal := range allAccounts { + classes[i] = bayesian.Class(bal.Name) + } + classifier := bayesian.NewClassifier(classes...) 
+ for _, tran := range generalLedger { + payeeWords := strings.Fields(tran.Payee) + // learn accounts names (except matchingAccount) for transactions where matchingAccount is present + learnName := false + for _, accChange := range tran.AccountChanges { + if accChange.Name == matchingAccount { + learnName = true + break + } + } + if learnName { + for _, accChange := range tran.AccountChanges { + if accChange.Name != matchingAccount { + classifier.Learn(payeeWords, bayesian.Class(accChange.Name)) + } + } + } + } - decScale := decimal.NewFromFloat(scaleFactor) + return classifier +} - csvFileReader, err := os.Open(csvFileName) - if err != nil { - fmt.Println("CSV: ", err) - return - } - defer csvFileReader.Close() +func predictAccount(classifier *bayesian.Classifier, inputPayeeWords []string) string { + // Classify into expense account - generalLedger, parseError := ledger.ParseLedgerFile(ledgerFilePath) - if parseError != nil { - fmt.Printf("%s:%s\n", ledgerFilePath, parseError.Error()) - return + // Find the highest and second highest scores + highScore1 := math.Inf(-1) + highScore2 := math.Inf(-1) + matchIdx := 0 + scores, _, _ := classifier.LogScores(inputPayeeWords) + for j, score := range scores { + if score > highScore1 { + highScore2 = highScore1 + highScore1 = score + matchIdx = j } + } + // If the difference between the highest and second highest scores is greater than 10 + // then it indicates that highscore is a high confidence match + if highScore1-highScore2 > 10 { + return string(classifier.Classes[matchIdx]) + } else { + return "unknown:unknown" + } +} - var matchingAccount string - matchingAccounts := ledger.GetBalances(generalLedger, []string{accountSubstring}) - if len(matchingAccounts) < 1 { - fmt.Println("Unable to find matching account.") - return - } - for _, m := range matchingAccounts { - if strings.EqualFold(m.Name, accountSubstring) { - matchingAccount = m.Name - break - } - } - if matchingAccount == "" { - matchingAccount = 
matchingAccounts[len(matchingAccounts)-1].Name +func findMatchingAccount(generalLedger []*ledger.Transaction, accountSubstring string) (string, error) { + var matchingAccount string + matchingAccounts := ledger.GetBalances(generalLedger, []string{accountSubstring}) + if len(matchingAccounts) < 1 { + return "", ErrNoMatchingAccount + } + for _, m := range matchingAccounts { + if strings.EqualFold(m.Name, accountSubstring) { + matchingAccount = m.Name + break } + } + if matchingAccount == "" { + matchingAccount = matchingAccounts[len(matchingAccounts)-1].Name + } - allAccounts := ledger.GetBalances(generalLedger, []string{}) + return matchingAccount, nil +} - csvReader := csv.NewReader(csvFileReader) - csvReader.Comma, _ = utf8.DecodeRuneInString(fieldDelimiter) - csvRecords, cerr := csvReader.ReadAll() - if cerr != nil { - fmt.Println("CSV parse error:", cerr.Error()) - return - } +func importCSV(accountSubstring, csvFileName string) { + decScale := decimal.NewFromFloat(scaleFactor) + + csvFileReader, err := os.Open(csvFileName) + if err != nil { + fmt.Println("CSV: ", err) + return + } + defer csvFileReader.Close() + + generalLedger, parseError := ledger.ParseLedgerFile(ledgerFilePath) + if parseError != nil { + fmt.Printf("%s:%s\n", ledgerFilePath, parseError.Error()) + return + } - classes := make([]bayesian.Class, len(allAccounts)) - for i, bal := range allAccounts { - classes[i] = bayesian.Class(bal.Name) + matchingAccount, err := findMatchingAccount(generalLedger, accountSubstring) + if err != nil { + fmt.Println(err) + return + } + + csvReader := csv.NewReader(csvFileReader) + csvReader.Comma, _ = utf8.DecodeRuneInString(fieldDelimiter) + csvRecords, cerr := csvReader.ReadAll() + if cerr != nil { + fmt.Println("CSV parse error:", cerr.Error()) + return + } + + classifier := trainClassifier(generalLedger, matchingAccount) + + // Find columns from header + var dateColumn, payeeColumn, amountColumn, commentColumn int + dateColumn, payeeColumn, amountColumn, 
commentColumn = -1, -1, -1, -1 + for fieldIndex, fieldName := range csvRecords[0] { + fieldName = strings.ToLower(fieldName) + if strings.Contains(fieldName, "date") { + dateColumn = fieldIndex + } else if strings.Contains(fieldName, "description") { + payeeColumn = fieldIndex + } else if strings.Contains(fieldName, "payee") { + payeeColumn = fieldIndex + } else if strings.Contains(fieldName, "amount") { + amountColumn = fieldIndex + } else if strings.Contains(fieldName, "expense") { + amountColumn = fieldIndex + } else if strings.Contains(fieldName, "note") { + commentColumn = fieldIndex + } else if strings.Contains(fieldName, "comment") { + commentColumn = fieldIndex } - classifier := bayesian.NewClassifier(classes...) - for _, tran := range generalLedger { - payeeWords := strings.Fields(tran.Payee) - // learn accounts names (except matchingAccount) for transactions where matchingAccount is present - learnName := false - for _, accChange := range tran.AccountChanges { - if accChange.Name == matchingAccount { - learnName = true - break - } + } + + if dateColumn < 0 || payeeColumn < 0 || amountColumn < 0 { + fmt.Println("Unable to find columns required from header field names.") + return + } + + expenseAccount := ledger.Account{Name: "unknown:unknown", Balance: decimal.Zero} + csvAccount := ledger.Account{Name: matchingAccount, Balance: decimal.Zero} + for _, record := range csvRecords[1:] { + inputPayeeWords := strings.Fields(record[payeeColumn]) + csvDate, _ := time.Parse(csvDateFormat, record[dateColumn]) + if allowMatching || !existingTransaction(generalLedger, csvDate, record[payeeColumn]) { + expenseAccount.Name = predictAccount(classifier, inputPayeeWords) + + // Parse error, set to zero + if dec, derr := decimal.NewFromString(record[amountColumn]); derr != nil { + expenseAccount.Balance = decimal.Zero + } else { + expenseAccount.Balance = dec } - if learnName { - for _, accChange := range tran.AccountChanges { - if accChange.Name != matchingAccount { - 
classifier.Learn(payeeWords, bayesian.Class(accChange.Name)) - } - } + + // Negate amount if required + if negateAmount { + expenseAccount.Balance = expenseAccount.Balance.Neg() + } + + // Apply scale + expenseAccount.Balance = expenseAccount.Balance.Mul(decScale) + + // Csv amount is the negative of the expense amount + csvAccount.Balance = expenseAccount.Balance.Neg() + + // Create valid transaction for print in ledger format + trans := &ledger.Transaction{Date: csvDate, Payee: record[payeeColumn]} + trans.AccountChanges = []ledger.Account{csvAccount, expenseAccount} + + // Comment + if commentColumn >= 0 && record[commentColumn] != "" { + trans.Comments = []string{";" + record[commentColumn]} } + WriteTransaction(os.Stdout, trans, 80) } + } +} + +func importCamt(accountSubstring, camtFileName string) { + decScale := decimal.NewFromFloat(scaleFactor) - // Find columns from header - var dateColumn, payeeColumn, amountColumn, commentColumn int - dateColumn, payeeColumn, amountColumn, commentColumn = -1, -1, -1, -1 - for fieldIndex, fieldName := range csvRecords[0] { - fieldName = strings.ToLower(fieldName) - if strings.Contains(fieldName, "date") { - dateColumn = fieldIndex - } else if strings.Contains(fieldName, "description") { - payeeColumn = fieldIndex - } else if strings.Contains(fieldName, "payee") { - payeeColumn = fieldIndex - } else if strings.Contains(fieldName, "amount") { - amountColumn = fieldIndex - } else if strings.Contains(fieldName, "expense") { - amountColumn = fieldIndex - } else if strings.Contains(fieldName, "note") { - commentColumn = fieldIndex - } else if strings.Contains(fieldName, "comment") { - commentColumn = fieldIndex + fileReader, err := os.Open(camtFileName) + if err != nil { + fmt.Println("CAMT: ", err, camtFileName) + return + } + defer fileReader.Close() + + generalLedger, parseError := ledger.ParseLedgerFile(ledgerFilePath) + if parseError != nil { + fmt.Printf("%s:%s\n", ledgerFilePath, parseError.Error()) + return + } + + 
matchingAccount, err := findMatchingAccount(generalLedger, accountSubstring) + if err != nil { + fmt.Println(err) + return + } + + classifier := trainClassifier(generalLedger, matchingAccount) + + entries, err := camt.ParseCamt(fileReader) + expenseAccount := ledger.Account{Name: "unknown:unknown", Balance: decimal.Zero} + camtAccount := ledger.Account{Name: matchingAccount, Balance: decimal.Zero} + for _, entry := range entries { + dateTime, err := time.Parse(time.RFC3339, entry.BookgDt.DtTm) + if err != nil { + // Try another format if RFC3339 fails + dateTime, err = time.Parse("2006-01-02T15:04:05.999999-07:00", entry.BookgDt.DtTm) + if err != nil { + fmt.Println("CAMT parse error:", err.Error()) } } - if dateColumn < 0 || payeeColumn < 0 || amountColumn < 0 { - fmt.Println("Unable to find columns required from header field names.") - return + // Parse amount + amount, err := decimal.NewFromString(entry.Amt.Value) + if err != nil { + fmt.Println("CAMT parse error:", err.Error()) } - expenseAccount := ledger.Account{Name: "unknown:unknown", Balance: decimal.Zero} - csvAccount := ledger.Account{Name: matchingAccount, Balance: decimal.Zero} - for _, record := range csvRecords[1:] { - inputPayeeWords := strings.Fields(record[payeeColumn]) - csvDate, _ := time.Parse(csvDateFormat, record[dateColumn]) - if allowMatching || !existingTransaction(generalLedger, csvDate, record[payeeColumn]) { - // Classify into expense account - - // Find the highest and second highest scores - highScore1 := math.Inf(-1) - highScore2 := math.Inf(-1) - matchIdx := 0 - scores, _, _ := classifier.LogScores(inputPayeeWords) - for j, score := range scores { - if score > highScore1 { - highScore2 = highScore1 - highScore1 = score - matchIdx = j - } - } - // If the difference between the highest and second highest scores is greater than 10 - // then it indicates that highscore is a high confidence match - if highScore1-highScore2 > 10 { - expenseAccount.Name = 
string(classifier.Classes[matchIdx]) - } else { - expenseAccount.Name = "unknown:unknown" - } + // Get reference and payee + reference := entry.BkTxCd.Prtry.Cd + payee := "" - // Parse error, set to zero - if dec, derr := decimal.NewFromString(record[amountColumn]); derr != nil { - expenseAccount.Balance = decimal.Zero - } else { - expenseAccount.Balance = dec - } + // Extract payee from entry details if available + if entry.NtryDtls != nil && entry.NtryDtls.TxDtls.RltdPties.Cdtr != nil { + payee = entry.NtryDtls.TxDtls.RltdPties.Cdtr.Pty.Nm + } else { + // Use additional entry info as fallback + payee = entry.AddtlNtryInf + } + inputPayeeWords := strings.Fields(payee) - // Negate amount if required - if negateAmount { - expenseAccount.Balance = expenseAccount.Balance.Neg() - } + expenseAccount.Name = predictAccount(classifier, inputPayeeWords) + expenseAccount.Balance = amount - // Apply scale - expenseAccount.Balance = expenseAccount.Balance.Mul(decScale) + // Determine if debit + isDebit := entry.CdtDbtInd == "DBIT" + if !isDebit { + expenseAccount.Balance = expenseAccount.Balance.Neg() + } - // Csv amount is the negative of the expense amount - csvAccount.Balance = expenseAccount.Balance.Neg() + // Apply scale + expenseAccount.Balance = expenseAccount.Balance.Mul(decScale) - // Create valid transaction for print in ledger format - trans := &ledger.Transaction{Date: csvDate, Payee: record[payeeColumn]} - trans.AccountChanges = []ledger.Account{csvAccount, expenseAccount} + // Csv amount is the negative of the expense amount + camtAccount.Balance = expenseAccount.Balance.Neg() - // Comment - if commentColumn >= 0 && record[commentColumn] != "" { - trans.Comments = []string{";" + record[commentColumn]} - } - WriteTransaction(os.Stdout, trans, 80) - } + // Create valid transaction for print in ledger format + trans := &ledger.Transaction{Date: dateTime, Payee: payee} + trans.AccountChanges = []ledger.Account{camtAccount, expenseAccount} + + // Comment + if 
reference != "" { + trans.Comments = []string{";" + reference} + } + WriteTransaction(os.Stdout, trans, 80) + } +} + +// importCmd represents the import command +var importCmd = &cobra.Command{ + Use: "import ", + Args: cobra.ExactArgs(2), + Short: "Import transactions from csv to ledger format", + Run: func(_ *cobra.Command, args []string) { + accountSubstring := args[0] + fileName := args[1] + + if strings.HasSuffix(strings.ToLower(fileName), ".xml") { + importCamt(accountSubstring, fileName) + } else { + importCSV(accountSubstring, fileName) } }, diff --git a/ledger/cmd/import_test.go b/ledger/cmd/import_test.go new file mode 100644 index 0000000..b78105c --- /dev/null +++ b/ledger/cmd/import_test.go @@ -0,0 +1,51 @@ +package cmd + +import ( + "testing" + + "github.com/howeyc/ledger" +) + +func Test_findMatchingAccount(t *testing.T) { + tests := []struct { + name string // description of this test case + // Named input parameters for target function. + generalLedger []*ledger.Transaction + accountSubstring string + want string + wantErr bool + }{ + { + "simple test", + []*ledger.Transaction{ + &ledger.Transaction{ + AccountChanges: []ledger.Account{ + {Name: "Equity:Fake"}, + {Name: "Liability:Real"}, + }, + }, + }, + "Fake", + "Equity:Fake", + false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, gotErr := findMatchingAccount(tt.generalLedger, tt.accountSubstring) + if gotErr != nil { + if !tt.wantErr { + t.Errorf("findMatchingAccount() failed: %v", gotErr) + } + return + } + if tt.wantErr { + t.Fatal("findMatchingAccount() succeeded unexpectedly") + } + // TODO: update the condition below to compare got with tt.want. + if got != tt.want { + t.Errorf("findMatchingAccount() = %v, want %v", got, tt.want) + } + }) + } +}