summary history files

commit:6504a4ba4a5db433b492ec78eece51788d24db4e
date:Sun Feb 16 06:59:03 2025 +1100
parents:23647c0ec5f8a597bbf958a78566dfff37426cc5
Add initial duplication support
diff --git a/internal/merge/merge.go b/internal/merge/merge.go
line changes: +14/-2
index 5e87186..1900694
--- a/internal/merge/merge.go
+++ b/internal/merge/merge.go
@@ -20,6 +20,8 @@ type OFXMerger struct {
 	ofxFiles ofxFiles
 }
 
+// ccAccountsMatch compares ofxgo.CCAcct c1 and c2, returning true if both
+// accounts are the same and false if they are different.
 func ccAccountsMatch(c1, c2 ofxgo.CCAcct) bool {
 	if c1.AcctID != c2.AcctID {
 		return false
@@ -32,6 +34,8 @@ func ccAccountsMatch(c1, c2 ofxgo.CCAcct) bool {
 
 }
 
+// bankAccountsMatch compares ofxgo.BankAcct b1 and b2, returning true if both
+// accounts are the same and false if they are different.
 func bankAccountsMatch(b1, b2 ofxgo.BankAcct) bool {
 	if b1.BankID != b2.BankID {
 		return false
@@ -143,6 +147,7 @@ func newStatementResponse() (ofxgo.StatementResponse, error) {
 func (o *OFXMerger) Merge() ([]byte, error) {
 	buf := new(bytes.Buffer)
 
+	ts := NewTransactionSet()
 	switch o.ofxFiles.statementType {
 	case statementTypeCreditCard:
 		stmt, err := newCCStatementResponse()
@@ -157,12 +162,16 @@ func (o *OFXMerger) Merge() ([]byte, error) {
 			DtEnd:        o.ofxFiles.dtEnd(),
 			DtStart:      o.ofxFiles.dtStart(),
 		}
-		for _, ofxFile := range o.ofxFiles.files {
+
+		for idx, ofxFile := range o.ofxFiles.files {
 			ofxFileStmt, ok := ofxFile.resp.CreditCard[0].(*ofxgo.CCStatementResponse)
 			if !ok {
 				return []byte{}, fmt.Errorf("unable to process credit card statement")
 			}
 			for _, i := range ofxFileStmt.BankTranList.Transactions {
+				if ts.isDuplicate(idx, i) {
+					continue
+				}
 				stmt.BankTranList.Transactions = append(stmt.BankTranList.Transactions, i)
 			}
 		}
@@ -189,12 +198,15 @@ func (o *OFXMerger) Merge() ([]byte, error) {
 			DtEnd:        o.ofxFiles.dtEnd(),
 			DtStart:      o.ofxFiles.dtStart(),
 		}
-		for _, ofxFile := range o.ofxFiles.files {
+		for idx, ofxFile := range o.ofxFiles.files {
 			ofxFileStmt, ok := ofxFile.resp.Bank[0].(*ofxgo.StatementResponse)
 			if !ok {
 				return []byte{}, fmt.Errorf("unable to process bank statement")
 			}
 			for _, i := range ofxFileStmt.BankTranList.Transactions {
+				if ts.isDuplicate(idx, i) {
+					continue
+				}
 				stmt.BankTranList.Transactions = append(stmt.BankTranList.Transactions, i)
 			}
 		}

diff --git a/internal/merge/txdedupe.go b/internal/merge/txdedupe.go
line changes: +62/-0
index 0000000..af7c144
--- /dev/null
+++ b/internal/merge/txdedupe.go
@@ -0,0 +1,62 @@
+package merge
+
+import (
+	"fmt"
+	"sync"
+
+	"github.com/aclindsa/ofxgo"
+)
+
+type transactionHash string // key identifying a transaction in transactionSet.transactions
+
+type transactionSet struct {
+	mu           sync.RWMutex            // held by isDuplicate around every access to transactions
+	transactions map[transactionHash]int // key -> idx given to isDuplicate when the key was first stored
+}
+
+// NewTransactionSet returns an empty transactionSet whose map is ready for use.
+func NewTransactionSet() *transactionSet {
+	seen := make(map[transactionHash]int)
+	return &transactionSet{transactions: seen}
+}
+
+// isDuplicate reports whether transaction was already recorded under a
+// different idx.
+//
+// The transaction is identified by the key from transactionHash, and the idx
+// of the first call for a key is stored. Later calls with that same idx return
+// false, so identical transactions sharing an idx are all kept. Merge passes
+// the index of the OFX file being read as idx, so only transactions repeated
+// across different files are reported as duplicates.
+//
+// Lookup and insert run under one write lock: dropping a read lock before
+// taking the write lock would let two goroutines both miss the same key.
+//
+// Parameters:
+//   - idx: identifies the source of the transaction (in Merge, the position
+//     of the OFX file in the list of files being merged)
+//   - transaction: the transaction to check for duplicates
+//
+// Returns:
+//   - bool: true if the key was first stored with a different idx, false if
+//     the key is new or was stored with the same idx
+func (t *transactionSet) isDuplicate(idx int, transaction ofxgo.Transaction) bool {
+	hash := t.transactionHash(transaction)
+	t.mu.Lock()
+	defer t.mu.Unlock()
+
+	first, seen := t.transactions[hash]
+	if !seen {
+		t.transactions[hash] = idx
+		return false
+	}
+	return first != idx
+}
+
+// transactionHash builds the de-duplication key for transaction from its type,
+// posted date (day precision), name and amount. Fields are "|"-separated and
+// the name is quoted so that adjacent fields cannot run into each other.
+func (t *transactionSet) transactionHash(transaction ofxgo.Transaction) transactionHash {
+	return transactionHash(fmt.Sprintf("%s|%s|%q|%s", transaction.TrnType,
+		transaction.DtPosted.Format("2006-01-02"), transaction.Name, transaction.TrnAmt.String()))
+}