forked from DarwinAwardWinner/ofxtoolkit
-
Notifications
You must be signed in to change notification settings - Fork 0
/
mungeofx.pl
executable file
·151 lines (131 loc) · 3.7 KB
/
mungeofx.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#!/usr/bin/perl
use Toolkit 'smartenv';
#use Toolkit 'smart4';
use XML::LibXML;
use IO::File;
use XML::Twig;
use Capture::Tiny;
use List::AllUtils qw( maxstr );
main: {
for my $filename (@ARGV)
{
my $input = IO::File->new($filename);
my $data = read_header_and_body($input);
$input->close;
my $body_text = join q(), @{$data->{body}};
my $munged_body_text = munge_ofx_text($body_text);
my $output = IO::File->new($filename, 'w');
$output->print(@{$data->{header}}, $munged_body_text);
$output->close;
}
}
# The non-XML-ish header at the top of OFX seems to cause most XML
# processors to choke. This is everything before the "<OFX>" tag. So
# separate out this header before processing the rest as XML, then
# tack the header back on at the end.
sub read_header_and_body {
alias my $input = $_[0];
my @header;
my @body;
while (<$input>) {
if ( m{ \s* <OFX> \s* }xsm ) {
push @body, $_, <$input>;
last;
}
else {
push @header, $_;
}
}
return {
header => \@header,
body => \@body,
};
}
sub munge_ofx_text {
alias my $text = $_[0];
state $date_tags = [
'DTACCTUP',
'DTASOF',
'DTEND',
'DTPOSTED',
'DTSERVER',
'DTSTART',
'DTUSER',
];
state $stmtrs_tags = [
'STMTRS',
'CCSTMTRS',
];
state $transaction_tags = [
'STMTTRN',
];
# These handlers will munge specific elements
state $handlers = {
(map { $_ => \&munge_transaction } @$transaction_tags),
(map { $_ => \&munge_date } @$date_tags),
# (map { $_ => \&munge_dtasof } @$stmtrs_tags),
};
my $twig = XML::Twig->new(
twig_handlers => $handlers,
pretty_print => 'indented',
);
$twig->parse($text);
#$twig->flush;
return $twig->sprint;
}
sub munge_transaction {
my $transaction = $_;
# Retrieve the offending children
my $name = $transaction->first_child('NAME');
my $memo = $transaction->first_child('MEMO');
if ($name and $memo) {
if (starts_with($memo->text, $name->text)) {
# If name is a truncated version of memo, then replace it
# with memo.
#### Completing name from memo...
$name->set_text($memo->text);
}
}
elsif ($name) {
# Missing memo
#### Copy name to memo...
$memo = $name->copy;
$memo->set_tag('MEMO');
$memo->paste(after => $name);
}
elsif ($memo) {
# Missing name
#### Copy memo to name...
$name = $memo->copy;
$name->set_tag('NAME');
$name->paste(before => $memo);
}
else {
# Missing both. Not allowed.
croak "Transaction has no name or memo. The offenting transaction was:\n"
. $transaction->sprint;
}
#### New transaction: $transaction->sprint
}
# Truncate dates to 8 digits: YYYYMMDD, because the stuff after that
# is usually invalid and evil.
sub munge_date {
my $date = $_;
$date->set_text(substr($date->trimmed_text,0,8));
}
# Change every DTASOF field to match the latest DTPOSTED of any
# transaction, because the provided DTASOF field is usually a lie.
sub munge_dtasof {
#### Munging DTASOF...
my $tranlist = $_[0];
my $latest_post_date = maxstr map { $_->trimmed_text } $tranlist->get_xpath('.//DTPOSTED');
#### Latest post date: $latest_post_date
my @as_of_date_nodes = $tranlist->get_xpath('.//DTASOF');
for my $node (@as_of_date_nodes) {
$node->set_text($latest_post_date);
}
}
sub starts_with {
alias my ($string, $start) = @_;
return substr($string, 0, length $start) eq $start;
}