-
Notifications
You must be signed in to change notification settings - Fork 0
/
nsw-oeh-npws-alerts.pl
executable file
·97 lines (73 loc) · 2.7 KB
/
nsw-oeh-npws-alerts.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/usr/bin/perl -w
use strict;
use warnings;
use File::Temp qw/ tempfile /;
use XML::RSS;
use JSON 'encode_json';
use File::Path qw(mkpath rmtree);
use HTML::Scrubber;
use File::Spec;
# Validate the command line: <output-directory> <alert-directory-name>.
my $usage = "Usage: $0 <output-directory> <alert-directory-name>\n";
if (@ARGV < 2) {
    die $usage;
}
# Existing directory under which the alert directory is (re)created.
my $basedir = $ARGV[0];
# Name of the directory below $basedir that holds the generated alert files.
my $alertdir = $ARGV[1];
if (!-d $basedir) {
    die "$basedir does not exist\n$usage";
}
# "$basedir/$alertdir" is removed recursively later in this script, so refuse
# names that would resolve to $basedir itself (empty, only "/" or ".") or
# that climb out of it through a ".." path component.
my @alertdir_parts = grep { length($_) && $_ ne '.' } split m{/}, $alertdir;
if (!@alertdir_parts || grep { $_ eq '..' } @alertdir_parts) {
    die "Invalid alert directory name '$alertdir'\n$usage";
}
# URL of the RSS feed the alerts are read from.
my $rss_url = "http://www.nationalparks.nsw.gov.au/api/rssfeed/get";
# create a temp file to store the downloaded feed
my ($feed_filehandle, $feed_filename) = tempfile();
# download the feed with wget as LWP doesn't seem to handle dropped connections well
# see README.md for further details
# The list form of system() runs wget directly instead of through a shell, so
# a temp file path containing spaces or shell metacharacters is passed intact.
# $wget_status holds the raw status returned by system(): 0 on success.
my $wget_status = system(
    'wget', '--quiet', '--tries=0', '--read-timeout=30',
    '-O', $feed_filename, $rss_url
);
# Convert the downloaded feed into one JSON file per alert, or report the
# download failure with a non-zero exit status.
if ($wget_status == 0) {
    # Remove the temporary feed file before aborting with $message.
    my $fail = sub {
        my ($message) = @_;
        unlink $feed_filename;
        die $message;
    };

    # Parse the feed before touching the existing alerts so that a malformed
    # download does not wipe out the alerts written by the previous run.
    my $rss = XML::RSS->new();
    if (!eval { $rss->parsefile($feed_filename); 1 }) {
        $fail->("Error parsing remote feed: " . ($@ || "unknown error\n"));
    }

    my $alert_path = $basedir . "/$alertdir";
    #remove existing park alerts as they are no longer current
    rmtree($alert_path);
    # create a new empty directory for the new park alerts
    mkdir($alert_path)
        or $fail->("Cannot create $alert_path: $!\n");

    # Only a small set of inline/list tags survives in descriptions, and <a>
    # keeps nothing but its href attribute.
    my $scrubber = HTML::Scrubber->new(
        allow => [ qw[ strong b br a ul ol li i span ] ],
        rules => [
            a => {
                href => 1
            }
        ]
    );

    # Channel-level metadata is identical for every alert, so build it once.
    # The copyright sign is written as \x{A9}: without "use utf8" a literal
    # UTF-8 character in the source is two bytes, which encode_json would
    # encode a second time and emit as mojibake.
    my $metadata = {
        "pubDate" => $rss->{channel}->{'pubDate'},
        "generator" => $rss->{channel}->{generator},
        "link" => $rss->{channel}->{link},
        "attribution" => "\x{A9} State of New South Wales through the Office of Environment and Heritage"
    };

    foreach my $item ( @{ $rss->{items} } ) {
        my $title = defined $item->{title} ? $item->{title} : '';
        my $park_name = $title;
        # ensure park name is a very basic string by stripping out everything except alphanumeric and space
        $park_name =~ s/[^A-Za-z0-9 ]//g;
        # replace spaces with -
        $park_name =~ tr/ /-/;
        # final check to ensure no path traversal can occur
        ($park_name) = File::Spec->no_upwards( ($park_name) );
        # A title with no usable characters would produce a hidden ".json" file.
        if (!defined $park_name || $park_name eq '') {
            print STDERR "Skipping alert with unusable title '$title'\n";
            next;
        }

        my $park_file_name = $alert_path . "/" . lc($park_name) . ".json";
        my $json = {
            "metadata" => $metadata,
            "content" => {
                "name" => $item->{title},
                "pubDate" => $item->{pubDate},
                "link" => $item->{link},
                "description" => $scrubber->scrub($item->{description}),
                "category" => $item->{category}
            }
        };

        open(my $park_file, '>', $park_file_name)
            or $fail->("Cannot write $park_file_name: $!\n");
        print {$park_file} encode_json($json);
        # Buffered write errors (e.g. a full disk) only surface at close.
        close($park_file)
            or $fail->("Cannot finish writing $park_file_name: $!\n");
    }
    unlink $feed_filename;
} else {
    unlink $feed_filename;
    print STDERR "Error downloading remote feed\n";
    # system() returns a raw wait status: -1 if wget could not be started,
    # otherwise the exit code in the high byte. Passing it straight to exit
    # would turn e.g. 256 (wget exit code 1) into exit status 0, so translate
    # it and make sure the result is never 0.
    my $exit_code = 1;
    if ($wget_status != -1 && ($wget_status >> 8) != 0) {
        $exit_code = $wget_status >> 8;
    }
    exit $exit_code;
}