-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsample-lines
executable file
·48 lines (36 loc) · 985 Bytes
/
sample-lines
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/usr/bin/env perl
use v5.14;
use warnings;

# Select a random sample of lines from the named files (or STDIN when no
# files are given). The input is read once, line by line, and only the
# current sample is held in memory.
#
# Usage: sample-lines [-n count] [file...]
#   -n count   number of lines to output (default 1); "-ncount" also works

my $n = 1;

# Accept both the attached ("-n5") and detached ("-n 5") option forms.
# Guard on @ARGV so a bare invocation never matches against undef.
if (@ARGV && $ARGV[0] =~ /^-n(.*)/) {
    $n = $1;
    shift @ARGV;

    # Test for an empty string rather than truthiness: "-n0" carries an
    # attached count of zero and must not swallow the next argument.
    if (!length $n) {
        $n = shift @ARGV;
    }

    # A missing or non-numeric count would otherwise numify to 0 and make
    # the script silently print nothing; report it instead.
    die "usage: $0 [-n count] [file...]\n"
        unless defined $n && $n =~ /\A[0-9]+\z/;
}

my @sample;
while (my $line = <>) {
    if (@sample < $n) {
        # Fill phase: the first $n lines always enter the sample.
        push @sample, $line;
    }
    else {
        # Replacement phase: $. is the number of lines read so far (it
        # keeps counting across files because <> never closes ARGV
        # explicitly). Drawing a slot in [0, $.) and replacing only when
        # it lands inside the sample keeps the current line with
        # probability $n / $. .
        my $j = int(rand $.);
        $sample[$j] = $line if $j < $n;
    }
}

print @sample;
__END__
=head1 NAME
sample-lines - Retrieve a random subset of input lines
=head1 SYNOPSIS
sample-lines [-n count] [file...]
=head1 OPTIONS
-n count Specify the number of lines to return (default is 1 line)
=head1 DESCRIPTION
Given any stream of lines of text, sample-lines will return a randomly selected
subset of the requested size. If the requested size is more than the number of lines
actually supplied, all input lines are returned; otherwise, each input line has an equal
probability of being included in the output sample.
=head1 AUTHOR
Written by Mark J. Reed <[email protected]>