-
Notifications
You must be signed in to change notification settings - Fork 0
/
073-files.pl
executable file
·442 lines (360 loc) · 16.7 KB
/
073-files.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
#!/usr/bin/env perl
# REF: http://modernperlbooks.com/books/modern_perl_2016/09-managing-real-programs.html#RmlsZXM
use 5.034;
use warnings;
use autodie;
use feature 'say';
use Test::More;
# Most programs interact with the real world mostly by reading, writing, and otherwise manipulating files.
# Perl began as a tool for system administrators
# and is still a language well suited for text processing.
# ================================
# Input and Output
# ================================
# A filehandle represents the current state of one specific channel of input or output.
# Every Perl program starts with three standard filehandles,
# STDIN (the input to the program),
# STDOUT (the output from the program),
# and STDERR (the error output from the program).
# By default, everything you print or say goes to STDOUT,
# while errors and warnings go to STDERR.
# This separation of output allows you to redirect useful output and errors to two different places—
# an output file and error logs, for example.
# Use the open builtin to initialize a filehandle.
# To open a file for reading:
# demo_open - open a file for reading with the three-argument form of open.
#
# Takes no arguments; the file to open is named by the lexical $filename.
# Dies if the file cannot be opened. This file enables autodie near the top,
# so a failed open raises an autodie exception before the explicit "or die"
# clause is reached; the clause stays because the surrounding text explains it.
sub demo_open {
    my $filename = '';

    # Open the file named by the variable, not the literal string 'filename',
    # so the file that is opened matches the one reported in the error message.
    open my $fh, '<', $filename or die "Cannot read '$filename': $!\n";
}
# The first operand is a lexical which will contain the filehandle.
# The second operand is the file mode,
# which determines the type of file operation (reading, writing, appending, et cetera).
# The final operand is the name of the file on which to operate.
# If the open fails, the die clause will throw an exception,
# with the reason for failure in the ($!) magic variable.
# You may open files for writing, appending, reading and writing, and more.
# Some of the most important file modes are:
#
# <, which opens a file for reading.
# >, which opens a file for writing, clobbering existing contents if the file exists and creating a new file otherwise.
# >>, which opens a file for writing, appending to any existing contents and creating a new file otherwise.
# +<, which opens a file for both reading and writing.
# -|, which opens a pipe to an external process for reading.
# |-, which opens a pipe to an external process for writing.
# You may also create filehandles which read from or write to plain Perl scalars,
# using any existing file mode:
# demo_open_scalars - open filehandles that are backed by plain Perl scalars.
#
# Passing a reference to a scalar where open() expects a file name makes the
# handle read from (or write to) that scalar. Both handles are then handed to
# do_something_awesome(), which is not defined in the visible part of this file.
sub demo_open_scalars {
    my ( $fake_input, $captured_output ) = ( '', '' );

    open( my $read_fh,  '<', \$fake_input );
    open( my $write_fh, '>', \$captured_output );

    do_something_awesome( $read_fh, $write_fh );
}
# perldoc perlopentut explains in detail more exotic uses of open,
# including its ability to launch and control other processes,
# as well as the use of sysopen for finer-grained control over input and output.
# perldoc perlfaq5 includes working code for many common IO tasks.
# Assume the examples in this section have use autodie; enabled
# so as to elide explicit error handling.
# If you choose not to use autodie,
# check the return values of all system calls to handle errors appropriately.
# ================================
# Input and Output - Unicode, IO Layers, and File Modes
# ================================
# In addition to the file mode,
# you may add an IO encoding layer which allows Perl to encode to or decode from a Unicode encoding.
# demo_encoding_layers - attach a UTF-8 encoding layer to the open mode.
#
# Takes no arguments. Both file names are empty placeholders, so the opens are
# illustrative only.
sub demo_encoding_layers {
    my ( $infile, $outfile ) = ( '', '' );

    # Decode UTF-8 while reading from $infile ...
    open( my $in_fh, '<:encoding(UTF-8)', $infile );

    # ... and encode to UTF-8 while writing to $outfile.
    open( my $out_fh, '>:encoding(UTF-8)', $outfile );
}
# ================================
# Input and Output - Two-argument open
# ================================
# Older code often uses the two-argument form of open(),
# which jams the file mode with the name of the file to open:
# demo_two_arg_open - show the legacy two-argument form of open().
#
# The mode (">") and the file name are packed into one string, which Perl has
# to pick apart again. This form is kept here on purpose as the thing being
# demonstrated; prefer the three-argument form in real code.
sub demo_two_arg_open {
    my $file = '';

    open( my $fh, "> $file" )
        or die "Cannot write to '$file': $!\n";
}
# Perl must extract the file mode from the filename.
# That's a risk; anytime Perl has to guess at what you mean, it may guess incorrectly.
# Worse, if $file came from untrusted user input, you have a potential security problem,
# as any unexpected characters could change how your program behaves.
# The three-argument open() is a safer replacement for this code.
# The special package global DATA filehandle represents the current file of source code.
# When Perl finishes compiling a file,
# it leaves DATA open and pointing to the end of the compilation unit
# if the file has a __DATA__ or __END__ section.
# Any text which occurs after that token is available for reading from DATA.
# The entire file is available if you use seek to rewind the filehandle.
# This is useful for short, self-contained programs.
# See perldoc perldata for more details.
# ================================
# Input and Output - Reading from Files
# ================================
# Given a filehandle opened for input,
# read from it with the readline builtin,
# also written as <>.
# A common idiom reads a line at a time in a while() loop:
# demo_readline - read 'some_file' one line at a time with the <> operator.
#
# Each line is read into $_, stripped of its line ending, and echoed to
# STDOUT. Note that the loop assigns to the global $_.
sub demo_readline {
    open( my $fh, '<', 'some_file' );

    LINE:
    while (<$fh>) {
        chomp $_;
        say "Read a line '$_'";
    }
}
# In scalar context, readline reads a single line of the file and returns it,
# or undef if it's reached the end of file (test that condition with the eof builtin).
# Each iteration in this example returns the next line or undef.
# This while idiom implicitly checks the definedness of the variable used for iteration,
# so only the end of file condition will end the loop.
# This idiom is equivalent to:
# demo_check_eof - the long-hand spelling of the while (<$fh>) idiom.
#
# The definedness test on the value read is written out explicitly, and the
# loop also leaves as soon as eof reports that the handle is exhausted.
# Each line is chomped and echoed to STDOUT; the loop assigns to the global $_.
sub demo_check_eof {
    open( my $fh, '<', 'some_file' );

    LINE:
    while ( defined( $_ = readline($fh) ) ) {
        chomp $_;
        say "Read a line '$_'";
        last LINE if eof($fh);
    }
}
# NOTE: Why use while and not for?
# for imposes list context on its operands.
# When in list context, readline will read the entire file before processing any of it.
# while performs iteration and reads a line at a time.
# When memory use is a concern, use while.
# Every line read from readline includes the character or characters which mark the end of a line.
# In most cases, this is a platform-specific sequence
# consisting of a newline (\n), a carriage return (\r), or a combination of the two (\r\n).
# Use chomp to remove it.
# The cleanest way to read a file line-by-line in Perl is:
# demo_cleanest_readline - line-by-line reading with a named lexical.
#
# Same loop shape as the $_-based idiom, but each line lands in a lexical
# variable, so the global $_ is left alone. The loop body is a "..."
# placeholder, which throws if it is ever executed.
sub demo_cleanest_readline {
    my $filename = '';
    open( my $fh, '<', $filename );

    LINE:
    while ( my $line = <$fh> ) {
        chomp($line);
        ...;
    }
}
# Perl assumes that files contain text by default.
# If you're reading binary data—a media file or a compressed file, for example—
# use binmode before performing any IO.
# This will force Perl to treat the file data as pure data,
# without modifying it in any way,
# such as translating \n into the platform-specific newline sequence.
# While Unix-like platforms may not always need binmode,
# portable programs play it safe (Unicode and Strings).
# ================================
# Input and Output - Writing to Files
# ================================
# Given a filehandle open for output, print or say to write to the file:
# demo_write - write two lines to output_file.txt with print and say.
#
# The filehandle is wrapped in braces, which is the unambiguous way to tell
# print/say where the handle ends and the list to print begins. There is
# still no comma after the handle.
sub demo_write {
    open( my $out_fh, '>', 'output_file.txt' );

    print {$out_fh} "Here's a line of text\n";
    say {$out_fh} "... and here's another";
}
# NOTE the lack of comma between the filehandle and the next operand.
# Damian Conway's Perl Best Practices recommends enclosing the filehandle in curly braces as a habit.
# This is necessary to disambiguate parsing of a filehandle
# contained in anything other than a plain scalar—
# a filehandle in an array or hash or returned from an object method—
# and it won't hurt anything in the simpler cases.
# TODO: review
# Both print and say take a list of operands.
# Perl uses the magic global ($,) as the separator between list values.
# Perl uses any value of ($\) as the final argument to print
# (but always uses \n as an implicit final argument to say).
# Remember that ($\) is undef by default.
# These two examples produce the same result:
# Two ways to print the same bytes to STDOUT: the list of princes followed by
# two newlines. The bare block limits the scope of @princes and of the
# localized $\ (output record separator).
{
    my @princes = qw( Corwin Eric Random ... );

    # First form: print appends $\ itself.
    local $\ = "\n\n";

    # prints a list of princes, followed by two newlines
    print @princes;

    # Second form: switch $\ off and build the whole string by hand.
    local $\ = '';

    # $, (output field separator) is undef unless someone set it; print treats
    # that as "no separator", but join would warn about an uninitialized
    # value, so fall back to the empty string explicitly.
    print join( $, // '', @princes ) . "\n\n";
}
# ================================
# Input and Output - Closing Files
# ================================
# When you've finished working with a file,
# close its filehandle explicitly or allow it to go out of scope.
# Perl will close it for you.
# The benefit of calling close explicitly is that you can check for—and recover from—specific errors,
# such as running out of space on a storage device
# or a broken network connection.
# As usual, autodie handles these checks for you:
# demo_close - open a file for writing and close it explicitly.
#
# autodie is enabled for open and close inside this sub, so a failure in
# either call throws instead of being silently ignored. The "..." statement
# is a placeholder for the real work and throws if it is executed.
sub demo_close {
    use autodie qw( open close );

    my $file = '';

    open( my $fh, '>', $file );
    ...;
    close($fh);
}
# ================================
# TODO: review Input and Output - Special File Handling Variables
# ================================
# For every line read,
# Perl increments the value of the variable ($.),
# which serves as a line counter.
# (readline) uses the current contents of $/ as the line-ending sequence.
# The value of this variable defaults to
# the most appropriate line-ending character sequence for text files on your current platform.
# The word line is a misnomer, however.
# ($/) can contain any sequence of characters (but not a regular expression).
# This is useful for highly-structured data in which you want to read a record at a time.
# NOTE: Given a file with records separated by a blank line (two consecutive newlines),
# set $/ to \n\n to read a record at a time.
# Use chomp on a record read from the file to remove the double-newline sequence.
# Perl buffers its output by default,
# performing IO only when the amount of pending output exceeds a threshold.
# This allows Perl to batch up expensive IO operations
# instead of always writing very small amounts of data.
# Yet sometimes you want to send data as soon as you have it without waiting for that buffering—
# especially if you're writing a command-line filter connected to other programs
# or a line-oriented network service.
# The $| variable controls buffering on the currently active output filehandle.
# When set to a non-zero value, Perl will flush the output after each write to the filehandle.
# When set to a zero value, Perl will use its default buffering strategy.
# Files default to a fully-buffered strategy.
# (STDOUT) when connected to an active terminal—
# but not another program—
# uses a line-buffered strategy,
# where Perl flushes (STDOUT) every time it encounters a newline in the output.
# Instead of cluttering your code with a global variable,
# use the autoflush() method to change the buffering behavior of a lexical filehandle:
# demo_autoflush - turn off output buffering on a single lexical filehandle.
#
# Opens pecan.log for writing and calls autoflush(1) on that handle, rather
# than setting the global $| variable.
sub demo_autoflush {
    open( my $fh, '>', 'pecan.log' );

    $fh->autoflush(1);
}
# You can call any method provided by IO::File on a filehandle.
# For example,
# the input_line_number() and input_record_separator() methods do the job of ($.) and ($/) on individual filehandles.
# See the documentation for IO::File, IO::Handle, and IO::Seekable.
# ================================
# Directories and Paths
# ================================
# Working with directories is similar to working with files,
# except that you cannot write to directories.
# Open a directory handle with the opendir builtin:
# demo_opendir - open a directory handle with the opendir builtin.
#
# The path is a hard-coded example; the handle is closed again when $dirh
# goes out of scope at the end of the sub.
sub demo_opendir {
    opendir( my $dirh, '/home/monkeytamer/tasks/' );
}
# The readdir builtin reads from a directory.
# As with readline, you may iterate over the contents of directories one entry at a time
# or you may assign everything to an array in one swoop:
# demo_readdir - the two ways of consuming a directory handle.
#
# Both directory names are empty placeholders. The loop body is a "..."
# placeholder, which throws if it is ever executed.
sub demo_readdir {

    # Scalar context: one directory entry per loop iteration.
    opendir( my $dirh, '' );
    while ( my $file = readdir $dirh ) {
        ...;
    }

    # List context: every remaining entry at once, stored in an array.
    opendir( my $otherdirh, '' );
    my @files = readdir($otherdirh);
}
# In a while loop, readdir sets $_:
# demo_readdir_magic_variable - let a bare readdir in while() fill $_.
#
# Lists the entries of tasks/circus/ (relative to the current directory) on
# STDOUT, skipping every name that starts with a dot.
sub demo_readdir_magic_variable {
    opendir( my $dirh, 'tasks/circus/' );

    ENTRY:
    while ( readdir $dirh ) {

        # Names with a leading dot include ".", ".." and hidden files.
        next ENTRY if $_ =~ /^\./;

        say "Found a task $_!";
    }
}
# The curious regular expression in this example skips so-called hidden files on Unix and Unix-like systems,
# where a leading dot prevents them from appearing in directory listings by default.
# It also skips the two special files . and ..,
# which represent the current directory and the parent directory respectively.
# The names returned from readdir are relative to the directory itself.
# (Remember that an absolute path is a path fully qualified to its filesystem.)
# If the tasks/ directory contains three files
# named eat, drink, and be_monkey,
# readdir will return eat, drink, and be_monkey
# instead of tasks/eat, tasks/drink, and tasks/be_monkey.
# Close a directory handle with the closedir builtin or by letting it go out of scope.
# ================================
# Directories and Paths - Manipulating Paths
# ================================
# Perl offers a Unixy view of your filesystem
# and will interpret Unix-style paths appropriately for your operating system and filesystem.
# If you're using Microsoft Windows,
# you can use the path C:/My Documents/Robots/Bender/
# just as easily as you can use the path C:\My Documents\Robots\Caprica Six\.
# Even though Perl uses Unix file semantics consistently,
# cross-platform file manipulation is much easier with a module.
# The core File::Spec module family lets you manipulate file paths safely and portably.
# It's a little clunky, but it's well documented.
# The Path::Class distribution on the CPAN has a nicer interface.
# {
# use Path::Class;
#
# # Use the dir() function to create an object representing a directory
# # and the file() function to create an object representing a file:
# my $meals = dir( 'tasks', 'cooking' );
# my $file = file( 'tasks', 'health', 'robots.txt' );
#
# # You can get file objects from directories and vice versa:
# my $lunch = $meals->file('veggie_calzone');
# my $robots_dir = $file->dir;
#
# # You can even open filehandles to directories and files:
# my $dir_fh = $meals->open;
# my $robots_fh = $file->open('r') or die "Open failed: $!";
#
# # Both Path::Class::Dir and Path::Class::File offer further useful behaviors—
# # though beware that if you use a Path::Class object of some kind
# # with an operator or function which expects a string containing a file path,
# # you need to stringify the object yourself.
# # This is a persistent but minor annoyance.
# # (If you find it burdensome, try Path::Tiny as an alternative.)
# my $contents = read_from_filename("$lunch");
# }
# ================================
# File Manipulation
# ================================
# Besides reading and writing files,
# you can also manipulate them as you would directly from a command line or a file manager.
# The file test operators,
# collectively called the -X operators,
# examine file and directory attributes.
# To test that a file exists:
# demo_file_existence - test whether a path exists with the -e operator.
#
# Prints "Present!" to STDOUT when $filename names an existing file or
# directory. $filename is an empty placeholder here.
sub demo_file_existence {
    my $filename = '';

    if ( -e $filename ) {
        say 'Present!';
    }
}
# The -e operator has a single operand,
# either the name of a file
# or handle to a file or directory.
# If the file or directory exists,
# the expression will evaluate to a true value.
# perldoc -f -X lists all other file tests.
# -f returns a true value if its operand is a plain file.
# -d returns a true value if its operand is a directory.
# -r returns a true value if the file permissions of its operand permit reading by the current user.
# -s returns a true value if its operand is a non-empty file.
# Look up the documentation for any of these operators with perldoc -f -r, for example.
# The rename builtin can rename a file or move it between directories.
# It takes two operands, the old path of the file and the new path:
# demo_rename - show two equivalent spellings of the rename builtin.
#
# Both statements ask for death_star.txt to be renamed to carbon_sink.txt;
# the fat comma (=>) in the second one is only a more readable comma.
# NOTE(review): the statements are alternatives, not a sequence. If the first
# rename succeeds, the source file is gone by the time the second one runs,
# and with autodie enabled at the top of this file that failure would throw.
sub demo_rename {
# Plain comma between the old path and the new path.
rename 'death_star.txt', 'carbon_sink.txt';
# or if you're stylish:
rename 'death_star.txt' => 'carbon_sink.txt';
}
# There's no core builtin to copy a file,
# but the core File::Copy module provides both copy() and move() functions.
# Use the unlink builtin to remove one or more files.
# (The delete builtin deletes an element from a hash, not a file from the filesystem.)
# These functions and builtins all return true values on success and set $! on error.
# Path::Class also provides convenience methods to remove files completely
# and portably as well as to check certain file attributes.
# Perl tracks its current working directory.
# By default, this is the active directory from where you launched the program.
# The core Cwd module's cwd() function returns the name of the current working directory.
# The builtin chdir attempts to change the current working directory.
# Working from the correct directory is essential to working with files with relative paths.
# The CPAN module File::chdir makes manipulating the current working directory easier.
# If you're a fan of the command line and use pushd and popd, see also File::pushd.
done_testing();