-
Notifications
You must be signed in to change notification settings - Fork 0
/
080-beyond-syntax.pl
executable file
·558 lines (453 loc) · 21.2 KB
/
080-beyond-syntax.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
#!/usr/bin/env perl
# REF: http://modernperlbooks.com/books/modern_perl_2016/10-perl-beyond-syntax.html#UGVybEJleW9uZFN5bnRheA
use 5.034;
use warnings;
use autodie;
use feature 'say';
use Test::More;
# "Simple" means different things to different programmers.
# Effective programmers understand how Perl's features interact and combine.
# Their fluent code takes advantage of language patterns and idioms.
# The result of this Perlish thinking is concise, powerful, and useful code—
# and it's simple when you understand it.
# ================================
# TODO: review Idioms
# ================================
# Every language has common patterns of expression, or idioms.
# The earth revolves, but we speak of the sun rising or setting.
# We brag about clever hacks but cringe at nasty hacks and code smells.
# Perl has idioms; they're both language features and design techniques.
# They're mannerisms and mechanisms that give your code a Perlish accent.
# You don't have to use them to get your job done,
# but they play to Perl's strengths.
# ================================
# Idioms - The Object as $self
# ================================
# Perl's object system (Moose) treats the invocant of a method as a mundane parameter.
# Regardless of whether you invoke a class or an instance method,
# the first element of @_ is always the invocant.
# By convention, most Perl code uses $class as the name of the class method invocant
# and $self for the name of the object invocant.
# This convention is strong enough
# that useful extensions such as Moops assume you will use $self as the name of object invocants.
# NOTE: Review for fun.
package MyClass v0.0.1 {

    # Print the received argument list twice: first as a bare list (the
    # elements run together), then joined with the list separator $".
    # When called as a method, the invocant is the first element printed.
    sub print_info {
        say @_;
        say join( $", @_ );
    }
}
# Invoke MyClass::print_info three ways to show what each call puts in @_.
sub demo_different_calls {
# Plain function call: @_ is (1); no invocant is passed.
MyClass::print_info(1);
# Class method call: @_ is ('MyClass', 2); the invocant comes first.
MyClass->print_info(2);
# The same method call, with the trailing :: marking MyClass as a package name.
MyClass::->print_info(3); # See 091-barewords "Good Uses of Barewords - Bareword package names"
}
# ================================
# Idioms - Named Parameters
# ================================
# Perl loves lists.
# Lists are a fundamental element of Perl.
# List flattening and binding lets you chain together multiple expressions
# to manipulate data in every way possible.
# While Perl's argument passing simplicity (everything flattens into @_) is occasionally too simple,
# assigning from @_ in list context allows you to unpack named parameters as pairs.
# The fat comma (Declaring Hashes) operator turns an ordinary list into an obvious list of pairs of arguments:
# Call make_ice_cream_sundae() with named parameters: the fat comma makes the
# flat argument list read as key/value pairs.
sub demo_named_parameters {
    my @sundae_options = (
        whipped_cream => 1,
        sprinkles     => 1,
        banana        => 0,
        ice_cream     => 'mint chocolate chip',
    );

    return make_ice_cream_sundae(@sundae_options);
}
# You can unpack these parameters into a hash
# and treat that hash as if it were a single argument:
# Unpack a flat list of key/value pairs into a hash of named options.
# Only the `ice_cream` option is read here; the rest of the body is a stub
# (the yada yada statement), which dies with "Unimplemented" if reached.
sub make_ice_cream_sundae {
    my (%option_for) = @_;

    my $dessert = get_ice_cream( $option_for{ice_cream} );

    ...;
}
# ================================
# Idioms - Hash or Hash Ref?
# ================================
# Perl Best Practices suggests passing hash references instead.
# This allows Perl to perform caller-side validation of the hash reference.
# If you pass the wrong number of arguments,
# you'll get an error where you call the function.
# The flattened named-parameter technique works well with import() (Importing) or other methods;
# process as many parameters as you like before slurping the remainder into a hash:
# Illustrative import(): peel off the invoking class, slurp the remaining
# arguments into a hash of named options, and record which package called.
# The body is a stub (the yada yada statement) and dies with "Unimplemented"
# if reached.
sub import {
    my $class = shift;
    my %args  = @_;

    my $calling_package = caller;

    ...;
}
# ================================
# Idioms - The Schwartzian Transform
# ================================
# The Schwartzian transform is an elegant demonstration of the pervasive list handling idiom borrowed from Lisp.
{
    # Suppose you have a Perl hash which associates the names of your
    # co-workers with their phone extensions (extension => name):
    my %extensions = (
        '000' => 'Damian',
        '002' => 'Wesley',
        '042' => 'Robin',
        '088' => 'Nic',
    );

    # NOTE: Fat comma hash key quoting only works on things that look like barewords.
    # With the leading zero, these keys look like octal numbers,
    # so they must be quoted explicitly.
    # Everyone makes this mistake at least once.

    # To sort this list by name alphabetically,
    # you must sort the hash by its values, not its keys.
    # Getting the values sorted correctly is easy:
    my @sorted_names = sort values %extensions;

    # ... but you need an extra step to preserve the association of names and extensions,
    # hence the Schwartzian transform.
    # First, convert the hash into a list of data structures which will be easier to sort—
    # in this case, two-element anonymous arrays of [ extension, name ]:
    my @pairs = map { [ $_, $extensions{$_} ] } keys %extensions;

    # sort takes this list of anonymous arrays and compares their second elements (the names) as strings:
    my @sorted_pairs = sort { $a->[1] cmp $b->[1] } @pairs;

    # The block provided to sort receives arguments in two package-scoped (Scope) variables: $a and $b.
    # (See perldoc -f sort for an extensive discussion of the implications of this scoping.)
    # The sort block takes its arguments two at a time.
    # The first becomes the contents of $a and the second the contents of $b.
    # If $a should sort ahead of $b in the results, the block must return a negative number (conventionally -1).
    # If both values sort to the same position, the block must return 0.
    # Finally, if $a should sort after $b in the results, the block should return a positive number (conventionally 1).
    # Reversing the hash in place would work if no one had the same name.
    # This particular data set presents no such problem, but code defensively.
    # The cmp operator performs string comparisons
    # and the <=> performs numeric comparisons.

    # Given @sorted_pairs, a second map operation converts the data structure to a more usable form:
    my @formatted_exts = map { "$_->[1], ext. $_->[0]" } @sorted_pairs;

    # ... and now you can print the whole thing:
    say for @formatted_exts;

    # The Schwartzian transformation chains all of these expressions together to elide those temporary variables.
    # The format string is the same as above, so both versions print identical lines:
    say for
        map  { "$_->[1], ext. $_->[0]" }
        sort { $a->[1] cmp $b->[1] }
        map  { [ $_ => $extensions{$_} ] }
        keys %extensions;

    # Read the expression from right to left, in evaluation order.
    # For each key in the extensions hash,
    # make a two-item anonymous array containing the key and the value.
    # Sort that list of anonymous arrays by their second elements, the hash values.
    # Format a string of output from those sorted arrays.
}
# The Schwartzian transform pipeline of map-sort-map transforms a data structure
# into another form easier for sorting
# and then transforms it back into the first form—or another form.
# While this sorting example is simple,
# consider the case of calculating a cryptographic hash for a large file.
# The Schwartzian transform is especially useful
# because it effectively caches any expensive calculations by performing them once in the first-executed map.
# ================================
# Idioms - Easy File Slurping
# ================================
# local is essential to managing Perl's magic global variables.
# You must understand scope (Scope) to use local effectively—
# but if you do, you can use tight and lightweight scopes in interesting ways.
# For example, to slurp files into a scalar in a single expression:
# Slurp a whole file into a scalar twice, once with each spelling of the
# `local $/` idiom.
#
# Parameters:
#   $filename - path of the file to read (optional; defaults to this source
#               file so the demo can run without arguments).
# Returns:
#   A two-element list: the contents read by the do-block form, then the
#   contents read by the bare-block form.
# Dies if the file cannot be opened, rewound, or closed.
sub demo_file_slurping {
    my ($filename) = @_;
    $filename //= __FILE__;

    open my $fh, '<', $filename
        or die "Cannot open '$filename' for reading: $!";

    # do-block form: the block evaluates to the result of the readline.
    my $file_1 = do { local $/; <$fh> };

    # The first slurp leaves the handle at end-of-file; rewind so that the
    # second form reads the same data rather than nothing.
    seek $fh, 0, 0 or die "Cannot rewind '$filename': $!";

    # or
    my $file_2;
    { local $/; $file_2 = <$fh> }

    close $fh or die "Cannot close '$filename': $!";

    return ( $file_1, $file_2 );
}
# $/ is the input record separator.
# (local)izing it sets its value to undef, pending assignment.
# As the value of the separator is undefined,
# Perl happily reads the entire contents of the filehandle in one swoop.
# Because a do block evaluates to the value of the last expression evaluated within the block,
# this evaluates to the data read from the filehandle: the contents of the file.
# At the end of the expression,
# $/ has reverted to its previous state and $file_1 contains the contents of the file.
# The second example avoids a second copy of the string containing the file's contents;
# it's not as pretty, but it uses the least amount of memory.
# This useful example is admittedly maddening for people who don't understand both local and scoping.
# The File::Slurper module from the CPAN is a worthy (and often faster) alternative.
# ================================
# Idioms - Handling Main
# ================================
# Many programs commonly set up several file-scoped lexical variables
# before handing off processing to other functions.
# It's tempting to use these variables directly,
# rather than passing values to and returning values from functions,
# especially as programs grow.
# TODO: review
# Unfortunately, these programs may come to rely on subtleties
# of what happens when during Perl's compilation process—
# a variable you thought would be initialized to a specific value may not get initialized until much later.
# Remember that Perl requires no special syntax for creating closures (Closures)—
# you can close over a lexical variable inadvertently.
# To avoid this, wrap the main code of your program in a single function, main().
# Encapsulate your variables to their proper scopes.
# Then add a single line to the beginning of your program,
# after you've used all of the modules and pragmas you need:
# {
# #!/usr/bin/perl
#
# use Modern::Perl;
#
# exit main( @ARGV );
#
# sub main {
# ...
#
# # successful exit
# return 0;
# }
#
# sub other_functions { ... }
# }
# Calling main() before anything else in the program
# forces you to be explicit about initialization and compilation order.
# Calling exit with main()'s return value prevents any other bare code from running.
# ================================
# Idioms - Controlled Execution
# ================================
# The effective difference between a program and a module is in its intended use.
# Users invoke programs directly,
# while programs load modules after execution has already begun.
# Yet both modules and programs are merely Perl code.
# Making a module executable is easy.
# So is making a program behave as a module
# (useful for testing parts of an existing program without formally making a module).
# All you need to do is to discover how Perl began to execute a piece of code.
# (caller)'s single optional argument governs the number of call frames (Recursion) to look back through.
# caller(0) reports information about the current call frame.
# To allow a module to run correctly as a program or a module,
# put all executable code in functions,
# add a main() function,
# and write a single line at the start of the module:
# {
# main() unless caller(0);
# }
# If there's no caller for the module,
# someone invoked it directly as a program (with `perl path/to/Module.pm` instead of (use Module;)).
# The eighth element of the list returned from (caller) in list context
# is a true value if the call frame represents use or require
# and undef otherwise.
# While that's more accurate, few people use it.
# ================================
# Idioms - Postfix Parameter Validation
# ================================
# The CPAN has several modules which help verify the parameters of your functions;
# Params::Validate and MooseX::Params::Validate are two good options.
# Simple validation is easy even without those modules.
# Suppose your function takes exactly two arguments. You could write:
{
    use Carp 'croak';

    # Long-hand parameter validation: a full conditional block guards the
    # (not yet written) body. Croaks unless exactly two arguments are passed;
    # otherwise falls through to the stub, which dies with "Unimplemented".
    sub groom_monkeys {
        unless ( @_ == 2 ) {
            croak 'Can only groom two monkeys!';
        }

        ...;
    }
}
# ... but from a linguistic perspective,
# the consequences are more important than the check
# and deserve to be at the start of the expression:
{
    # Postfix form of the same validation: the consequence (croak) leads and
    # the condition follows.
    # It lives in a sub because @_ only holds arguments inside one; at file
    # scope @_ is empty, so the check would croak on every run of the script.
    # Croaks unless exactly two arguments are passed; otherwise falls through
    # to the stub, which dies with "Unimplemented".
    sub groom_monkeys_postfix {
        croak 'Can only groom two monkeys!' unless @_ == 2;

        ...;
    }
}
# This early return technique—especially with postfix conditionals—
# can simplify the rest of the code.
# Each such assertion is effectively a single row in a truth table.
# Alternately, function signatures (Real Function Signatures) of some kind will handle this case for you.
# ================================
# Idioms - Regex En Passant
# ================================
# Many Perl idioms rely on the fact that expressions evaluate to values:
{
# Assignment is an expression: `my $extension = 42` evaluates to $extension,
# that value is assigned to $ext_num, and say prints the result of the
# outer assignment (42).
say my $ext_num = my $extension = 42;
}
# That clunky code demonstrates how to use the value of one expression in another expression.
# This isn't a new idea;
# you've likely used the return value of a function in a list
# or as an argument to another function before.
# You may not have realized its implications.
# Suppose you want to extract a first name from a first name plus surname combination
# with a precompiled regular expression in $first_name_rx:
{
# Placeholder inputs: an empty name and an empty precompiled pattern.
my $name = qq//;
my $first_name_rx = qr//;
# The parentheses around $first_name put the match in list context, so the
# match returns its captures and the first one is assigned.
my ($first_name) = $name =~ /($first_name_rx)/;
}
# In list context, a successful regex match returns a list of all captures (Capturing),
# and Perl assigns the first one to $first_name.
# To remove all non-word characters to create a useful user name for a system account, you could write:
{
# Placeholder input.
my $name = qq//;
# The parenthesized assignment runs first and yields $normalized_name, which
# tr/// then edits in place: /c complements the A-Za-z set and /d deletes,
# so every character that is not an ASCII letter is removed from the copy.
( my $normalized_name = $name ) =~ tr/A-Za-z//dc;
}
# Newer code can use the non-destructive substitution modifier (/r):
{
# Placeholder input.
my $name = qq//;
# With /r, tr/// leaves $name unmodified and returns the transliterated copy,
# so no parenthesized assignment is needed.
my $normalized_name = $name =~ tr/A-Za-z//dcr;
}
# In the parenthesized form, first assign the value of $name to $normalized_name.
# The parentheses change precedence so that assignment happens first.
# The assignment expression evaluates to the variable $normalized_name.
# That variable becomes the first operand to the transliteration operator.
# This technique works on other in-place modification operators:
{
my $age = 14;
# Copy $age into $next_age, then increment the copy; $age itself stays 14.
( my $next_age = $age )++;
say "I am $age, but next year I will be $next_age";
}
# ================================
# Idioms - Unary Coercions
# ================================
# Perl's type system almost always does the right thing when you choose the correct operators.
# Use the string concatenation operator and Perl will treat both operands as strings.
# Use the addition operator and Perl will treat both operands as numeric.
# Occasionally you have to give Perl a hint about what you mean with a unary coercion
# to force a specific evaluation of a value.
# Apply the three unary coercion idioms to a single value.
#
# Parameters:
#   $value - the scalar to coerce.
# Returns:
#   A three-element list: the value as a number, as a boolean, and as a string.
#
# Kept in a sub rather than run at file scope on a placeholder: numifying an
# empty string emits an "isn't numeric" warning under `use warnings`.
sub demo_unary_coercions {
    my ($value) = @_;

    # Add zero to treat a value as numeric:
    my $numeric_value = 0 + $value;

    # Double negate a value to treat it as a boolean:
    my $boolean_value = !!$value;

    # Concatenate a value with the empty string to treat it as a string:
    my $string_value = '' . $value;

    return ( $numeric_value, $boolean_value, $string_value );
}
# The need for these coercions is vanishingly rare, but it happens.
# While it may look like it would be safe to remove a "useless" (+ 0) from an expression,
# doing so may well break the code.
# ================================
# Global Variables
# ================================
# Perl provides several super global variables.
# They're not scoped to a package or file.
# They're really, truly global.
# Unfortunately, any direct or indirect modifications of these variables
# may change the behavior of other parts of the program.
# Experienced Perl hackers have memorized some of them.
# Few people have memorized all of them—they're terse.
# Only a handful are regularly useful.
# perldoc perlvar contains the exhaustive list of these variables.
# ================================
# Global Variables - Managing Super Globals
# ================================
# As Perl evolves, it moves more global behavior into lexical behavior,
# so that you can avoid many of these globals.
# When you must use them,
# use local in the smallest possible scope to constrain any modifications.
# You are still susceptible to any changes made to these variables from code you call,
# but you reduce the likelihood of surprising code outside of your scope.
# As the easy file slurping idiom (Easy File Slurping) demonstrates,
# local is often the right approach:
# Slurp an entire file while confining the change to $/ to the innermost block.
#
# Parameters:
#   $filename - path of the file to read.
# Returns:
#   The complete contents of the file as one string.
# Dies if the file cannot be opened or closed.
#
# Takes a real path as an argument: opening an empty-string placeholder at
# file scope fails, and with autodie in effect that failure is fatal.
sub demo_localized_slurp {
    my ($filename) = @_;

    open my $fh, '<', $filename
        or die "Cannot open '$filename' for reading: $!";

    my $file;
    { local $/; $file = <$fh> }

    close $fh or die "Cannot close '$filename': $!";

    return $file;
}
# The effect of localizing $/ lasts only through the end of the block.
# There is a low chance
# that any Perl code will run as a result of reading lines from the filehandle
# and change the value of $/ within the block.
# Not all cases of using super globals are this easy to guard, but this often works.
# TODO: review
# Other times you need to read the value of a super global
# and hope that no other code has modified it.
# Catching exceptions with an eval block is susceptible to at least one race condition
# where DESTROY() methods invoked on lexicals that have gone out of scope may reset $@:
# Run a code reference under eval and capture whatever exception it throws.
#
# Parameters:
#   $risky - code reference to call (with no arguments).
# Returns:
#   The exception value if $risky died with a true value; otherwise nothing
#   (undef in scalar context, the empty list in list context).
#
# Takes the risky code as an argument instead of using `...` placeholders:
# the yada yada statement is executable and dies with "Unimplemented" when
# reached.
sub demo_copy_eval_error {
    my ($risky) = @_;

    # Keep this sub's use of $@ from leaking into the caller's view of it.
    local $@;
    eval { $risky->() };

    # Copy $@ in the same expression that tests it, before any other code
    # can run and overwrite it.
    if ( my $exception = $@ ) {
        return $exception;
    }

    return;
}
# Copy $@ immediately after catching an exception to preserve its contents.
# See also Try::Tiny instead (Exception Caveats).
# ================================
# Global Variables - English Names
# ================================
# The core English module provides verbose names for punctuation-heavy super globals.
# Import them into a namespace with:
{
# Load the verbose aliases for the punctuation variables.
# NOTE(review): `use` takes effect at compile time and English exports its
# names into the current package, so these braces presumably do not limit
# where the long names are visible — confirm against perldoc English.
use English '-no_match_vars'; # unnecessary in 5.20 and 5.22
}
# This allows you to use the verbose names documented in perldoc perlvar
# within the scope of this pragma.
# Three regex-related super globals ($&, $`, and $')
# used to impose a global performance penalty for all regular expressions within a program.
# This has been fixed in Perl 5.20.
# If you forget the (-no_match_vars) import,
# your program will suffer the penalty even if you don't explicitly read from those variables.
# Modern Perl programs can use the @- variable instead of them.
# ================================
# Global Variables - Useful Super Globals
# ================================
# Most programs can get by with using only a couple of the super globals.
# You're most likely to encounter only a few of these variables.
# $/ (or $INPUT_RECORD_SEPARATOR from the English pragma)
# is a string of zero or more characters
# which denotes the end of a record when reading input a record at a time.
# By default, this is your platform-specific newline character sequence.
# If you undefine this value,
# Perl will attempt to read the entire file into memory.
# If you set this value to a reference to an integer,
# Perl will try to read that many bytes per record (so beware of Unicode concerns).
# If you set this value to an empty string (''),
# Perl will read in a paragraph at a time,
# where a paragraph is a chunk of text followed by an arbitrary number of newlines.
# $. ($INPUT_LINE_NUMBER)
# contains the number of records read from the most recently-accessed filehandle.
# You can read from this variable,
# but writing to it has no effect.
# Localizing this variable will localize the filehandle to which it refers.
# Yes, that's confusing.
# $| ($OUTPUT_AUTOFLUSH)
# governs whether Perl will flush everything written to the currently selected filehandle immediately
# or only when Perl's buffer is full.
# Unbuffered output is useful when writing to a pipe or socket or terminal
# which should not block waiting for input.
# This variable will coerce any values assigned to it to boolean values.
# @ARGV
# contains the command-line arguments passed to the program.
# $! ($ERRNO)
# is a dualvar (Dualvars)
# which contains the result of the most recent system call.
# In numeric context,
# this corresponds to C's errno value, where anything other than zero indicates an error.
# In string context, this evaluates to the appropriate system error string.
# Localize this variable before making a system call (implicitly or explicitly)
# to avoid overwriting the errno value for other code elsewhere.
# Perl's internals make system calls sometimes,
# so the value of this variable can change out from under you.
# Copy it immediately after causing a system call for accurate results.
# $" ($LIST_SEPARATOR)
# contains the string used to separate array and list elements interpolated into a string.
# %+
# contains named captures from successful regular expression matches (Named Captures).
# $@ ($EVAL_ERROR)
# contains the value thrown from the most recent exception (Catching Exceptions).
# $0 ($PROGRAM_NAME)
# contains the name of the program currently executing.
# You may modify this value on some Unix-like platforms to change the name of the program
# as it appears to other programs on the system, such as ps or top.
# $$ ($PID)
# contains the process id of the currently running instance of the program as the operating system understands it.
# This will vary between (fork())ed programs
# and may vary between threads in the same program.
# @INC
# holds a list of filesystem paths in which Perl will look for files to load with use or require.
# See perldoc -f require for other items this array can contain.
# %SIG
# maps OS and low-level Perl signals to function references used to handle those signals.
# Trap the standard Ctrl-C interrupt by catching the INT signal, for example.
# See perldoc perlipc for more information about signals and signal handling.
# ================================
# Global Variables - Alternatives to Super Globals
# ================================
# IO and exceptions are the worst perpetrators of action at a distance.
# Use Try::Tiny (Exception Caveats) to insulate you from the tricky semantics of proper exception handling.
# (local)ize and copy the value of $! to avoid strange behaviors
# when Perl makes implicit system calls.
# Use IO::File and its methods on lexical filehandles (Special File Handling Variables)
# to limit unwanted global changes to IO behavior.