-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathduplicate_columns.pl
139 lines (114 loc) · 3.25 KB
/
duplicate_columns.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#!/usr/bin/env perl
# Duplicates selected columns.
# Usage:
# perl duplicate_columns.pl [tab-separated table] "[column title]" "[another column title]"
# [etc.]
# Prints to console. To print to file, use
# perl duplicate_columns.pl [tab-separated table] "[column title]" "[another column title]"
# [etc.] > [output table path]
use strict;
use warnings;
my $table = $ARGV[0];
my @titles_of_columns_to_duplicate = @ARGV[1..$#ARGV];
my $NEWLINE = "\n";
my $DELIMITER = "\t";
my $NO_DATA = "";
my $PRINT_DUPLICATE_VALUES = 0;
my $SORT_VALUES = 1;
my $CONCATENATED_VALUE_SEPARATOR = ", ";
my $CONCATENATED_COLUMN_TITLE_SEPARATOR = "_";
# verifies that input file exists and is not empty
if(!$table or !-e $table or -z $table)
{
print STDERR "Error: table not provided, does not exist, or empty:\n\t"
.$table."\nExiting.\n";
die;
}
# verifies that titles of column to duplicate are provided and make sense
if(!scalar @titles_of_columns_to_duplicate)
{
print STDERR "Error: title of columns to duplicate not provided. Exiting.\n";
die;
}
# reads in and processes input table
my %column_to_duplicate = (); # key: column (0-indexed) -> value: 1 if column will be duplicated
my %column_title_to_column = ();
foreach my $column_title(@titles_of_columns_to_duplicate)
{
$column_title_to_column{$column_title} = -1;
}
my $first_line = 1;
open TABLE, "<$table" || die "Could not open $table to read; terminating =(\n";
while(<TABLE>) # for each row in the file
{
chomp;
my $line = $_;
if($line =~ /\S/) # if row not empty
{
my @items_in_line = split($DELIMITER, $line, -1);
if($first_line) # column titles
{
# identifies columns to merge
my $column = 0;
foreach my $column_title(@items_in_line)
{
if(defined $column_title_to_column{$column_title})
{
if($column_title_to_column{$column_title} == -1)
{
$column_title_to_column{$column_title} = $column;
$column_to_duplicate{$column} = 1;
}
else
{
print STDERR "Warning: column ".$column_title." encountered more "
."than once in table ".$table."\n";
}
}
$column++;
}
# verifies that all columns have been found
foreach my $column_title(keys %column_title_to_column)
{
if($column_title_to_column{$column_title} == -1)
{
print STDERR "Error: expected column title ".$column_title
." not found in table ".$table
."\nExiting.\n";
die;
}
$column++;
}
# prints existing column titles
print $line;
# generates and prints duplicated column titles
# in order of their column titles provided in input
foreach my $column_title(@titles_of_columns_to_duplicate)
{
print $DELIMITER.$column_title."_copy";
}
print $NEWLINE;
$first_line = 0; # next line is not column titles
}
else # column values (not titles)
{
# prints existing values
print $line;
# retrieves and prints duplicated values
# in order of their column titles provided in input
foreach my $column_title(@titles_of_columns_to_duplicate)
{
my $column = $column_title_to_column{$column_title};
my $value = $items_in_line[$column];
print $DELIMITER;
if(defined $value and length $value)
{
print $value;
}
}
print $NEWLINE;
}
}
}
close TABLE;
# August 27, 2021