From 47668cc1fff90c9e453acec01a69e924f06f5b95 Mon Sep 17 00:00:00 2001
From: eharkins <eli.harkins@gmail.com>
Date: Wed, 2 Sep 2020 14:36:27 -0400
Subject: [PATCH] clean-tsv-metadata: too many col error msg

---
 bin/clean-tsv | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/bin/clean-tsv b/bin/clean-tsv
index 9d31f5716..521719dca 100755
--- a/bin/clean-tsv
+++ b/bin/clean-tsv
@@ -11,11 +11,18 @@ def clean_tsv_file(input_file, output_file, n_cols, header, sort_col):
     """
     if n_cols:
         # if --n-cols is passed, read in and back out to assert n columns
-        data = pd.read_csv(input_file,
-                               sep="\t",
-                               header=None,
-                               names=list(range(n_cols)), # overriding column names; this adds missing tabs when there are too few
-                               usecols=list(range(n_cols))) # using first n only; this removes extra tabs
+        try:
+            data = pd.read_csv(input_file,
+                                   sep="\t",
+                                   header=None,
+                                   names=list(range(n_cols)), # overriding column names; this adds missing tabs when there are too few
+                                   usecols=list(range(n_cols))) # using first n only; this removes extra tabs
+        except pd.errors.ParserError as e:
+            if "Too many columns specified" in str(e):
+                print(e)
+                print(f"--n-cols {n_cols} was passed, but there are not this many columns in {input_file}. --n-cols can't add extra columns, it just enforces up to the existing number of columns in the tsv.")
+                exit()
+            raise e
         data.to_csv(output_file, sep="\t", index=False, header=False, quoting=csv.QUOTE_NONE)
         input_file = output_file # we now want to read in the column-corrected version
     data = pd.read_csv(input_file,