osirrc · ryan-clancy · May 28, 2019 · May 21, 2019 · May 28, 2019 · May 28, 2019
diff --git a/README.md b/README.md
@@ -103,6 +103,7 @@ Options with `none` as the default are required.
 | `--output` | `string` | `none` | `--output $(pwd)/output` | the output path for run files
 | `--qrels` | `string` | `none` | `--qrels $(pwd)/qrels/qrels.robust2004.txt` | the qrels file for evaluation
 | `--opts` | `[key]=[value] ...` | `none` | `--opts search_args="-bm25"` | extra options passed to the search script
+| `--timings` | `flag` | `false` | `--timings` | print timing information (requires the `time` command to be available in the image)
 
 ### Command Line Options - train
 

diff --git a/run.py b/run.py
@@ -68,6 +68,7 @@ def str_to_bool(s):
     parser_search.add_argument("--test_split", required=False, default="", type=str,
                                help="the subset of topic ids to use for testing")
     parser_search.add_argument("--opts", nargs="+", default="", type=str, help="the args passed to the search script")
+    parser_search.add_argument("--timings", action="store_true", help="enable timing information to be printed")
 
     # Specific to interact
     parser_interact = parser_sub.add_parser("interact")

diff --git a/searcher.py b/searcher.py
@@ -1,5 +1,6 @@
 import json
 import os
+import re
 import subprocess
 import sys
 
@@ -50,15 +51,81 @@ def search(self, client, output_path_guest, topic_path_host, topic_path_guest,
             "top_k": self.config.top_k
         }
 
+        # The search command
+        command = "sh -c '/search --json {}'"
+
+        if self.config.timings:
+
+            # The search command with timings
+            command = "sh -c 'time /search --json {}'"
+
+            # Copy the first query into its own single-query topic file
+            single_query_file = ''
+            with open(os.path.join(topic_path_host, os.path.basename(self.config.topic)), 'r') as file:
+                queries = file.read()
+                query_end = queries.find('</top>')
+                if query_end == -1:
+                    sys.exit('Query format unknown...')
+                single_query = queries[:query_end + 6]
+                single_query_file = os.path.splitext(os.path.basename(self.config.topic))[0] + '.single.txt'
+                out_file = open(os.path.join(topic_path_host, single_query_file), 'w')
+                out_file.write(single_query)
+                out_file.close()
+
+            # Time a single-query search, used below as the index load time
+            search_args['topic']['path'] = os.path.join(topic_path_guest, single_query_file)
+            container = client.containers.run("{}:{}".format(self.config.repo, save_tag), command.format(json.dumps(json.dumps(search_args))), volumes=volumes,
+                                              detach=True)
+            load_times = []
+            for line in container.logs(stream=True):
+                match = re.match('^(real|user|sys)\t*(.*)m(\s*)(.*)s$', line.decode('utf-8'))
+                if match:
+                    load_times.append(match)
+
+        # Time actual search
+        search_args['topic']['path'] = os.path.join(topic_path_guest, os.path.basename(self.config.topic))
         print("Starting container from saved image...")
-        container = client.containers.run("{}:{}".format(self.config.repo, save_tag),
-                                          command="sh -c '/search --json {}'".format(json.dumps(json.dumps(search_args))),
-                                          volumes=volumes, detach=True, publish_all_ports=True)
+        container = client.containers.run("{}:{}".format(self.config.repo, save_tag), command.format(json.dumps(json.dumps(search_args))), volumes=volumes,
+                                          detach=True)
 
+        search_times = []
         print("Logs for search in container with ID {}...".format(container.id))
         for line in container.logs(stream=True):
+            if self.config.timings:
+                match = re.match('^(real|user|sys)\t*(.*)m(\s*)(.*)s$', line.decode('utf-8'))
+                if match:
+                    search_times.append(match)
             print(str(line.decode('utf-8')), end="")
 
+        if self.config.timings:
+            print()
+
+            print('**********')
+            print('Index load timing information')
+            print(load_times[0].group(0))
+            print(load_times[1].group(0))
+            print(load_times[2].group(0))
+            print()
+
+            print('**********')
+            print('Search timing information')
+            print(search_times[0].group(0))
+            print(search_times[1].group(0))
+            print(search_times[2].group(0))
+            print()
+
+            result_minutes = []
+            result_seconds = []
+            for i in range(len(load_times)):
+                result_minutes.append(int(search_times[i].group(2)) - int(load_times[i].group(2)))
+                result_seconds.append(float(search_times[i].group(4)) - float(load_times[i].group(4)))
+            print('**********')
+            print('Search timing less load')
+            print('real\t{}m{}{:.3f}s'.format(result_minutes[0], search_times[0].group(3), result_seconds[0]))
+            print('user\t{}m{}{:.3f}s'.format(result_minutes[1], search_times[0].group(3), result_seconds[1]))
+            print('sys\t{}m{}{:.3f}s'.format(result_minutes[2], search_times[0].group(3), result_seconds[2]))
+            print()
+
         print("Evaluating results using trec_eval...")
         for file in os.listdir(self.config.output):
             run = os.path.join(self.config.output, file)