From 5c170fde2ba023a775b720c19d01c0727e0ac7d9 Mon Sep 17 00:00:00 2001 From: "zhangyunming1990@gmail.com" Date: Wed, 31 Oct 2018 23:51:13 -0400 Subject: [PATCH 01/19] updating the evaluation scripts to generate cpps files from the .gt files --- apps/bfs.gt | 11 +- apps/cc.gt | 6 +- apps/pagerankdelta.gt | 7 +- apps/sssp.gt | 5 +- graphit_eval/eval/table7/Makefile | 36 ++- graphit_eval/eval/table7/benchmark.py | 14 +- .../eval/table7/cpps/bfs_hybrid_dense.cpp | 159 ----------- .../table7/cpps/bfs_hybrid_dense_bitvec.cpp | 163 ----------- .../cpps/bfs_hybrid_dense_bitvec_segment.cpp | 153 ----------- .../eval/table7/cpps/bfs_push_slq.cpp | 47 ---- .../eval/table7/cpps/cc_dense_forward.cpp | 129 --------- .../eval/table7/cpps/cc_hybrid_dense.cpp | 147 ---------- .../table7/cpps/cc_hybrid_dense_bitvec.cpp | 157 ----------- .../cpps/cc_hybrid_dense_bitvec_numa.cpp | 201 -------------- .../cpps/cc_hybrid_dense_bitvec_segment.cpp | 167 ----------- .../eval/table7/cpps/cf_pull_load_balance.cpp | 96 ------- .../cpps/cf_pull_load_balance_segment.cpp | 110 -------- .../eval/table7/cpps/pagerank_pull.cpp | 146 ---------- .../eval/table7/cpps/pagerank_pull_numa.cpp | 188 ------------- .../table7/cpps/pagerank_pull_segment.cpp | 153 ----------- .../cpps/pagerankdelta_hybrid_dense.cpp | 207 -------------- .../pagerankdelta_hybrid_dense_bitvec.cpp | 217 --------------- ...pagerankdelta_hybrid_dense_bitvec_numa.cpp | 259 ------------------ ...erankdelta_hybrid_dense_bitvec_segment.cpp | 223 --------------- .../table7/cpps/pagerankdelta_sparse_push.cpp | 163 ----------- .../table7/cpps/sssp_hybrid_denseforward.cpp | 144 ---------- .../cpps/sssp_hybrid_denseforward_numa.cpp | 180 ------------ .../cpps/sssp_hybrid_denseforward_segment.cpp | 152 ---------- .../eval/table7/cpps/sssp_hybrid_segment.cpp | 158 ----------- .../eval/table7/cpps/sssp_push_slq.cpp | 73 ----- graphit_eval/eval/table7/table7_graphit.py | 17 +- 31 files changed, 68 insertions(+), 3820 deletions(-) delete mode 
100755 graphit_eval/eval/table7/cpps/bfs_hybrid_dense.cpp delete mode 100755 graphit_eval/eval/table7/cpps/bfs_hybrid_dense_bitvec.cpp delete mode 100755 graphit_eval/eval/table7/cpps/bfs_hybrid_dense_bitvec_segment.cpp delete mode 100755 graphit_eval/eval/table7/cpps/bfs_push_slq.cpp delete mode 100755 graphit_eval/eval/table7/cpps/cc_dense_forward.cpp delete mode 100755 graphit_eval/eval/table7/cpps/cc_hybrid_dense.cpp delete mode 100755 graphit_eval/eval/table7/cpps/cc_hybrid_dense_bitvec.cpp delete mode 100644 graphit_eval/eval/table7/cpps/cc_hybrid_dense_bitvec_numa.cpp delete mode 100755 graphit_eval/eval/table7/cpps/cc_hybrid_dense_bitvec_segment.cpp delete mode 100755 graphit_eval/eval/table7/cpps/cf_pull_load_balance.cpp delete mode 100755 graphit_eval/eval/table7/cpps/cf_pull_load_balance_segment.cpp delete mode 100755 graphit_eval/eval/table7/cpps/pagerank_pull.cpp delete mode 100755 graphit_eval/eval/table7/cpps/pagerank_pull_numa.cpp delete mode 100755 graphit_eval/eval/table7/cpps/pagerank_pull_segment.cpp delete mode 100755 graphit_eval/eval/table7/cpps/pagerankdelta_hybrid_dense.cpp delete mode 100755 graphit_eval/eval/table7/cpps/pagerankdelta_hybrid_dense_bitvec.cpp delete mode 100755 graphit_eval/eval/table7/cpps/pagerankdelta_hybrid_dense_bitvec_numa.cpp delete mode 100755 graphit_eval/eval/table7/cpps/pagerankdelta_hybrid_dense_bitvec_segment.cpp delete mode 100644 graphit_eval/eval/table7/cpps/pagerankdelta_sparse_push.cpp delete mode 100755 graphit_eval/eval/table7/cpps/sssp_hybrid_denseforward.cpp delete mode 100755 graphit_eval/eval/table7/cpps/sssp_hybrid_denseforward_numa.cpp delete mode 100755 graphit_eval/eval/table7/cpps/sssp_hybrid_denseforward_segment.cpp delete mode 100644 graphit_eval/eval/table7/cpps/sssp_hybrid_segment.cpp delete mode 100755 graphit_eval/eval/table7/cpps/sssp_push_slq.cpp diff --git a/apps/bfs.gt b/apps/bfs.gt index ce86297b..4774801c 100644 --- a/apps/bfs.gt +++ b/apps/bfs.gt @@ -23,13 +23,16 @@ func main() 
startTimer(); vertices.apply(reset); var frontier : vertexset{Vertex} = new vertexset{Vertex}(0); - frontier.addVertex(14); - parent[14] = 14; + var start_vertex : int = atoi(argv[2]); + frontier.addVertex(start_vertex); + parent[start_vertex] = start_vertex; while (frontier.getVertexSetSize() != 0) - #s1# frontier = edges.from(frontier).to(toFilter).applyModified(updateEdge,parent, true); + #s1# var output : vertexset{Vertex} = edges.from(frontier).to(toFilter).applyModified(updateEdge,parent, true); + delete frontier; + frontier = output; end - + delete frontier; var elapsed_time : float = stopTimer(); print "elapsed time: "; print elapsed_time; diff --git a/apps/cc.gt b/apps/cc.gt index 876648d3..7570f4dd 100644 --- a/apps/cc.gt +++ b/apps/cc.gt @@ -22,9 +22,11 @@ func main() var frontier : vertexset{Vertex} = new vertexset{Vertex}(n); vertices.apply(init); while (frontier.getVertexSetSize() != 0) - #s1# frontier = edges.from(frontier).applyModified(updateEdge,IDs); + #s1# var output: vertexset{Vertex} = edges.from(frontier).applyModified(updateEdge,IDs); + delete frontier; + frontier = output; end - + delete frontier; var elapsed_time : float = stopTimer(); print "elapsed time: "; print elapsed_time; diff --git a/apps/pagerankdelta.gt b/apps/pagerankdelta.gt index 346fdcf0..3a0f17a2 100644 --- a/apps/pagerankdelta.gt +++ b/apps/pagerankdelta.gt @@ -50,11 +50,14 @@ func main() for i in 1:11 #s1# edges.from(frontier).apply(updateEdge); if i == 1 - frontier = vertices.where(updateVertexFirstRound); + output = vertices.where(updateVertexFirstRound); else - frontier = vertices.where(updateVertex); + output = vertices.where(updateVertex); end + delete frontier; + delete output; end + delete frontier; var elapsed_time : double = stopTimer(); print "elapsed time: "; diff --git a/apps/sssp.gt b/apps/sssp.gt index ddf03cb5..8b44ea5c 100644 --- a/apps/sssp.gt +++ b/apps/sssp.gt @@ -19,8 +19,9 @@ func main() vertices.apply(reset); var n : int = edges.getVertices(); var 
frontier : vertexset{Vertex} = new vertexset{Vertex}(0); - frontier.addVertex(14); %add source vertex - SP[14] = 14; + var start_vertex : int = atoi(argv[2]); + frontier.addVertex(start_vertex); %add source vertex + SP[start_vertex] = 0; var rounds : int = 0; while (frontier.getVertexSetSize() != 0) diff --git a/graphit_eval/eval/table7/Makefile b/graphit_eval/eval/table7/Makefile index 0c9f8da7..2f81fcd3 100644 --- a/graphit_eval/eval/table7/Makefile +++ b/graphit_eval/eval/table7/Makefile @@ -1,8 +1,11 @@ -COMMON_FLAGS = -std=c++11 -I ../../../src/runtime_lib/ +COMMON_FLAGS = -std=c++11 -g -I ../../../src/runtime_lib/ CILK_FLAGS = $(COMMON_FLAGS) OPENMP_FLAGS = $(COMMON_FLAGS) +GRAPHITC_PY = ../../../build/bin/graphitc.py +GRAPHIT_APP_DIR = ../../../apps +GRAPHIT_SCHEDULE_DIR= ../../../test/input_with_schedules ifdef GCC_PAR # compiling with gcc CILK and OPENMP @@ -24,16 +27,31 @@ OPENMP_FLAGS += -O3 endif -pr = pagerank_pull pagerank_pull_segment -sssp = sssp_hybrid_denseforward -cc = cc_hybrid_dense cc_hybrid_dense_bitvec_segment -bfs = bfs_hybrid_dense bfs_hybrid_dense_bitvec -prd = pagerankdelta_hybrid_dense pagerankdelta_hybrid_dense_bitvec_segment pagerankdelta_sparse_push -cf = cf_pull_load_balance_segment +#the cpps files that are faster compiled with cilk +pr_cilk_cpps = pagerank_pull pagerank_pull_segment +sssp_cilk_cpps = sssp_hybrid_denseforward +cc_cilk_cpps = cc_hybrid_dense cc_hybrid_dense_bitvec_segment +bfs_cilk_cpps = bfs_hybrid_dense bfs_hybrid_dense_bitvec +prd_cilk_cpps = pagerankdelta_hybrid_dense pagerankdelta_hybrid_dense_bitvec_segment pagerankdelta_sparse_push +cf_cilk_cpps = cf_pull_load_balance_segment -all: $(prd) $(bfs) $(cc) $(sssp) $(pr) openmp_only_schedules +all: graphit_files cpps -openmp_only_schedules: +graphit_files: + python ${GRAPHITC_PY} -a ${GRAPHIT_APP_DIR}/bfs.gt -f ${GRAPHIT_SCHEDULE_DIR}/bfs_hybrid_dense_parallel_cas.gt -o cpps/bfs_hybrid_dense.cpp + python ${GRAPHITC_PY} -a ${GRAPHIT_APP_DIR}/bfs.gt -f 
${GRAPHIT_SCHEDULE_DIR}/bfs_hybrid_dense_parallel_cas_bitvector.gt -o cpps/bfs_hybrid_dense_bitvec.cpp + python ${GRAPHITC_PY} -a ${GRAPHIT_APP_DIR}/bfs.gt -f ${GRAPHIT_SCHEDULE_DIR}/bfs_push_sliding_queue_parallel_cas.gt -o cpps/bfs_push_slq.cpp + python ${GRAPHITC_PY} -a ${GRAPHIT_APP_DIR}/sssp.gt -f ${GRAPHIT_SCHEDULE_DIR}/sssp_hybrid_denseforward_parallel_cas.gt -o cpps/sssp_hybrid_denseforward.cpp + python ${GRAPHITC_PY} -a ${GRAPHIT_APP_DIR}/sssp.gt -f ${GRAPHIT_SCHEDULE_DIR}/sssp_push_parallel_sliding_queue.gt -o cpps/sssp_push_slq.cpp + +cpps: $(prd_cilk_cpps) $(bfs_cilk_cpps) $(cc_cilk_cpps) $(sssp_cilk_cpps) $(pr_cilk_cpps) openmp_only_cpps + +test: $(bfs_cilk_cpps) ${sssp_cilk_cpps} + $(PCC) $(OPENMP_FLAGS) -o bin/bfs_push_slq cpps/bfs_push_slq.cpp + $(PCC) $(OPENMP_FLAGS) -o bin/sssp_push_slq cpps/sssp_push_slq.cpp + +# this two cpps files can only be compiled with openmp to get parallel performance +openmp_only_cpps: $(PCC) $(OPENMP_FLAGS) -o bin/bfs_push_slq cpps/bfs_push_slq.cpp $(PCC) $(OPENMP_FLAGS) -o bin/sssp_push_slq cpps/sssp_push_slq.cpp % : cpps/%.cpp diff --git a/graphit_eval/eval/table7/benchmark.py b/graphit_eval/eval/table7/benchmark.py index 46227a21..0a8d9389 100755 --- a/graphit_eval/eval/table7/benchmark.py +++ b/graphit_eval/eval/table7/benchmark.py @@ -6,7 +6,7 @@ import time from threading import Timer -use_NUMACTL = True +use_NUMACTL = True framework_app_lookup = { @@ -68,7 +68,9 @@ def path_setup(frameworks): def get_starting_points(graph): """ Use the points with non-zero out degree and don't hang during execution. 
""" - if graph != "friendster": + if graph == "testGraph": + return ["1","2"] + elif graph != "friendster": return ["17", "38", "47", "52", "53", "58", "59", "69", "94", "96"] else: # friendster takes a long time so use fewer starting points @@ -93,7 +95,7 @@ def get_cmd_graphit(g, p, point): command = graphit_PATH + graphit_binary_map[g][p] + " " + args #if p in ["pr", "cc", "prd"] and g in ["twitter", "webGraph", "friendster"]: - if g in ["twitter", "webGraph", "friendster"]: + if g in ["socLive", "twitter", "webGraph", "friendster"]: if use_NUMACTL: # if NUAMCTL is available @@ -118,11 +120,11 @@ def main(): parser.add_argument('-a', '--applications', nargs='+', default=["bfs", "sssp", "pr", "cc", "prd"], help="applications to benchmark. Defaults to all four applications.") - parser.add_argument('--use_NUMACTL', type=int, default=1, help='use numactl when running. 1 for enable (default), 0 for disable') + args = parser.parse_args() - if args.use_NUMACTL != 1: - use_NUMACTL = False + + path_setup(["graphit"]) diff --git a/graphit_eval/eval/table7/cpps/bfs_hybrid_dense.cpp b/graphit_eval/eval/table7/cpps/bfs_hybrid_dense.cpp deleted file mode 100755 index b808f497..00000000 --- a/graphit_eval/eval/table7/cpps/bfs_hybrid_dense.cpp +++ /dev/null @@ -1,159 +0,0 @@ -#include -#include -#include -#include "intrinsics.h" -Graph edges; -int * __restrict parent; -template VertexSubset* edgeset_apply_hybrid_dense_parallel_from_vertexset_to_filter_func_with_frontier(Graph & g , VertexSubset* from_vertexset, TO_FUNC to_func, APPLY_FUNC apply_func, PUSH_APPLY_FUNC push_apply_func) -{ - int64_t numVertices = g.num_nodes(), numEdges = g.num_edges(); - from_vertexset->toSparse(); - long m = from_vertexset->size(); - // used to generate nonzero indices to get degrees - uintT *degrees = newA(uintT, m); - // We probably need this when we get something that doesn't have a dense set, not sure - // We can also write our own, the eixsting one doesn't quite work for bitvectors - 
//from_vertexset->toSparse(); - { - parallel_for (long i = 0; i < m; i++) { - NodeID v = from_vertexset->dense_vertex_set_[i]; - degrees[i] = g.out_degree(v); - } - } - uintT outDegrees = sequence::plusReduce(degrees, m); - int64_t threshold = numEdges / 20; - if (m + outDegrees > threshold) { - VertexSubset *next_frontier = new VertexSubset(g.num_nodes(), 0); - bool * next = newA(bool, g.num_nodes()); - parallel_for (int i = 0; i < numVertices; i++)next[i] = 0; - from_vertexset->toDense(); - - parallel_for ( NodeID d=0; d < g.num_nodes(); d++) { - if (to_func(d)){ - for(NodeID s : g.in_neigh(d)){ - if (from_vertexset->bool_map_[s] ) { - if( apply_func ( s , d ) ) { - next[d] = 1; - if (!to_func(d)) break; - } - } - } //end of loop on in neighbors - } //end of to filtering - } //end of outer for loop - - next_frontier->num_vertices_ = sequence::sum(next, numVertices); - next_frontier->bool_map_ = next; - free(degrees); - return next_frontier; - } else { - - VertexSubset *next_frontier = new VertexSubset(g.num_nodes(), 0); - if (numVertices != from_vertexset->getVerticesRange()) { - cout << "edgeMap: Sizes Don't match" << endl; - abort(); - } - - if (outDegrees == 0) { - free(degrees); - return next_frontier; - } - uintT *offsets = degrees; - long outEdgeCount = sequence::plusScan(offsets, degrees, m); - uintE *outEdges = newA(uintE, outEdgeCount); - parallel_for (long i=0; i < m; i++) { - NodeID s = from_vertexset->dense_vertex_set_[i]; - int j = 0; - uintT offset = offsets[i]; - for(NodeID d : g.out_neigh(s)){ - if (to_func(d)) { - if( push_apply_func ( s , d ) ) { - outEdges[offset + j] = d; - } else { outEdges[offset + j] = UINT_E_MAX; } - } //end of to func - else { outEdges[offset + j] = UINT_E_MAX; } - j++; - } //end of for loop on neighbors - } - uintE *nextIndices = newA(uintE, outEdgeCount); - long nextM = sequence::filter(outEdges, nextIndices, outEdgeCount, nonMaxF()); - next_frontier->num_vertices_ = nextM; - delete[] from_vertexset->dense_vertex_set_; 
- next_frontier->dense_vertex_set_ = nextIndices; - - free(outEdges); - free(degrees); - return next_frontier; - } //end of else -} //end of edgeset apply function -struct updateEdge_push_ver -{ - bool operator() (NodeID src, NodeID dst) - { - bool output3 ; - bool parent_trackving_var_2 = (bool) 0; - parent_trackving_var_2 = compare_and_swap ( parent[dst], -(1) , src); - output3 = parent_trackving_var_2; - return output3; - }; -}; -struct parent_generated_vector_op_apply_func_0 -{ - void operator() (NodeID v) - { - parent[v] = -(1) ; - }; -}; -struct updateEdge -{ - bool operator() (NodeID src, NodeID dst) - { - bool output1 ; - parent[dst] = src; - output1 = (bool) 1; - return output1; - }; -}; -struct toFilter -{ - bool operator() (NodeID v) - { - bool output ; - output = (parent[v]) == ( -(1) ); - return output; - }; -}; -struct reset -{ - void operator() (NodeID v) - { - parent[v] = -(1) ; - }; -}; -int main(int argc, char * argv[]) -{ - edges = builtin_loadEdgesFromFile ( argv[(1) ]) ; - parent = new int [ builtin_getVertices(edges) ]; - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - parent_generated_vector_op_apply_func_0()(vertexsetapply_iter); - }; - for ( int trail = (0) ; trail < (10) ; trail++ ) - { - startTimer() ; - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - reset()(vertexsetapply_iter); - }; - VertexSubset * frontier = new VertexSubset ( builtin_getVertices(edges) , (0) ); - builtin_addVertex(frontier, atoi(argv[2]) ) ; - parent[atoi(argv[2]) ] = atoi(argv[2]) ; - while ( (builtin_getVertexSetSize(frontier) ) != ((0) )) - { - frontier = edgeset_apply_hybrid_dense_parallel_from_vertexset_to_filter_func_with_frontier(edges, frontier, toFilter(), updateEdge(), updateEdge_push_ver()); - } - float elapsed_time = stopTimer() ; - std::cout << "elapsed time: "<< std::endl; - std::cout << elapsed_time<< 
std::endl; - free(frontier); - } - free(parent); -}; - diff --git a/graphit_eval/eval/table7/cpps/bfs_hybrid_dense_bitvec.cpp b/graphit_eval/eval/table7/cpps/bfs_hybrid_dense_bitvec.cpp deleted file mode 100755 index 92eb7f72..00000000 --- a/graphit_eval/eval/table7/cpps/bfs_hybrid_dense_bitvec.cpp +++ /dev/null @@ -1,163 +0,0 @@ -#include -#include -#include -#include "intrinsics.h" -Graph edges; -int * __restrict parent; -template VertexSubset* edgeset_apply_hybrid_dense_parallel_from_vertexset_to_filter_func_with_frontier_pull_frontier_bitvector(Graph & g , VertexSubset* from_vertexset, TO_FUNC to_func, APPLY_FUNC apply_func, PUSH_APPLY_FUNC push_apply_func) -{ - int64_t numVertices = g.num_nodes(), numEdges = g.num_edges(); - from_vertexset->toSparse(); - long m = from_vertexset->size(); - // used to generate nonzero indices to get degrees - uintT *degrees = newA(uintT, m); - // We probably need this when we get something that doesn't have a dense set, not sure - // We can also write our own, the eixsting one doesn't quite work for bitvectors - //from_vertexset->toSparse(); - { - parallel_for (long i = 0; i < m; i++) { - NodeID v = from_vertexset->dense_vertex_set_[i]; - degrees[i] = g.out_degree(v); - } - } - uintT outDegrees = sequence::plusReduce(degrees, m); - if (m + outDegrees > numEdges / 20) { - VertexSubset *next_frontier = new VertexSubset(g.num_nodes(), 0); - bool * next = newA(bool, g.num_nodes()); - parallel_for (int i = 0; i < numVertices; i++)next[i] = 0; - from_vertexset->toDense(); - Bitmap bitmap(numVertices); - bitmap.reset(); - parallel_for(int i = 0; i < numVertices; i+=32){ - int start = i; - int end = (((i + 32) < numVertices)? 
(i+32):numVertices); - for(int j = start; j < end; j++){ - if (from_vertexset->bool_map_[j]) - bitmap.set_bit(j); - } - } - parallel_for ( NodeID d=0; d < g.num_nodes(); d++) { - if (to_func(d)){ - for(NodeID s : g.in_neigh(d)){ - if (bitmap.get_bit(s)) { - if( apply_func ( s , d ) ) { - next[d] = 1; - if (!to_func(d)) break; - } - } - } //end of loop on in neighbors - } //end of to filtering - } //end of outer for loop - next_frontier->num_vertices_ = sequence::sum(next, numVertices); - free(next_frontier->bool_map_); - next_frontier->bool_map_ = next; - free(degrees); - return next_frontier; - } else { - VertexSubset *next_frontier = new VertexSubset(g.num_nodes(), 0); - if (numVertices != from_vertexset->getVerticesRange()) { - cout << "edgeMap: Sizes Don't match" << endl; - abort(); - } - if (outDegrees == 0) { - free(degrees); - return next_frontier; - } - uintT *offsets = degrees; - long outEdgeCount = sequence::plusScan(offsets, degrees, m); - uintE *outEdges = newA(uintE, outEdgeCount); - parallel_for (long i=0; i < m; i++) { - NodeID s = from_vertexset->dense_vertex_set_[i]; - int j = 0; - uintT offset = offsets[i]; - for(NodeID d : g.out_neigh(s)){ - if (to_func(d)) { - if( push_apply_func ( s , d ) ) { - outEdges[offset + j] = d; - } else { outEdges[offset + j] = UINT_E_MAX; } - } //end of to func - else { outEdges[offset + j] = UINT_E_MAX; } - j++; - } //end of for loop on neighbors - } - uintE *nextIndices = newA(uintE, outEdgeCount); - long nextM = sequence::filter(outEdges, nextIndices, outEdgeCount, nonMaxF()); - free(outEdges); - free(degrees); - next_frontier->num_vertices_ = nextM; - //delete[] next_frontier->dense_vertex_set_; - free(next_frontier->dense_vertex_set_); - next_frontier->dense_vertex_set_ = nextIndices; - return next_frontier; - } //end of else -} //end of edgeset apply function -struct updateEdge_push_ver -{ - bool operator() (NodeID src, NodeID dst) - { - bool output3 ; - bool parent_trackving_var_2 = (bool) 0; - 
parent_trackving_var_2 = compare_and_swap ( parent[dst], -(1) , src); - output3 = parent_trackving_var_2; - return output3; - }; -}; -struct parent_generated_vector_op_apply_func_0 -{ - void operator() (NodeID v) - { - parent[v] = -(1) ; - }; -}; -struct updateEdge -{ - bool operator() (NodeID src, NodeID dst) - { - bool output1 ; - parent[dst] = src; - output1 = (bool) 1; - return output1; - }; -}; -struct toFilter -{ - bool operator() (NodeID v) - { - bool output ; - output = (parent[v]) == ( -(1) ); - return output; - }; -}; -struct reset -{ - void operator() (NodeID v) - { - parent[v] = -(1) ; - }; -}; -int main(int argc, char * argv[]) -{ - edges = builtin_loadEdgesFromFile ( argv[(1) ]) ; - parent = new int [ builtin_getVertices(edges) ]; - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - parent_generated_vector_op_apply_func_0()(vertexsetapply_iter); - }; - for ( int trail = (0) ; trail < (10) ; trail++ ) - { - startTimer() ; - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - reset()(vertexsetapply_iter); - }; - VertexSubset * frontier = new VertexSubset ( builtin_getVertices(edges) , (0) ); - builtin_addVertex(frontier, (14) ) ; - parent[(14) ] = (14) ; - while ( (builtin_getVertexSetSize(frontier) ) != ((0) )) - { - frontier = edgeset_apply_hybrid_dense_parallel_from_vertexset_to_filter_func_with_frontier_pull_frontier_bitvector(edges, frontier, toFilter(), updateEdge(), updateEdge_push_ver()); - } - float elapsed_time = stopTimer() ; - std::cout << "elapsed time: "<< std::endl; - std::cout << elapsed_time<< std::endl; - } -}; - diff --git a/graphit_eval/eval/table7/cpps/bfs_hybrid_dense_bitvec_segment.cpp b/graphit_eval/eval/table7/cpps/bfs_hybrid_dense_bitvec_segment.cpp deleted file mode 100755 index b0c9261d..00000000 --- a/graphit_eval/eval/table7/cpps/bfs_hybrid_dense_bitvec_segment.cpp +++ /dev/null @@ 
-1,153 +0,0 @@ -#include -#include -#include -#include "intrinsics.h" -Graph edges; -int * __restrict parent; -template VertexSubset* edgeset_apply_hybrid_dense_parallel_from_vertexset_to_filter_func_with_frontier_pull_frontier_bitvector(Graph & g , VertexSubset* from_vertexset, TO_FUNC to_func, APPLY_FUNC apply_func, PUSH_APPLY_FUNC push_apply_func) -{ - int64_t numVertices = g.num_nodes(), numEdges = g.num_edges(); - from_vertexset->toSparse(); - long m = from_vertexset->size(); - // used to generate nonzero indices to get degrees - uintT *degrees = newA(uintT, m); - // We probably need this when we get something that doesn't have a dense set, not sure - // We can also write our own, the eixsting one doesn't quite work for bitvectors - //from_vertexset->toSparse(); - { - parallel_for (long i = 0; i < m; i++) { - NodeID v = from_vertexset->dense_vertex_set_[i]; - degrees[i] = g.out_degree(v); - } - } - uintT outDegrees = sequence::plusReduce(degrees, m); - if (m + outDegrees > numEdges / 20) { - VertexSubset *next_frontier = new VertexSubset(g.num_nodes(), 0); - bool * next = newA(bool, g.num_nodes()); - parallel_for (int i = 0; i < numVertices; i++)next[i] = 0; - from_vertexset->toDense(); - Bitmap bitmap(numVertices); - bitmap.reset(); - parallel_for(int i = 0; i < numVertices; i+=32){ - int start = i; - int end = (((i + 32) < numVertices)? 
(i+32):numVertices); - for(int j = start; j < end; j++){ - if (from_vertexset->bool_map_[j]) - bitmap.set_bit(j); - } - } - parallel_for ( NodeID d=0; d < g.num_nodes(); d++) { - if (to_func(d)){ - for(NodeID s : g.in_neigh(d)){ - if (bitmap.get_bit(s)) { - if( apply_func ( s , d ) ) { - next[d] = 1; - if (!to_func(d)) break; - } - } - } //end of loop on in neighbors - } //end of to filtering - } //end of outer for loop - next_frontier->num_vertices_ = sequence::sum(next, numVertices); - free(next_frontier->bool_map_); - next_frontier->bool_map_ = next; - free(degrees); - return next_frontier; -} else { - VertexSubset *next_frontier = new VertexSubset(g.num_nodes(), 0); - if (numVertices != from_vertexset->getVerticesRange()) { - cout << "edgeMap: Sizes Don't match" << endl; - abort(); - } - if (outDegrees == 0) { - free(degrees); - return next_frontier; - } - uintT *offsets = degrees; - long outEdgeCount = sequence::plusScan(offsets, degrees, m); - uintE *outEdges = newA(uintE, outEdgeCount); - parallel_for (long i=0; i < m; i++) { - NodeID s = from_vertexset->dense_vertex_set_[i]; - int j = 0; - uintT offset = offsets[i]; - for(NodeID d : g.out_neigh(s)){ - if (to_func(d)) { - if( push_apply_func ( s , d ) ) { - outEdges[offset + j] = d; - } else { outEdges[offset + j] = UINT_E_MAX; } - } //end of to func - else { outEdges[offset + j] = UINT_E_MAX; } - j++; - } //end of for loop on neighbors - } - uintE *nextIndices = newA(uintE, outEdgeCount); - long nextM = sequence::filter(outEdges, nextIndices, outEdgeCount, nonMaxF()); - free(outEdges); - free(degrees); - next_frontier->num_vertices_ = nextM; - delete[] next_frontier->dense_vertex_set_; - next_frontier->dense_vertex_set_ = nextIndices; - return next_frontier; -} //end of else -} //end of edgeset apply function -struct updateEdge_push_ver -{ - bool operator() (NodeID src, NodeID dst) - { - bool output3 ; - bool parent_trackving_var_2 = (bool) 0; - parent_trackving_var_2 = compare_and_swap ( parent[dst], -(1) 
, src); - output3 = parent_trackving_var_2; - return output3; - }; -}; -struct parent_generated_vector_op_apply_func_0 -{ - void operator() (NodeID v) - { - parent[v] = -(1) ; - }; -}; -struct updateEdge -{ - bool operator() (NodeID src, NodeID dst) - { - bool output1 ; - parent[dst] = src; - output1 = (bool) 1; - return output1; - }; -}; -struct toFilter -{ - bool operator() (NodeID v) - { - bool output ; - output = (parent[v]) == ( -(1) ); - return output; - }; -}; -int main(int argc, char * argv[]) -{ - edges = builtin_loadEdgesFromFile ( argv[(1) ]) ; - edges.buildPushSegmentedGraphs("s1", atoi(argv[3])); - parent = new int [ builtin_getVertices(edges) ]; - for ( int trail = (0) ; trail < (10) ; trail++ ) - { - parallel_for (int i = 0; i < builtin_getVertices(edges) ; i++) { - parent_generated_vector_op_apply_func_0()(i); - }; - startTimer() ; - VertexSubset * frontier = new VertexSubset ( builtin_getVertices(edges) , (0) ); - builtin_addVertex(frontier, atoi(argv[2]) ) ; - parent[atoi(argv[2]) ] = atoi(argv[2]) ; - while ( (builtin_getVertexSetSize(frontier) ) != ((0) )) - { - frontier = edgeset_apply_hybrid_dense_parallel_from_vertexset_to_filter_func_with_frontier_pull_frontier_bitvector(edges, frontier, toFilter(), updateEdge(), updateEdge_push_ver()); - } - float elapsed_time = stopTimer() ; - std::cout << "elapsed time: "<< std::endl; - std::cout << elapsed_time<< std::endl; - } -}; - diff --git a/graphit_eval/eval/table7/cpps/bfs_push_slq.cpp b/graphit_eval/eval/table7/cpps/bfs_push_slq.cpp deleted file mode 100755 index 690fca54..00000000 --- a/graphit_eval/eval/table7/cpps/bfs_push_slq.cpp +++ /dev/null @@ -1,47 +0,0 @@ -#include -#include -#include "intrinsics.h" -#include -Graph edges; -std::vector< int > parent; -void parent_generated_vector_op_apply_func_0(NodeID v) -{ - parent[v] = -(1) ; -}; -bool updateEdge(NodeID src, NodeID dst) -{ - bool output2 ; - bool parent_trackving_var_1 = (bool) 0; - parent_trackving_var_1 = compare_and_swap ( 
parent[dst], -(1) , src); - output2 = parent_trackving_var_1; - return output2; -}; -bool toFilter(NodeID v) -{ - bool output ; - output = (parent[v]) == ( -(1) ); - return output; -}; -int main(int argc, char * argv[] ) -{ - edges = builtin_loadEdgesFromFile ( argv[(1) ]) ; - parent = std::vector< int > ( builtin_getVertices(edges) ); - - for (int trail = 0; trail < 10; trail++){ - - parallel_for (int i = 0; i < builtin_getVertices(edges) ; i++) { - parent_generated_vector_op_apply_func_0(i); - }; - startTimer() ; - VertexSubset * frontier = new VertexSubset ( builtin_getVertices(edges) , (0) ); - builtin_addVertex(frontier, std::stoi(argv[2]) ) ; - parent[std::stoi(argv[2]) ] = std::stoi(argv[2]) ; - while ( (builtin_getVertexSetSize(frontier) ) != ((0) )) - { - frontier = edgeset_apply_push_parallel_sliding_queue_from_vertexset_with_frontier(edges, frontier, updateEdge); - } - float elapsed_time = stopTimer() ; - std::cout << "elapsed time: "<< std::endl; - std::cout << elapsed_time<< std::endl; - } -} diff --git a/graphit_eval/eval/table7/cpps/cc_dense_forward.cpp b/graphit_eval/eval/table7/cpps/cc_dense_forward.cpp deleted file mode 100755 index 726b89c6..00000000 --- a/graphit_eval/eval/table7/cpps/cc_dense_forward.cpp +++ /dev/null @@ -1,129 +0,0 @@ -#include -#include -#include -#include "intrinsics.h" -Graph edges; -int * __restrict IDs; -template VertexSubset* edgeset_apply_hybrid_denseforward_parallel_deduplicatied_from_vertexset_with_frontier(Graph & g , VertexSubset* from_vertexset, APPLY_FUNC apply_func) -{ - int64_t numVertices = g.num_nodes(), numEdges = g.num_edges(); - from_vertexset->toSparse(); - long m = from_vertexset->size(); - // used to generate nonzero indices to get degrees - uintT *degrees = newA(uintT, m); - // We probably need this when we get something that doesn't have a dense set, not sure - // We can also write our own, the eixsting one doesn't quite work for bitvectors - //from_vertexset->toSparse(); - { - parallel_for (long i = 
0; i < m; i++) { - NodeID v = from_vertexset->dense_vertex_set_[i]; - degrees[i] = g.out_degree(v); - } - } - uintT outDegrees = sequence::plusReduce(degrees, m); - if (m + outDegrees > numEdges / 20) { - VertexSubset *next_frontier = new VertexSubset(g.num_nodes(), 0); - bool * next = newA(bool, g.num_nodes()); - parallel_for (int i = 0; i < numVertices; i++)next[i] = 0; - from_vertexset->toDense(); - parallel_for ( NodeID s=0; s < g.num_nodes(); s++) { - if (from_vertexset->bool_map_[s] ) { - for(NodeID d : g.out_neigh(s)){ - if( apply_func ( s , d ) ) { - next[d] = 1; - } - } // end of inner for loop - } // end of if for from func or from vertexset - } //end of outer for loop - next_frontier->num_vertices_ = sequence::sum(next, numVertices); - next_frontier->bool_map_ = next; - return next_frontier; -} else { - if (g.flags_ == nullptr){ - g.flags_ = new int[numVertices](); - parallel_for(int i = 0; i < numVertices; i++) g.flags_[i]=0; - } - VertexSubset *next_frontier = new VertexSubset(g.num_nodes(), 0); - if (numVertices != from_vertexset->getVerticesRange()) { - cout << "edgeMap: Sizes Don't match" << endl; - abort(); - } - if (outDegrees == 0) return next_frontier; - uintT *offsets = degrees; - long outEdgeCount = sequence::plusScan(offsets, degrees, m); - uintE *outEdges = newA(uintE, outEdgeCount); - parallel_for (long i=0; i < m; i++) { - NodeID s = from_vertexset->dense_vertex_set_[i]; - int j = 0; - uintT offset = offsets[i]; - for(NodeID d : g.out_neigh(s)){ - if( apply_func ( s , d ) && CAS(&(g.flags_[d]), 0, 1) ) { - outEdges[offset + j] = d; - } else { outEdges[offset + j] = UINT_E_MAX; } - j++; - } //end of for loop on neighbors - } - uintE *nextIndices = newA(uintE, outEdgeCount); - long nextM = sequence::filter(outEdges, nextIndices, outEdgeCount, nonMaxF()); - free(outEdges); - free(degrees); - next_frontier->num_vertices_ = nextM; - next_frontier->dense_vertex_set_ = nextIndices; - parallel_for(int i = 0; i < nextM; i++){ - 
g.flags_[nextIndices[i]] = 0; - } - return next_frontier; -} //end of else -} //end of edgeset apply function -struct IDs_generated_vector_op_apply_func_0 -{ - void operator() (NodeID v) - { - IDs[v] = (1) ; - }; -}; -struct updateEdge -{ - bool operator() (NodeID src, NodeID dst) - { - bool output2 ; - bool IDs_trackving_var_1 = (bool) 0; - IDs_trackving_var_1 = writeMin( &IDs[dst], IDs[src] ); - output2 = IDs_trackving_var_1; - return output2; - }; -}; -struct init -{ - void operator() (NodeID v) - { - IDs[v] = v; - }; -}; -int main(int argc, char * argv[]) -{ - edges = builtin_loadEdgesFromFile ( argv[(1) ]) ; - IDs = new int [ builtin_getVertices(edges) ]; - for (int trail = 0; trail < 5; trail++){ - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - IDs_generated_vector_op_apply_func_0()(vertexsetapply_iter); - }; - int n = builtin_getVertices(edges) ; - int num_iter = 0; - startTimer() ; - VertexSubset * frontier = new VertexSubset ( builtin_getVertices(edges) , n); - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - init()(vertexsetapply_iter); - }; - while ( (builtin_getVertexSetSize(frontier) ) != ((0) )) - { - num_iter++; - frontier = edgeset_apply_hybrid_denseforward_parallel_deduplicatied_from_vertexset_with_frontier(edges, frontier, updateEdge()); - } - float elapsed_time = stopTimer() ; - std::cout << "elapsed time: "<< std::endl; - std::cout << elapsed_time<< std::endl; - std::cout << "num_iter: " << num_iter << std::endl; - } -}; - diff --git a/graphit_eval/eval/table7/cpps/cc_hybrid_dense.cpp b/graphit_eval/eval/table7/cpps/cc_hybrid_dense.cpp deleted file mode 100755 index da30a705..00000000 --- a/graphit_eval/eval/table7/cpps/cc_hybrid_dense.cpp +++ /dev/null @@ -1,147 +0,0 @@ -#include -#include -#include -#include "intrinsics.h" -Graph edges; -int * __restrict IDs; -template VertexSubset* 
edgeset_apply_hybrid_dense_parallel_deduplicatied_from_vertexset_with_frontier(Graph & g , VertexSubset* from_vertexset, APPLY_FUNC apply_func, PUSH_APPLY_FUNC push_apply_func) -{ - int64_t numVertices = g.num_nodes(), numEdges = g.num_edges(); - from_vertexset->toSparse(); - long m = from_vertexset->size(); - // used to generate nonzero indices to get degrees - uintT *degrees = newA(uintT, m); - // We probably need this when we get something that doesn't have a dense set, not sure - // We can also write our own, the eixsting one doesn't quite work for bitvectors - //from_vertexset->toSparse(); - { - parallel_for (long i = 0; i < m; i++) { - NodeID v = from_vertexset->dense_vertex_set_[i]; - degrees[i] = g.out_degree(v); - } - } - uintT outDegrees = sequence::plusReduce(degrees, m); - if (m + outDegrees > numEdges / 20) { - VertexSubset *next_frontier = new VertexSubset(g.num_nodes(), 0); - bool * next = newA(bool, g.num_nodes()); - parallel_for (int i = 0; i < numVertices; i++)next[i] = 0; - from_vertexset->toDense(); - parallel_for ( NodeID d=0; d < g.num_nodes(); d++) { - for(NodeID s : g.in_neigh(d)){ - if (from_vertexset->bool_map_[s] ) { - if( apply_func ( s , d ) ) { - next[d] = 1; - } - } - } //end of loop on in neighbors - } //end of outer for loop - next_frontier->num_vertices_ = sequence::sum(next, numVertices); - free(next_frontier->bool_map_); - next_frontier->bool_map_ = next; - free(degrees); - return next_frontier; -} else { - if (g.flags_ == nullptr){ - g.flags_ = new int[numVertices](); - parallel_for(int i = 0; i < numVertices; i++) g.flags_[i]=0; - } - VertexSubset *next_frontier = new VertexSubset(g.num_nodes(), 0); - if (numVertices != from_vertexset->getVerticesRange()) { - cout << "edgeMap: Sizes Don't match" << endl; - abort(); - } - if (outDegrees == 0) { - free(degrees); - return next_frontier; - } - uintT *offsets = degrees; - long outEdgeCount = sequence::plusScan(offsets, degrees, m); - uintE *outEdges = newA(uintE, outEdgeCount); - 
parallel_for (long i=0; i < m; i++) { - NodeID s = from_vertexset->dense_vertex_set_[i]; - int j = 0; - uintT offset = offsets[i]; - for(NodeID d : g.out_neigh(s)){ - if( push_apply_func ( s , d ) && CAS(&(g.flags_[d]), 0, 1) ) { - outEdges[offset + j] = d; - } else { outEdges[offset + j] = UINT_E_MAX; } - j++; - } //end of for loop on neighbors - } - uintE *nextIndices = newA(uintE, outEdgeCount); - long nextM = sequence::filter(outEdges, nextIndices, outEdgeCount, nonMaxF()); - free(outEdges); - free(degrees); - next_frontier->num_vertices_ = nextM; - delete[] next_frontier->dense_vertex_set_; - next_frontier->dense_vertex_set_ = nextIndices; - parallel_for(int i = 0; i < nextM; i++){ - g.flags_[nextIndices[i]] = 0; - } - return next_frontier; -} //end of else -} //end of edgeset apply function -struct updateEdge_push_ver -{ - bool operator() (NodeID src, NodeID dst) - { - bool output4 ; - bool IDs_trackving_var_3 = (bool) 0; - IDs_trackving_var_3 = writeMin( &IDs[dst], IDs[src] ); - output4 = IDs_trackving_var_3; - return output4; - }; -}; -struct IDs_generated_vector_op_apply_func_0 -{ - void operator() (NodeID v) - { - IDs[v] = (1) ; - }; -}; -struct updateEdge -{ - bool operator() (NodeID src, NodeID dst) - { - bool output2 ; - bool IDs_trackving_var_1 = (bool) 0; - if ( ( IDs[dst]) > ( IDs[src]) ) { - IDs[dst]= IDs[src]; - IDs_trackving_var_1 = true ; - } - output2 = IDs_trackving_var_1; - return output2; - }; -}; -struct init -{ - void operator() (NodeID v) - { - IDs[v] = v; - }; -}; -int main(int argc, char * argv[]) -{ - edges = builtin_loadEdgesFromFile ( argv[(1) ]) ; - IDs = new int [ builtin_getVertices(edges) ]; - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - IDs_generated_vector_op_apply_func_0()(vertexsetapply_iter); - }; - int n = builtin_getVertices(edges) ; - for ( int trail = (0) ; trail < (10) ; trail++ ) - { - startTimer() ; - VertexSubset * frontier = new VertexSubset 
( builtin_getVertices(edges) , n); - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - init()(vertexsetapply_iter); - }; - while ( (builtin_getVertexSetSize(frontier) ) != ((0) )) - { - frontier = edgeset_apply_hybrid_dense_parallel_deduplicatied_from_vertexset_with_frontier(edges, frontier, updateEdge(), updateEdge_push_ver()); - } - float elapsed_time = stopTimer() ; - std::cout << "elapsed time: "<< std::endl; - std::cout << elapsed_time<< std::endl; - } -}; - diff --git a/graphit_eval/eval/table7/cpps/cc_hybrid_dense_bitvec.cpp b/graphit_eval/eval/table7/cpps/cc_hybrid_dense_bitvec.cpp deleted file mode 100755 index 3bbc49ab..00000000 --- a/graphit_eval/eval/table7/cpps/cc_hybrid_dense_bitvec.cpp +++ /dev/null @@ -1,157 +0,0 @@ -#include -#include -#include -#include "intrinsics.h" -Graph edges; -int * __restrict IDs; -template VertexSubset* edgeset_apply_hybrid_dense_parallel_deduplicatied_from_vertexset_with_frontier_pull_frontier_bitvector(Graph & g , VertexSubset* from_vertexset, APPLY_FUNC apply_func, PUSH_APPLY_FUNC push_apply_func) -{ - int64_t numVertices = g.num_nodes(), numEdges = g.num_edges(); - from_vertexset->toSparse(); - long m = from_vertexset->size(); - // used to generate nonzero indices to get degrees - uintT *degrees = newA(uintT, m); - // We probably need this when we get something that doesn't have a dense set, not sure - // We can also write our own, the eixsting one doesn't quite work for bitvectors - //from_vertexset->toSparse(); - { - parallel_for (long i = 0; i < m; i++) { - NodeID v = from_vertexset->dense_vertex_set_[i]; - degrees[i] = g.out_degree(v); - } - } - uintT outDegrees = sequence::plusReduce(degrees, m); - if (m + outDegrees > numEdges / 20) { - VertexSubset *next_frontier = new VertexSubset(g.num_nodes(), 0); - bool * next = newA(bool, g.num_nodes()); - parallel_for (int i = 0; i < numVertices; i++)next[i] = 0; - from_vertexset->toDense(); - Bitmap 
bitmap(numVertices); - bitmap.reset(); - parallel_for(int i = 0; i < numVertices; i+=32){ - int start = i; - int end = (((i + 32) < numVertices)? (i+32):numVertices); - for(int j = start; j < end; j++){ - if (from_vertexset->bool_map_[j]) - bitmap.set_bit(j); - } - } - parallel_for ( NodeID d=0; d < g.num_nodes(); d++) { - for(NodeID s : g.in_neigh(d)){ - if (bitmap.get_bit(s)) { - if( apply_func ( s , d ) ) { - next[d] = 1; - } - } - } //end of loop on in neighbors - } //end of outer for loop - next_frontier->num_vertices_ = sequence::sum(next, numVertices); - free(next_frontier->bool_map_); - next_frontier->bool_map_ = next; - free(degrees); - return next_frontier; -} else { - if (g.flags_ == nullptr){ - g.flags_ = new int[numVertices](); - parallel_for(int i = 0; i < numVertices; i++) g.flags_[i]=0; - } - VertexSubset *next_frontier = new VertexSubset(g.num_nodes(), 0); - if (numVertices != from_vertexset->getVerticesRange()) { - cout << "edgeMap: Sizes Don't match" << endl; - abort(); - } - if (outDegrees == 0) { - free(degrees); - return next_frontier; - } - uintT *offsets = degrees; - long outEdgeCount = sequence::plusScan(offsets, degrees, m); - uintE *outEdges = newA(uintE, outEdgeCount); - parallel_for (long i=0; i < m; i++) { - NodeID s = from_vertexset->dense_vertex_set_[i]; - int j = 0; - uintT offset = offsets[i]; - for(NodeID d : g.out_neigh(s)){ - if( push_apply_func ( s , d ) && CAS(&(g.flags_[d]), 0, 1) ) { - outEdges[offset + j] = d; - } else { outEdges[offset + j] = UINT_E_MAX; } - j++; - } //end of for loop on neighbors - } - uintE *nextIndices = newA(uintE, outEdgeCount); - long nextM = sequence::filter(outEdges, nextIndices, outEdgeCount, nonMaxF()); - free(outEdges); - free(degrees); - next_frontier->num_vertices_ = nextM; - delete[] next_frontier->dense_vertex_set_; - next_frontier->dense_vertex_set_ = nextIndices; - parallel_for(int i = 0; i < nextM; i++){ - g.flags_[nextIndices[i]] = 0; - } - return next_frontier; -} //end of else -} //end 
of edgeset apply function -struct updateEdge_push_ver -{ - bool operator() (NodeID src, NodeID dst) - { - bool output4 ; - bool IDs_trackving_var_3 = (bool) 0; - IDs_trackving_var_3 = writeMin( &IDs[dst], IDs[src] ); - output4 = IDs_trackving_var_3; - return output4; - }; -}; -struct IDs_generated_vector_op_apply_func_0 -{ - void operator() (NodeID v) - { - IDs[v] = (1) ; - }; -}; -struct updateEdge -{ - bool operator() (NodeID src, NodeID dst) - { - bool output2 ; - bool IDs_trackving_var_1 = (bool) 0; - if ( ( IDs[dst]) > ( IDs[src]) ) { - IDs[dst]= IDs[src]; - IDs_trackving_var_1 = true ; - } - output2 = IDs_trackving_var_1; - return output2; - }; -}; -struct init -{ - void operator() (NodeID v) - { - IDs[v] = v; - }; -}; -int main(int argc, char * argv[]) -{ - edges = builtin_loadEdgesFromFile ( argv[(1) ]) ; - IDs = new int [ builtin_getVertices(edges) ]; - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - IDs_generated_vector_op_apply_func_0()(vertexsetapply_iter); - }; - int n = builtin_getVertices(edges) ; - for ( int trail = (0) ; trail < (10) ; trail++ ) - { - startTimer() ; - VertexSubset * frontier = new VertexSubset ( builtin_getVertices(edges) , n); - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - init()(vertexsetapply_iter); - }; - while ( (builtin_getVertexSetSize(frontier) ) != ((0) )) - { - frontier = edgeset_apply_hybrid_dense_parallel_deduplicatied_from_vertexset_with_frontier_pull_frontier_bitvector(edges, frontier, updateEdge(), updateEdge_push_ver()); - } - float elapsed_time = stopTimer() ; - std::cout << "elapsed time: "<< std::endl; - std::cout << elapsed_time<< std::endl; - } -}; - diff --git a/graphit_eval/eval/table7/cpps/cc_hybrid_dense_bitvec_numa.cpp b/graphit_eval/eval/table7/cpps/cc_hybrid_dense_bitvec_numa.cpp deleted file mode 100644 index 2ba43cf7..00000000 --- 
a/graphit_eval/eval/table7/cpps/cc_hybrid_dense_bitvec_numa.cpp +++ /dev/null @@ -1,201 +0,0 @@ -#include -#include -#include -#include "intrinsics.h" -Graph edges; -int * __restrict IDs; -int **local_IDs; -template VertexSubset* edgeset_apply_hybrid_dense_parallel_deduplicatied_from_vertexset_with_frontier_pull_frontier_bitvector(Graph & g , VertexSubset* from_vertexset, APPLY_FUNC apply_func, PUSH_APPLY_FUNC push_apply_func) -{ - int64_t numVertices = g.num_nodes(), numEdges = g.num_edges(); - from_vertexset->toSparse(); - long m = from_vertexset->size(); - // used to generate nonzero indices to get degrees - uintT *degrees = newA(uintT, m); - // We probably need this when we get something that doesn't have a dense set, not sure - // We can also write our own, the eixsting one doesn't quite work for bitvectors - //from_vertexset->toSparse(); - { - parallel_for (long i = 0; i < m; i++) { - NodeID v = from_vertexset->dense_vertex_set_[i]; - degrees[i] = g.out_degree(v); - } - } - uintT outDegrees = sequence::plusReduce(degrees, m); - if (m + outDegrees > numEdges / 20) { - VertexSubset *next_frontier = new VertexSubset(g.num_nodes(), 0); - bool * next = newA(bool, g.num_nodes()); - parallel_for (int i = 0; i < numVertices; i++)next[i] = 0; - from_vertexset->toDense(); - Bitmap bitmap(numVertices); - bitmap.reset(); - parallel_for(int i = 0; i < numVertices; i+=32){ - int start = i; - int end = (((i + 32) < numVertices)? 
(i+32):numVertices); - for(int j = start; j < end; j++){ - if (from_vertexset->bool_map_[j]) - bitmap.set_bit(j); - } - } - // parallel_for (int n = 0; n < numVertices; n++) { - // for (int socketId = 0; socketId < omp_get_num_places(); socketId++) { - // local_IDs[socketId][n] = IDs[n]; - // } - // } - int numPlaces = omp_get_num_places(); - int numSegments = g.getNumSegments("s1"); - int segmentsPerSocket = (numSegments + numPlaces - 1) / numPlaces; -#pragma omp parallel num_threads(numPlaces) proc_bind(spread) -{ - int socketId = omp_get_place_num(); - for (int i = 0; i < segmentsPerSocket; i++) { - int segmentId = socketId + i * numPlaces; - if (segmentId >= numSegments) break; - auto sg = g.getSegmentedGraph(std::string("s1"), segmentId); -#pragma omp parallel num_threads(omp_get_place_num_procs(socketId)) proc_bind(close) -{ -#pragma omp for schedule(dynamic, 1024) -for ( NodeID localId=0; localId < sg->numVertices; localId++) { - NodeID d = sg->graphId[localId]; - for (int64_t ngh = sg->vertexArray[localId]; ngh < sg->vertexArray[localId+1]; ngh++) { - NodeID s = sg->edgeArray[ngh]; - if (bitmap.get_bit(s)) { - if( apply_func ( s , d , socketId) ) { - next[d] = 1; - } - } - } //end of loop on in neighbors - } //end of outer for loop -} // end of per-socket parallel_for - } // end of segment for loop -}// end of per-socket parallel region - - // parallel_for (int n = 0; n < numVertices; n++) { - // for (int socketId = 0; socketId < omp_get_num_places(); socketId++) { - // IDs[n] = min(IDs[n], local_IDs[socketId][n]); - // } - // } - next_frontier->num_vertices_ = sequence::sum(next, numVertices); - next_frontier->bool_map_ = next; - return next_frontier; -} else { - if (g.flags_ == nullptr){ - g.flags_ = new int[numVertices](); - parallel_for(int i = 0; i < numVertices; i++) g.flags_[i]=0; - } - VertexSubset *next_frontier = new VertexSubset(g.num_nodes(), 0); - if (numVertices != from_vertexset->getVerticesRange()) { - cout << "edgeMap: Sizes Don't match" << 
endl; - abort(); - } - if (outDegrees == 0) return next_frontier; - uintT *offsets = degrees; - long outEdgeCount = sequence::plusScan(offsets, degrees, m); - uintE *outEdges = newA(uintE, outEdgeCount); - parallel_for (long i=0; i < m; i++) { - NodeID s = from_vertexset->dense_vertex_set_[i]; - int j = 0; - uintT offset = offsets[i]; - for(NodeID d : g.out_neigh(s)){ - if( push_apply_func ( s , d ) && CAS(&(g.flags_[d]), 0, 1) ) { - outEdges[offset + j] = d; - } else { outEdges[offset + j] = UINT_E_MAX; } - j++; - } //end of for loop on neighbors - } - uintE *nextIndices = newA(uintE, outEdgeCount); - long nextM = sequence::filter(outEdges, nextIndices, outEdgeCount, nonMaxF()); - free(outEdges); - free(degrees); - next_frontier->num_vertices_ = nextM; - next_frontier->dense_vertex_set_ = nextIndices; - parallel_for(int i = 0; i < nextM; i++){ - g.flags_[nextIndices[i]] = 0; - } - return next_frontier; - } //end of else -} //end of edgeset apply function -struct updateEdge_push_ver -{ - bool operator() (NodeID src, NodeID dst) - { - bool output4 ; - bool IDs_trackving_var_3 = (bool) 0; - IDs_trackving_var_3 = writeMin( &IDs[dst], IDs[src] ); - output4 = IDs_trackving_var_3; - return output4; - }; -}; -struct IDs_generated_vector_op_apply_func_0 -{ - void operator() (NodeID v) - { - IDs[v] = (1) ; - }; -}; -struct updateEdge -{ - bool operator() (NodeID src, NodeID dst, int socketId) - { - // bool output2 ; - // bool IDs_trackving_var_1 = (bool) 0; - // if ( ( local_IDs[socketId][dst]) > ( IDs[src]) ) { - // local_IDs[socketId][dst]= IDs[src]; - // IDs_trackving_var_1 = true ; - // } - // output2 = IDs_trackving_var_1; - // return output2; - bool output2 ; - bool IDs_trackving_var_1 = (bool) 0; - if ( ( IDs[dst]) > ( IDs[src]) ) { - IDs[dst]= IDs[src]; - IDs_trackving_var_1 = true ; - } - output2 = IDs_trackving_var_1; - return output2; - }; -}; -struct init -{ - void operator() (NodeID v) - { - IDs[v] = v; - }; -}; -int main(int argc, char * argv[]) -{ - edges = 
builtin_loadEdgesFromFile ( argv[(1) ]) ; - edges.buildPullSegmentedGraphs("s1", 30, true); - IDs = new int [ builtin_getVertices(edges) ]; - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - IDs_generated_vector_op_apply_func_0()(vertexsetapply_iter); - }; - local_IDs = new int *[omp_get_num_places()]; - for (int socketId = 0; socketId < omp_get_num_places(); socketId++) { - local_IDs[socketId] = (int *)numa_alloc_onnode(sizeof(int ) * builtin_getVertices(edges) , socketId); - parallel_for (int n = 0; n < builtin_getVertices(edges) ; n++) { - local_IDs[socketId][n] = IDs[n]; - } - } - omp_set_nested(1); - int n = builtin_getVertices(edges) ; - for ( int trail = (0) ; trail < (10) ; trail++ ) - { - startTimer() ; - VertexSubset * frontier = new VertexSubset ( builtin_getVertices(edges) , n); - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - init()(vertexsetapply_iter); - }; - while ( (builtin_getVertexSetSize(frontier) ) != ((0) )) - { - frontier = edgeset_apply_hybrid_dense_parallel_deduplicatied_from_vertexset_with_frontier_pull_frontier_bitvector(edges, frontier, updateEdge(), updateEdge_push_ver()); - } - float elapsed_time = stopTimer() ; - std::cout << "elapsed time: "<< std::endl; - std::cout << elapsed_time<< std::endl; - } - for (int socketId = 0; socketId < omp_get_num_places(); socketId++) { - numa_free(local_IDs[socketId], sizeof(int ) * builtin_getVertices(edges) ); - } -}; - diff --git a/graphit_eval/eval/table7/cpps/cc_hybrid_dense_bitvec_segment.cpp b/graphit_eval/eval/table7/cpps/cc_hybrid_dense_bitvec_segment.cpp deleted file mode 100755 index e97e7750..00000000 --- a/graphit_eval/eval/table7/cpps/cc_hybrid_dense_bitvec_segment.cpp +++ /dev/null @@ -1,167 +0,0 @@ -#include -#include -#include -#include "intrinsics.h" -Graph edges; -int * __restrict IDs; -template VertexSubset* 
edgeset_apply_hybrid_dense_parallel_deduplicatied_from_vertexset_with_frontier_pull_frontier_bitvector(Graph & g , VertexSubset* from_vertexset, APPLY_FUNC apply_func, PUSH_APPLY_FUNC push_apply_func) -{ - int64_t numVertices = g.num_nodes(), numEdges = g.num_edges(); - from_vertexset->toSparse(); - long m = from_vertexset->size(); - // used to generate nonzero indices to get degrees - uintT *degrees = newA(uintT, m); - // We probably need this when we get something that doesn't have a dense set, not sure - // We can also write our own, the eixsting one doesn't quite work for bitvectors - //from_vertexset->toSparse(); - { - parallel_for (long i = 0; i < m; i++) { - NodeID v = from_vertexset->dense_vertex_set_[i]; - degrees[i] = g.out_degree(v); - } - } - uintT outDegrees = sequence::plusReduce(degrees, m); - if (m + outDegrees > numEdges / 20) { - VertexSubset *next_frontier = new VertexSubset(g.num_nodes(), 0); - bool * next = newA(bool, g.num_nodes()); - parallel_for (int i = 0; i < numVertices; i++)next[i] = 0; - from_vertexset->toDense(); - Bitmap bitmap(numVertices); - bitmap.reset(); - parallel_for(int i = 0; i < numVertices; i+=32){ - int start = i; - int end = (((i + 32) < numVertices)? 
(i+32):numVertices); - for(int j = start; j < end; j++){ - if (from_vertexset->bool_map_[j]) - bitmap.set_bit(j); - } - } - for (int segmentId = 0; segmentId < g.getNumSegments("s1"); segmentId++) { - auto sg = g.getSegmentedGraph(std::string("s1"), segmentId); - parallel_for ( NodeID localId=0; localId < sg->numVertices; localId++) { - NodeID d = sg->graphId[localId]; - for (int64_t ngh = sg->vertexArray[localId]; ngh < sg->vertexArray[localId+1]; ngh++) { - NodeID s = sg->edgeArray[ngh]; - if (bitmap.get_bit(s)) { - if( apply_func ( s , d ) ) { - next[d] = 1; - } - } - } //end of loop on in neighbors - } //end of outer for loop - } // end of segment for loop - next_frontier->num_vertices_ = sequence::sum(next, numVertices); - next_frontier->bool_map_ = next; - return next_frontier; - } else { - if (g.flags_ == nullptr){ - g.flags_ = new int[numVertices](); - parallel_for(int i = 0; i < numVertices; i++) g.flags_[i]=0; - } - VertexSubset *next_frontier = new VertexSubset(g.num_nodes(), 0); - if (numVertices != from_vertexset->getVerticesRange()) { - cout << "edgeMap: Sizes Don't match" << endl; - abort(); - } - if (outDegrees == 0) return next_frontier; - uintT *offsets = degrees; - long outEdgeCount = sequence::plusScan(offsets, degrees, m); - uintE *outEdges = newA(uintE, outEdgeCount); - parallel_for (long i=0; i < m; i++) { - NodeID s = from_vertexset->dense_vertex_set_[i]; - int j = 0; - uintT offset = offsets[i]; - for(NodeID d : g.out_neigh(s)){ - if( push_apply_func ( s , d ) && CAS(&(g.flags_[d]), 0, 1) ) { - outEdges[offset + j] = d; - } else { outEdges[offset + j] = UINT_E_MAX; } - j++; - } //end of for loop on neighbors - } - uintE *nextIndices = newA(uintE, outEdgeCount); - long nextM = sequence::filter(outEdges, nextIndices, outEdgeCount, nonMaxF()); - free(outEdges); - free(degrees); - next_frontier->num_vertices_ = nextM; - next_frontier->dense_vertex_set_ = nextIndices; - parallel_for(int i = 0; i < nextM; i++){ - g.flags_[nextIndices[i]] = 0; - } 
- return next_frontier; - } //end of else -} //end of edgeset apply function -struct updateEdge_push_ver -{ - bool operator() (NodeID src, NodeID dst) - { - bool output4 ; - bool IDs_trackving_var_3 = (bool) 0; - IDs_trackving_var_3 = writeMin( &IDs[dst], IDs[src] ); - output4 = IDs_trackving_var_3; - return output4; - }; -}; -struct IDs_generated_vector_op_apply_func_0 -{ - void operator() (NodeID v) - { - IDs[v] = (1) ; - }; -}; -struct updateEdge -{ - bool operator() (NodeID src, NodeID dst) - { - bool output2 ; - bool IDs_trackving_var_1 = (bool) 0; - if ( ( IDs[dst]) > ( IDs[src]) ) { - IDs[dst]= IDs[src]; - IDs_trackving_var_1 = true ; - } - output2 = IDs_trackving_var_1; - return output2; - }; -}; -struct init -{ - void operator() (NodeID v) - { - IDs[v] = v; - }; -}; -int main(int argc, char * argv[]) -{ - edges = builtin_loadEdgesFromFile ( argv[(1) ]) ; - edges.buildPullSegmentedGraphs("s1", atoi(argv[2])); - IDs = new int [ builtin_getVertices(edges) ]; - parallel_for (int i = 0; i < builtin_getVertices(edges) ; i++) { - IDs_generated_vector_op_apply_func_0()(i); - }; - int n = builtin_getVertices(edges) ; - for ( int trail = (0) ; trail < (10) ; trail++ ) - { - startTimer() ; - VertexSubset * frontier = new VertexSubset ( builtin_getVertices(edges) , n); - parallel_for (int i = 0; i < builtin_getVertices(edges) ; i++) { - init()(i); - }; - while ( (builtin_getVertexSetSize(frontier) ) != ((0) )) - { - frontier = edgeset_apply_hybrid_dense_parallel_deduplicatied_from_vertexset_with_frontier_pull_frontier_bitvector(edges, frontier, updateEdge(), updateEdge_push_ver()); - } - float elapsed_time = stopTimer() ; - std::cout << "elapsed time: "<< std::endl; - std::cout << elapsed_time<< std::endl; - } - int cc=0; - Bitmap bitmap(builtin_getVertices(edges)); - bitmap.reset(); - for (int i = 0;i < builtin_getVertices(edges);i++) { - if (!bitmap.get_bit(IDs[i])) { - cc++; - bitmap.set_bit(IDs[i]); - } - } - std::cout << "components=" << cc << std::endl; -}; - 
diff --git a/graphit_eval/eval/table7/cpps/cf_pull_load_balance.cpp b/graphit_eval/eval/table7/cpps/cf_pull_load_balance.cpp deleted file mode 100755 index 7f0e6daa..00000000 --- a/graphit_eval/eval/table7/cpps/cf_pull_load_balance.cpp +++ /dev/null @@ -1,96 +0,0 @@ -#include -#include -#include "intrinsics.h" -WGraph edges; -typedef double defined_type_0 [ 20]; -defined_type_0 * __restrict latent_vec; -typedef double defined_type_1 [ 20]; -defined_type_1 * __restrict error_vec; -double step; -double lambda; -int K; -template VertexSubset* edgeset_apply_pull_parallel_weighted_pull_edge_based_load_balance(WGraph & g , APPLY_FUNC apply_func) -{ - long numVertices = g.num_nodes(), numEdges = g.num_edges(); - if (g.offsets_ == nullptr) g.SetUpOffsets(true); - SGOffset * edge_in_index = g.offsets_; - std::function recursive_lambda = - [&apply_func, &g, &recursive_lambda, edge_in_index ] - (NodeID start, NodeID end, int grain_size){ - if ((start == end-1) || ((edge_in_index[end] - edge_in_index[start]) < grain_size)){ - for (NodeID d = start; d < end; d++){ - for(WNode s : g.in_neigh(d)){ - apply_func ( s.v , d, s.w ); - } //end of loop on in neighbors - } //end of outer for loop - } else { // end of if statement on grain size, recursive case next - cilk_spawn recursive_lambda(start, start + ((end-start) >> 1), grain_size); - recursive_lambda(start + ((end-start)>>1), end, grain_size); - } - }; //end of lambda function - recursive_lambda(0, numVertices, 4096); - cilk_sync; - return new VertexSubset(g.num_nodes(), g.num_nodes()); -} //end of edgeset apply function -struct updateEdge -{ - void operator() (NodeID src, NodeID dst, int rating) - { - double estimate = (0) ; - for ( int i = (0) ; i < K; i++ ) - { - estimate += (latent_vec[src][i] * latent_vec[dst][i]); - } - double err = (rating - estimate); - for ( int i = (0) ; i < K; i++ ) - { - error_vec[dst][i] += (latent_vec[src][i] * err); - } - }; -}; -struct updateVertex -{ - void operator() (NodeID v) - { - for ( int 
i = (0) ; i < K; i++ ) - { - latent_vec[v][i] += (step * (( -lambda * latent_vec[v][i]) + error_vec[v][i])); - error_vec[v][i] = (0) ; - } - }; -}; -struct initVertex -{ - void operator() (NodeID v) - { - for ( int i = (0) ; i < K; i++ ) - { - latent_vec[v][i] = ((float) 0.5) ; - error_vec[v][i] = (0) ; - } - }; -}; -int main(int argc, char * argv[]) -{ - edges = builtin_loadWeightedEdgesFromFile ( argv[(1) ]) ; - latent_vec = new defined_type_0 [ builtin_getVertices(edges) ]; - error_vec = new defined_type_1 [ builtin_getVertices(edges) ]; - step = ((float) 3.5e-07) ; - lambda = ((float) 0.001) ; - K = (20) ; - parallel_for (int i = 0; i < builtin_getVertices(edges) ; i++) { - initVertex()(i); - }; - startTimer() ; - for ( int i = (0) ; i < (10) ; i++ ) - { - edgeset_apply_pull_parallel_weighted_pull_edge_based_load_balance(edges, updateEdge()); - parallel_for (int i = 0; i < builtin_getVertices(edges) ; i++) { - updateVertex()(i); - }; - } - double elapsed_time = stopTimer() ; - std::cout << "elapsed time: "<< std::endl; - std::cout << elapsed_time<< std::endl; -}; - diff --git a/graphit_eval/eval/table7/cpps/cf_pull_load_balance_segment.cpp b/graphit_eval/eval/table7/cpps/cf_pull_load_balance_segment.cpp deleted file mode 100755 index 9242f7c3..00000000 --- a/graphit_eval/eval/table7/cpps/cf_pull_load_balance_segment.cpp +++ /dev/null @@ -1,110 +0,0 @@ -#include -#include -#include -#include "intrinsics.h" -WGraph edges; -typedef double defined_type_0 [ 20]; -defined_type_0 * __restrict latent_vec; -typedef double defined_type_1 [ 20]; -defined_type_1 * __restrict error_vec; -double step; -double lambda; -int K; -template void edgeset_apply_pull_parallel_weighted_pull_edge_based_load_balance(WGraph & g , APPLY_FUNC apply_func) -{ - int64_t numVertices = g.num_nodes(), numEdges = g.num_edges(); - std::cout<<"|V|="<numVertices; - std::cout<<"segment " << segmentId << " has " << sg->numVertices < recursive_lambda = - [&apply_func, &g, &recursive_lambda, 
edge_in_index, sg ] - (NodeID start, NodeID end, int grain_size){ - if ((start == end-1) || ((sg->vertexArray[end] - sg->vertexArray[start]) < grain_size)){ - for (NodeID localId = start; localId < end; localId++){ - NodeID d = sg->graphId[localId]; - for (int64_t ngh = sg->vertexArray[localId]; ngh < sg->vertexArray[localId+1]; ngh++) { - WNode s = sg->edgeArray[ngh]; - apply_func ( s.v , d, s.w ); - } //end of loop on in neighbors - } //end of outer for loop - } else { // end of if statement on grain size, recursive case next - cilk_spawn recursive_lambda(start, start + ((end-start) >> 1), grain_size); - recursive_lambda(start + ((end-start)>>1), end, grain_size); - } - }; //end of lambda function - recursive_lambda(0, sg->numVertices, 4096); - cilk_sync; - } // end of segment for loop - std::cout<<"sum(|V_i|)="< -#include -#include -#include "intrinsics.h" -Graph edges; -double * __restrict old_rank; -double * __restrict new_rank; -int * __restrict out_degree; -double * __restrict contrib; -double * __restrict error; -int * __restrict generated_tmp_vector_2; -double damp; -double beta_score; -template void edgeset_apply_pull_parallel(Graph & g , APPLY_FUNC apply_func) -{ - int64_t numVertices = g.num_nodes(), numEdges = g.num_edges(); - parallel_for ( NodeID d=0; d < g.num_nodes(); d++) { - for(NodeID s : g.in_neigh(d)){ - apply_func ( s , d ); - } //end of loop on in neighbors - } //end of outer for loop -} //end of edgeset apply function -struct error_generated_vector_op_apply_func_5 -{ - void operator() (NodeID v) - { - error[v] = ((float) 0) ; - }; -}; -struct contrib_generated_vector_op_apply_func_4 -{ - void operator() (NodeID v) - { - contrib[v] = ((float) 0) ; - }; -}; -struct generated_vector_op_apply_func_3 -{ - void operator() (NodeID v) - { - out_degree[v] = generated_tmp_vector_2[v]; - }; -}; -struct new_rank_generated_vector_op_apply_func_1 -{ - void operator() (NodeID v) - { - new_rank[v] = ((float) 0) ; - }; -}; -struct 
old_rank_generated_vector_op_apply_func_0 -{ - void operator() (NodeID v) - { - old_rank[v] = (((float) 1) / builtin_getVertices(edges) ); - }; -}; -struct computeContrib -{ - void operator() (NodeID v) - { - contrib[v] = (old_rank[v] / out_degree[v]); - }; -}; -struct updateEdge -{ - void operator() (NodeID src, NodeID dst) - { - new_rank[dst] = (new_rank[dst] + contrib[src]); - }; -}; -struct updateVertex -{ - void operator() (NodeID v) - { - double old_score = old_rank[v]; - new_rank[v] = (beta_score + (damp * new_rank[v])); - error[v] = fabs((new_rank[v] - old_rank[v])) ; - old_rank[v] = new_rank[v]; - new_rank[v] = ((float) 0) ; - }; -}; -struct printRank -{ - void operator() (NodeID v) - { - std::cout << old_rank[v]<< std::endl; - }; -}; -struct reset -{ - void operator() (NodeID v) - { - old_rank[v] = (((float) 1) / builtin_getVertices(edges) ); - new_rank[v] = ((float) 0) ; - }; -}; -int main(int argc, char * argv[]) -{ - edges = builtin_loadEdgesFromFile ( argv[(1) ]) ; - old_rank = new double [ builtin_getVertices(edges) ]; - new_rank = new double [ builtin_getVertices(edges) ]; - out_degree = new int [ builtin_getVertices(edges) ]; - contrib = new double [ builtin_getVertices(edges) ]; - error = new double [ builtin_getVertices(edges) ]; - generated_tmp_vector_2 = new int [ builtin_getVertices(edges) ]; - damp = ((float) 0.85) ; - beta_score = ((((float) 1) - damp) / builtin_getVertices(edges) ); - parallel_for (int i = 0; i < builtin_getVertices(edges) ; i++) { - old_rank_generated_vector_op_apply_func_0()(i); - }; - parallel_for (int i = 0; i < builtin_getVertices(edges) ; i++) { - new_rank_generated_vector_op_apply_func_1()(i); - }; - generated_tmp_vector_2 = builtin_getOutDegrees(edges) ; - parallel_for (int i = 0; i < builtin_getVertices(edges) ; i++) { - generated_vector_op_apply_func_3()(i); - }; - parallel_for (int i = 0; i < builtin_getVertices(edges) ; i++) { - contrib_generated_vector_op_apply_func_4()(i); - }; - parallel_for (int i = 0; i < 
builtin_getVertices(edges) ; i++) { - error_generated_vector_op_apply_func_5()(i); - }; - for ( int trail = (0) ; trail < (10) ; trail++ ) - { - parallel_for (int i = 0; i < builtin_getVertices(edges) ; i++) { - reset()(i); - }; - startTimer() ; - for ( int i = (0) ; i < (20) ; i++ ) - { - parallel_for (int i = 0; i < builtin_getVertices(edges) ; i++) { - computeContrib()(i); - }; - edgeset_apply_pull_parallel(edges, updateEdge()); - parallel_for (int i = 0; i < builtin_getVertices(edges) ; i++) { - updateVertex()(i); - }; - } - double elapsed_time = stopTimer() ; - std::cout << "elapsed time: "<< std::endl; - std::cout << elapsed_time<< std::endl; - } -}; - diff --git a/graphit_eval/eval/table7/cpps/pagerank_pull_numa.cpp b/graphit_eval/eval/table7/cpps/pagerank_pull_numa.cpp deleted file mode 100755 index 328496a7..00000000 --- a/graphit_eval/eval/table7/cpps/pagerank_pull_numa.cpp +++ /dev/null @@ -1,188 +0,0 @@ -#include -#include -#include -#include "intrinsics.h" -Graph edges; -double * __restrict old_rank; -double * __restrict new_rank; -int * __restrict out_degree; -double * __restrict contrib; -double * __restrict error; -int * __restrict generated_tmp_vector_2; -double damp; -double beta_score; -double **local_new_rank; -template void edgeset_apply_pull_parallel(Graph & g , APPLY_FUNC apply_func) -{ - int64_t numVertices = g.num_nodes(), numEdges = g.num_edges(); - parallel_for (int n = 0; n < numVertices; n++) { - for (int socketId = 0; socketId < omp_get_num_places(); socketId++) { - local_new_rank[socketId][n] = new_rank[n]; - } - } - int numPlaces = omp_get_num_places(); - int numSegments = g.getNumSegments("s1"); - int segmentsPerSocket = (numSegments + numPlaces - 1) / numPlaces; -#pragma omp parallel num_threads(numPlaces) proc_bind(spread) -{ - int socketId = omp_get_place_num(); - for (int i = 0; i < segmentsPerSocket; i++) { - int segmentId = socketId + i * numPlaces; - if (segmentId >= numSegments) break; - auto sg = 
g.getSegmentedGraph(std::string("s1"), segmentId); -#pragma omp parallel num_threads(omp_get_place_num_procs(socketId)) proc_bind(close) -{ -#pragma omp for schedule(dynamic, 1024) -for ( NodeID localId=0; localId < sg->numVertices; localId++) { - NodeID d = sg->graphId[localId]; - for (int64_t ngh = sg->vertexArray[localId]; ngh < sg->vertexArray[localId+1]; ngh++) { - NodeID s = sg->edgeArray[ngh]; - apply_func ( s , d , socketId); - } //end of loop on in neighbors - } //end of outer for loop -} // end of per-socket parallel_for - } // end of segment for loop -}// end of per-socket parallel region - - parallel_for (int n = 0; n < numVertices; n++) { - for (int socketId = 0; socketId < omp_get_num_places(); socketId++) { - new_rank[n] += local_new_rank[socketId][n]; - } - } -} //end of edgeset apply function -struct error_generated_vector_op_apply_func_5 -{ - void operator() (NodeID v) - { - error[v] = ((float) 0) ; - }; -}; -struct contrib_generated_vector_op_apply_func_4 -{ - void operator() (NodeID v) - { - contrib[v] = ((float) 0) ; - }; -}; -struct generated_vector_op_apply_func_3 -{ - void operator() (NodeID v) - { - out_degree[v] = generated_tmp_vector_2[v]; - }; -}; -struct new_rank_generated_vector_op_apply_func_1 -{ - void operator() (NodeID v) - { - new_rank[v] = ((float) 0) ; - }; -}; -struct old_rank_generated_vector_op_apply_func_0 -{ - void operator() (NodeID v) - { - old_rank[v] = (((float) 1) / builtin_getVertices(edges) ); - }; -}; -struct computeContrib -{ - void operator() (NodeID v) - { - contrib[v] = (old_rank[v] / out_degree[v]); - }; -}; -struct updateEdge -{ - void operator() (NodeID src, NodeID dst, int socketId) - { - local_new_rank[socketId][dst] += contrib[src]; - }; -}; -struct updateVertex -{ - void operator() (NodeID v) - { - double old_score = old_rank[v]; - new_rank[v] = (beta_score + (damp * new_rank[v])); - error[v] = fabs((new_rank[v] - old_rank[v])) ; - old_rank[v] = new_rank[v]; - new_rank[v] = ((float) 0) ; - }; -}; -struct 
printRank -{ - void operator() (NodeID v) - { - std::cout << old_rank[v]<< std::endl; - }; -}; -struct reset -{ - void operator() (NodeID v) - { - old_rank[v] = (((float) 1) / builtin_getVertices(edges) ); - new_rank[v] = ((float) 0) ; - }; -}; -int main(int argc, char * argv[]) -{ - edges = builtin_loadEdgesFromFile ( argv[(1) ]) ; - edges.buildPullSegmentedGraphs("s1", atoi(argv[2]), true); - old_rank = new double [ builtin_getVertices(edges) ]; - new_rank = new double [ builtin_getVertices(edges) ]; - out_degree = new int [ builtin_getVertices(edges) ]; - contrib = new double [ builtin_getVertices(edges) ]; - error = new double [ builtin_getVertices(edges) ]; - generated_tmp_vector_2 = new int [ builtin_getVertices(edges) ]; - damp = ((float) 0.85) ; - beta_score = ((((float) 1) - damp) / builtin_getVertices(edges) ); - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - old_rank_generated_vector_op_apply_func_0()(vertexsetapply_iter); - }; - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - new_rank_generated_vector_op_apply_func_1()(vertexsetapply_iter); - }; - generated_tmp_vector_2 = builtin_getOutDegrees(edges) ; - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - generated_vector_op_apply_func_3()(vertexsetapply_iter); - }; - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - contrib_generated_vector_op_apply_func_4()(vertexsetapply_iter); - }; - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - error_generated_vector_op_apply_func_5()(vertexsetapply_iter); - }; - local_new_rank = new double *[omp_get_num_places()]; - for (int socketId = 0; socketId < omp_get_num_places(); socketId++) { - 
local_new_rank[socketId] = (double *)numa_alloc_onnode(sizeof(double ) * builtin_getVertices(edges) , socketId); - parallel_for (int n = 0; n < builtin_getVertices(edges) ; n++) { - local_new_rank[socketId][n] = new_rank[n]; - } - } - omp_set_nested(1); - for ( int trail = (0) ; trail < (10) ; trail++ ) - { - startTimer() ; - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - reset()(vertexsetapply_iter); - }; - for ( int i = (0) ; i < (20) ; i++ ) - { - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - computeContrib()(vertexsetapply_iter); - }; - edgeset_apply_pull_parallel(edges, updateEdge()); - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - updateVertex()(vertexsetapply_iter); - }; - } - double elapsed_time = stopTimer() ; - std::cout << "elapsed time: "<< std::endl; - std::cout << elapsed_time<< std::endl; - } - for (int socketId = 0; socketId < omp_get_num_places(); socketId++) { - numa_free(local_new_rank[socketId], sizeof(double ) * builtin_getVertices(edges) ); - } -}; - diff --git a/graphit_eval/eval/table7/cpps/pagerank_pull_segment.cpp b/graphit_eval/eval/table7/cpps/pagerank_pull_segment.cpp deleted file mode 100755 index 670d5100..00000000 --- a/graphit_eval/eval/table7/cpps/pagerank_pull_segment.cpp +++ /dev/null @@ -1,153 +0,0 @@ -#include -#include -#include -#include "intrinsics.h" -Graph edges; -double * __restrict old_rank; -double * __restrict new_rank; -int * __restrict out_degree; -double * __restrict contrib; -double * __restrict error; -int * __restrict generated_tmp_vector_2; -double damp; -double beta_score; -template void edgeset_apply_pull_parallel(Graph & g , APPLY_FUNC apply_func) -{ - int64_t numVertices = g.num_nodes(), numEdges = g.num_edges(); - for (int segmentId = 0; segmentId < g.getNumSegments("s1"); segmentId++) { 
- auto sg = g.getSegmentedGraph(std::string("s1"), segmentId); - parallel_for ( NodeID localId=0; localId < sg->numVertices; localId++) { - NodeID d = sg->graphId[localId]; - for (int64_t ngh = sg->vertexArray[localId]; ngh < sg->vertexArray[localId+1]; ngh++) { - NodeID s = sg->edgeArray[ngh]; - apply_func ( s , d ); - } //end of loop on in neighbors - } //end of outer for loop - } // end of segment for loop - -} //end of edgeset apply function -struct error_generated_vector_op_apply_func_5 -{ - void operator() (NodeID v) - { - error[v] = ((float) 0) ; - }; -}; -struct contrib_generated_vector_op_apply_func_4 -{ - void operator() (NodeID v) - { - contrib[v] = ((float) 0) ; - }; -}; -struct generated_vector_op_apply_func_3 -{ - void operator() (NodeID v) - { - out_degree[v] = generated_tmp_vector_2[v]; - }; -}; -struct new_rank_generated_vector_op_apply_func_1 -{ - void operator() (NodeID v) - { - new_rank[v] = ((float) 0) ; - }; -}; -struct old_rank_generated_vector_op_apply_func_0 -{ - void operator() (NodeID v) - { - old_rank[v] = (((float) 1) / builtin_getVertices(edges) ); - }; -}; -struct computeContrib -{ - void operator() (NodeID v) - { - contrib[v] = (old_rank[v] / out_degree[v]); - }; -}; -struct updateEdge -{ - void operator() (NodeID src, NodeID dst) - { - new_rank[dst] = (new_rank[dst] + contrib[src]); - }; -}; -struct updateVertex -{ - void operator() (NodeID v) - { - double old_score = old_rank[v]; - new_rank[v] = (beta_score + (damp * new_rank[v])); - error[v] = fabs((new_rank[v] - old_rank[v])) ; - old_rank[v] = new_rank[v]; - new_rank[v] = ((float) 0) ; - }; -}; -struct printRank -{ - void operator() (NodeID v) - { - std::cout << old_rank[v]<< std::endl; - }; -}; -struct reset -{ - void operator() (NodeID v) - { - old_rank[v] = (((float) 1) / builtin_getVertices(edges) ); - new_rank[v] = ((float) 0) ; - }; -}; -int main(int argc, char * argv[]) -{ - edges = builtin_loadEdgesFromFile ( argv[(1) ]) ; - edges.buildPullSegmentedGraphs("s1", 
atoi(argv[2])); - old_rank = new double [ builtin_getVertices(edges) ]; - new_rank = new double [ builtin_getVertices(edges) ]; - out_degree = new int [ builtin_getVertices(edges) ]; - contrib = new double [ builtin_getVertices(edges) ]; - error = new double [ builtin_getVertices(edges) ]; - generated_tmp_vector_2 = new int [ builtin_getVertices(edges) ]; - damp = ((float) 0.85) ; - beta_score = ((((float) 1) - damp) / builtin_getVertices(edges) ); - parallel_for (int i = 0; i < builtin_getVertices(edges) ; i++) { - old_rank_generated_vector_op_apply_func_0()(i); - }; - parallel_for (int i = 0; i < builtin_getVertices(edges) ; i++) { - new_rank_generated_vector_op_apply_func_1()(i); - }; - generated_tmp_vector_2 = builtin_getOutDegrees(edges) ; - parallel_for (int i = 0; i < builtin_getVertices(edges) ; i++) { - generated_vector_op_apply_func_3()(i); - }; - parallel_for (int i = 0; i < builtin_getVertices(edges) ; i++) { - contrib_generated_vector_op_apply_func_4()(i); - }; - parallel_for (int i = 0; i < builtin_getVertices(edges) ; i++) { - error_generated_vector_op_apply_func_5()(i); - }; - for ( int trail = (0) ; trail < (10) ; trail++ ) - { - parallel_for (int i = 0; i < builtin_getVertices(edges) ; i++) { - reset()(i); - }; - startTimer() ; - for ( int i = (0) ; i < (20) ; i++ ) - { - parallel_for (int i = 0; i < builtin_getVertices(edges) ; i++) { - computeContrib()(i); - }; - edgeset_apply_pull_parallel(edges, updateEdge()); - parallel_for (int i = 0; i < builtin_getVertices(edges) ; i++) { - updateVertex()(i); - }; - } - double elapsed_time = stopTimer() ; - std::cout << "elapsed time: "<< std::endl; - std::cout << elapsed_time<< std::endl; - } -}; - diff --git a/graphit_eval/eval/table7/cpps/pagerankdelta_hybrid_dense.cpp b/graphit_eval/eval/table7/cpps/pagerankdelta_hybrid_dense.cpp deleted file mode 100755 index 249e9599..00000000 --- a/graphit_eval/eval/table7/cpps/pagerankdelta_hybrid_dense.cpp +++ /dev/null @@ -1,207 +0,0 @@ -#include -#include 
-#include -#include "intrinsics.h" -Graph edges; -typedef struct struct_delta_out_degree { - double delta; - int out_degree; -} struct_delta_out_degree; -double * __restrict cur_rank; -double * __restrict ngh_sum; -struct_delta_out_degree * __restrict array_of_struct_delta_out_degree; -int * __restrict generated_tmp_vector_3; -double damp; -double beta_score; -double epsilon2; -double epsilon; -template void edgeset_apply_hybrid_dense_parallel_from_vertexset(Graph & g , VertexSubset* from_vertexset, APPLY_FUNC apply_func, PUSH_APPLY_FUNC push_apply_func) -{ - int64_t numVertices = g.num_nodes(), numEdges = g.num_edges(); - from_vertexset->toSparse(); - long m = from_vertexset->size(); - // used to generate nonzero indices to get degrees - uintT *degrees = newA(uintT, m); - // We probably need this when we get something that doesn't have a dense set, not sure - // We can also write our own, the eixsting one doesn't quite work for bitvectors - //from_vertexset->toSparse(); - { - parallel_for (long i = 0; i < m; i++) { - NodeID v = from_vertexset->dense_vertex_set_[i]; - degrees[i] = g.out_degree(v); - } - } - uintT outDegrees = sequence::plusReduce(degrees, m); - if (m + outDegrees > numEdges / 20) { - from_vertexset->toDense(); - parallel_for ( NodeID d=0; d < g.num_nodes(); d++) { - for(NodeID s : g.in_neigh(d)){ - if (from_vertexset->bool_map_[s] ) { - apply_func ( s , d ); - } - } //end of loop on in neighbors - } //end of outer for loop -} else { - parallel_for (long i=0; i < m; i++) { - NodeID s = from_vertexset->dense_vertex_set_[i]; - int j = 0; - for(NodeID d : g.out_neigh(s)){ - push_apply_func ( s , d ); - } //end of for loop on neighbors - } -} //end of else -} //end of edgeset apply function -struct updateEdge_push_ver -{ - void operator() (NodeID src, NodeID dst) - { - writeAdd( &ngh_sum[dst], (array_of_struct_delta_out_degree[src].delta / array_of_struct_delta_out_degree[src].out_degree ) ); - }; -}; -struct generated_vector_op_apply_func_4 -{ - void 
operator() (NodeID v) - { - array_of_struct_delta_out_degree[v].out_degree = generated_tmp_vector_3[v]; - }; -}; -struct delta_generated_vector_op_apply_func_2 -{ - void operator() (NodeID v) - { - array_of_struct_delta_out_degree[v].delta = (((float) 1) / builtin_getVertices(edges) ); - }; -}; -struct ngh_sum_generated_vector_op_apply_func_1 -{ - void operator() (NodeID v) - { - ngh_sum[v] = ((float) 0) ; - }; -}; -struct cur_rank_generated_vector_op_apply_func_0 -{ - void operator() (NodeID v) - { - cur_rank[v] = (0) ; - }; -}; -struct updateEdge -{ - void operator() (NodeID src, NodeID dst) - { - ngh_sum[dst] += (array_of_struct_delta_out_degree[src].delta / array_of_struct_delta_out_degree[src].out_degree ); - }; -}; -struct updateVertexFirstRound -{ - bool operator() (NodeID v) - { - bool output ; - array_of_struct_delta_out_degree[v].delta = ((damp * ngh_sum[v]) + beta_score); - cur_rank[v] += array_of_struct_delta_out_degree[v].delta ; - array_of_struct_delta_out_degree[v].delta = (array_of_struct_delta_out_degree[v].delta - (((float) 1) / builtin_getVertices(edges) )); - output = (fabs(array_of_struct_delta_out_degree[v].delta ) ) > ((epsilon2 * cur_rank[v])); - ngh_sum[v] = (0) ; - return output; - }; -}; -struct updateVertex -{ - bool operator() (NodeID v) - { - bool output ; - array_of_struct_delta_out_degree[v].delta = (ngh_sum[v] * damp); - cur_rank[v] += array_of_struct_delta_out_degree[v].delta ; - ngh_sum[v] = (0) ; - output = (fabs(array_of_struct_delta_out_degree[v].delta ) ) > ((epsilon2 * cur_rank[v])); - return output; - }; -}; -struct printRank -{ - void operator() (NodeID v) - { - std::cout << cur_rank[v]<< std::endl; - }; -}; -struct reset -{ - void operator() (NodeID v) - { - cur_rank[v] = (0) ; - ngh_sum[v] = ((float) 0) ; - array_of_struct_delta_out_degree[v].delta = (((float) 1) / builtin_getVertices(edges) ); - }; -}; -int main(int argc, char * argv[]) -{ - edges = builtin_loadEdgesFromFile ( argv[(1) ]) ; - cur_rank = new double [ 
builtin_getVertices(edges) ]; - ngh_sum = new double [ builtin_getVertices(edges) ]; - array_of_struct_delta_out_degree = new struct_delta_out_degree [ builtin_getVertices(edges) ]; - generated_tmp_vector_3 = new int [ builtin_getVertices(edges) ]; - damp = ((float) 0.85) ; - beta_score = ((((float) 1) - damp) / builtin_getVertices(edges) ); - epsilon2 = ((float) 0.1) ; - epsilon = ((float) 1e-07) ; - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - cur_rank_generated_vector_op_apply_func_0()(vertexsetapply_iter); - }; - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - ngh_sum_generated_vector_op_apply_func_1()(vertexsetapply_iter); - }; - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - delta_generated_vector_op_apply_func_2()(vertexsetapply_iter); - }; - generated_tmp_vector_3 = builtin_getOutDegrees(edges) ; - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - generated_vector_op_apply_func_4()(vertexsetapply_iter); - }; - int n = builtin_getVertices(edges) ; - for ( int trail = (0) ; trail < (10) ; trail++ ) - { - startTimer() ; - VertexSubset * frontier = new VertexSubset ( builtin_getVertices(edges) , n); - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - reset()(vertexsetapply_iter); - }; - for ( int i = (1) ; i < (11) ; i++ ) - { - edgeset_apply_hybrid_dense_parallel_from_vertexset(edges, frontier, updateEdge(), updateEdge_push_ver()); - if ((i) == ((1) )) - { - auto ____graphit_tmp_out = new VertexSubset ( builtin_getVertices(edges) , 0 ); -bool * next5 = newA(bool, builtin_getVertices(edges) ); - parallel_for (int v = 0; v < builtin_getVertices(edges) ; v++) { - next5[v] = 0; -if ( 
updateVertexFirstRound()( v ) ) - next5[v] = 1; - } //end of loop -____graphit_tmp_out->num_vertices_ = sequence::sum( next5, builtin_getVertices(edges) ); -____graphit_tmp_out->bool_map_ = next5; - - frontier = ____graphit_tmp_out; - } - else - { - auto ____graphit_tmp_out = new VertexSubset ( builtin_getVertices(edges) , 0 ); -bool * next6 = newA(bool, builtin_getVertices(edges) ); - parallel_for (int v = 0; v < builtin_getVertices(edges) ; v++) { - next6[v] = 0; -if ( updateVertex()( v ) ) - next6[v] = 1; - } //end of loop -____graphit_tmp_out->num_vertices_ = sequence::sum( next6, builtin_getVertices(edges) ); -____graphit_tmp_out->bool_map_ = next6; - - frontier = ____graphit_tmp_out; - - } - } - double elapsed_time = stopTimer() ; - std::cout << "elapsed time: "<< std::endl; - std::cout << elapsed_time<< std::endl; - } -}; - diff --git a/graphit_eval/eval/table7/cpps/pagerankdelta_hybrid_dense_bitvec.cpp b/graphit_eval/eval/table7/cpps/pagerankdelta_hybrid_dense_bitvec.cpp deleted file mode 100755 index a82f489f..00000000 --- a/graphit_eval/eval/table7/cpps/pagerankdelta_hybrid_dense_bitvec.cpp +++ /dev/null @@ -1,217 +0,0 @@ -#include -#include -#include -#include "intrinsics.h" -Graph edges; -typedef struct struct_delta_out_degree { - double delta; - int out_degree; -} struct_delta_out_degree; -double * __restrict cur_rank; -double * __restrict ngh_sum; -struct_delta_out_degree * __restrict array_of_struct_delta_out_degree; -int * __restrict generated_tmp_vector_3; -double damp; -double beta_score; -double epsilon2; -double epsilon; -template void edgeset_apply_hybrid_dense_parallel_from_vertexset_pull_frontier_bitvector(Graph & g , VertexSubset* from_vertexset, APPLY_FUNC apply_func, PUSH_APPLY_FUNC push_apply_func) -{ - int64_t numVertices = g.num_nodes(), numEdges = g.num_edges(); - from_vertexset->toSparse(); - long m = from_vertexset->size(); - // used to generate nonzero indices to get degrees - uintT *degrees = newA(uintT, m); - // We probably need 
this when we get something that doesn't have a dense set, not sure - // We can also write our own, the eixsting one doesn't quite work for bitvectors - //from_vertexset->toSparse(); - { - parallel_for (long i = 0; i < m; i++) { - NodeID v = from_vertexset->dense_vertex_set_[i]; - degrees[i] = g.out_degree(v); - } - } - uintT outDegrees = sequence::plusReduce(degrees, m); - if (m + outDegrees > numEdges / 20) { - from_vertexset->toDense(); - Bitmap bitmap(numVertices); - bitmap.reset(); - parallel_for(int i = 0; i < numVertices; i+=32){ - int start = i; - int end = (((i + 32) < numVertices)? (i+32):numVertices); - for(int j = start; j < end; j++){ - if (from_vertexset->bool_map_[j]) - bitmap.set_bit(j); - } - } - parallel_for ( NodeID d=0; d < g.num_nodes(); d++) { - for(NodeID s : g.in_neigh(d)){ - if (bitmap.get_bit(s)) { - apply_func ( s , d ); - } - } //end of loop on in neighbors - } //end of outer for loop -} else { - parallel_for (long i=0; i < m; i++) { - NodeID s = from_vertexset->dense_vertex_set_[i]; - int j = 0; - for(NodeID d : g.out_neigh(s)){ - push_apply_func ( s , d ); - } //end of for loop on neighbors - } -} //end of else -} //end of edgeset apply function -struct updateEdge_push_ver -{ - void operator() (NodeID src, NodeID dst) - { - writeAdd( &ngh_sum[dst], (array_of_struct_delta_out_degree[src].delta / array_of_struct_delta_out_degree[src].out_degree ) ); - }; -}; -struct generated_vector_op_apply_func_4 -{ - void operator() (NodeID v) - { - array_of_struct_delta_out_degree[v].out_degree = generated_tmp_vector_3[v]; - }; -}; -struct delta_generated_vector_op_apply_func_2 -{ - void operator() (NodeID v) - { - array_of_struct_delta_out_degree[v].delta = (((float) 1) / builtin_getVertices(edges) ); - }; -}; -struct ngh_sum_generated_vector_op_apply_func_1 -{ - void operator() (NodeID v) - { - ngh_sum[v] = ((float) 0) ; - }; -}; -struct cur_rank_generated_vector_op_apply_func_0 -{ - void operator() (NodeID v) - { - cur_rank[v] = (0) ; - }; -}; 
-struct updateEdge -{ - void operator() (NodeID src, NodeID dst) - { - ngh_sum[dst] += (array_of_struct_delta_out_degree[src].delta / array_of_struct_delta_out_degree[src].out_degree ); - }; -}; -struct updateVertexFirstRound -{ - bool operator() (NodeID v) - { - bool output ; - array_of_struct_delta_out_degree[v].delta = ((damp * ngh_sum[v]) + beta_score); - cur_rank[v] += array_of_struct_delta_out_degree[v].delta ; - array_of_struct_delta_out_degree[v].delta = (array_of_struct_delta_out_degree[v].delta - (((float) 1) / builtin_getVertices(edges) )); - output = (fabs(array_of_struct_delta_out_degree[v].delta ) ) > ((epsilon2 * cur_rank[v])); - ngh_sum[v] = (0) ; - return output; - }; -}; -struct updateVertex -{ - bool operator() (NodeID v) - { - bool output ; - array_of_struct_delta_out_degree[v].delta = (ngh_sum[v] * damp); - cur_rank[v] += array_of_struct_delta_out_degree[v].delta ; - ngh_sum[v] = (0) ; - output = (fabs(array_of_struct_delta_out_degree[v].delta ) ) > ((epsilon2 * cur_rank[v])); - return output; - }; -}; -struct printRank -{ - void operator() (NodeID v) - { - std::cout << cur_rank[v]<< std::endl; - }; -}; -struct reset -{ - void operator() (NodeID v) - { - cur_rank[v] = (0) ; - ngh_sum[v] = ((float) 0) ; - array_of_struct_delta_out_degree[v].delta = (((float) 1) / builtin_getVertices(edges) ); - }; -}; -int main(int argc, char * argv[]) -{ - edges = builtin_loadEdgesFromFile ( argv[(1) ]) ; - cur_rank = new double [ builtin_getVertices(edges) ]; - ngh_sum = new double [ builtin_getVertices(edges) ]; - array_of_struct_delta_out_degree = new struct_delta_out_degree [ builtin_getVertices(edges) ]; - generated_tmp_vector_3 = new int [ builtin_getVertices(edges) ]; - damp = ((float) 0.85) ; - beta_score = ((((float) 1) - damp) / builtin_getVertices(edges) ); - epsilon2 = ((float) 0.1) ; - epsilon = ((float) 1e-07) ; - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - 
cur_rank_generated_vector_op_apply_func_0()(vertexsetapply_iter); - }; - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - ngh_sum_generated_vector_op_apply_func_1()(vertexsetapply_iter); - }; - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - delta_generated_vector_op_apply_func_2()(vertexsetapply_iter); - }; - generated_tmp_vector_3 = builtin_getOutDegrees(edges) ; - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - generated_vector_op_apply_func_4()(vertexsetapply_iter); - }; - int n = builtin_getVertices(edges) ; - for ( int trail = (0) ; trail < (10) ; trail++ ) - { - startTimer() ; - VertexSubset * frontier = new VertexSubset ( builtin_getVertices(edges) , n); - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - reset()(vertexsetapply_iter); - }; - for ( int i = (1) ; i < (11) ; i++ ) - { - edgeset_apply_hybrid_dense_parallel_from_vertexset_pull_frontier_bitvector(edges, frontier, updateEdge(), updateEdge_push_ver()); - if ((i) == ((1) )) - { - auto ____graphit_tmp_out = new VertexSubset ( builtin_getVertices(edges) , 0 ); -bool * next5 = newA(bool, builtin_getVertices(edges) ); - parallel_for (int v = 0; v < builtin_getVertices(edges) ; v++) { - next5[v] = 0; -if ( updateVertexFirstRound()( v ) ) - next5[v] = 1; - } //end of loop -____graphit_tmp_out->num_vertices_ = sequence::sum( next5, builtin_getVertices(edges) ); -____graphit_tmp_out->bool_map_ = next5; - - frontier = ____graphit_tmp_out; - } - else - { - auto ____graphit_tmp_out = new VertexSubset ( builtin_getVertices(edges) , 0 ); -bool * next6 = newA(bool, builtin_getVertices(edges) ); - parallel_for (int v = 0; v < builtin_getVertices(edges) ; v++) { - next6[v] = 0; -if ( updateVertex()( v ) ) - next6[v] = 1; - } //end 
of loop -____graphit_tmp_out->num_vertices_ = sequence::sum( next6, builtin_getVertices(edges) ); -____graphit_tmp_out->bool_map_ = next6; - - frontier = ____graphit_tmp_out; - - } - } - double elapsed_time = stopTimer() ; - std::cout << "elapsed time: "<< std::endl; - std::cout << elapsed_time<< std::endl; - } -}; - diff --git a/graphit_eval/eval/table7/cpps/pagerankdelta_hybrid_dense_bitvec_numa.cpp b/graphit_eval/eval/table7/cpps/pagerankdelta_hybrid_dense_bitvec_numa.cpp deleted file mode 100755 index 6b287013..00000000 --- a/graphit_eval/eval/table7/cpps/pagerankdelta_hybrid_dense_bitvec_numa.cpp +++ /dev/null @@ -1,259 +0,0 @@ -#include -#include -#include -#include "intrinsics.h" -Graph edges; -typedef struct struct_delta_out_degree { - double delta; - int out_degree; -} struct_delta_out_degree; -double * __restrict cur_rank; -double * __restrict ngh_sum; -struct_delta_out_degree * __restrict array_of_struct_delta_out_degree; -int * __restrict generated_tmp_vector_3; -double damp; -double beta_score; -double epsilon2; -double epsilon; -double **local_ngh_sum; -template void edgeset_apply_hybrid_dense_parallel_from_vertexset_pull_frontier_bitvector(Graph & g , VertexSubset* from_vertexset, APPLY_FUNC apply_func, PUSH_APPLY_FUNC push_apply_func) -{ - int64_t numVertices = g.num_nodes(), numEdges = g.num_edges(); - from_vertexset->toSparse(); - long m = from_vertexset->size(); - // used to generate nonzero indices to get degrees - uintT *degrees = newA(uintT, m); - // We probably need this when we get something that doesn't have a dense set, not sure - // We can also write our own, the eixsting one doesn't quite work for bitvectors - //from_vertexset->toSparse(); - { - parallel_for (long i = 0; i < m; i++) { - NodeID v = from_vertexset->dense_vertex_set_[i]; - degrees[i] = g.out_degree(v); - } - } - uintT outDegrees = sequence::plusReduce(degrees, m); - if (m + outDegrees > numEdges / 20) { - from_vertexset->toDense(); - Bitmap bitmap(numVertices); - 
bitmap.reset(); - parallel_for(int i = 0; i < numVertices; i+=32){ - int start = i; - int end = (((i + 32) < numVertices)? (i+32):numVertices); - for(int j = start; j < end; j++){ - if (from_vertexset->bool_map_[j]) - bitmap.set_bit(j); - } - } - parallel_for (int n = 0; n < numVertices; n++) { - for (int socketId = 0; socketId < omp_get_num_places(); socketId++) { - local_ngh_sum[socketId][n] = ngh_sum[n]; - } - } - int numPlaces = omp_get_num_places(); - int numSegments = g.getNumSegments("s1"); - int segmentsPerSocket = (numSegments + numPlaces - 1) / numPlaces; -#pragma omp parallel num_threads(numPlaces) proc_bind(spread) -{ - int socketId = omp_get_place_num(); - for (int i = 0; i < segmentsPerSocket; i++) { - int segmentId = socketId + i * numPlaces; - if (segmentId >= numSegments) break; - auto sg = g.getSegmentedGraph(std::string("s1"), segmentId); -#pragma omp parallel num_threads(omp_get_place_num_procs(socketId)) proc_bind(close) -{ -#pragma omp for schedule(dynamic, 1024) -for ( NodeID localId=0; localId < sg->numVertices; localId++) { - NodeID d = sg->graphId[localId]; - for (int64_t ngh = sg->vertexArray[localId]; ngh < sg->vertexArray[localId+1]; ngh++) { - NodeID s = sg->edgeArray[ngh]; - if (bitmap.get_bit(s)) { - apply_func ( s , d , socketId); - } - } //end of loop on in neighbors - } //end of outer for loop -} // end of per-socket parallel_for - } // end of segment for loop -}// end of per-socket parallel region - - parallel_for (int n = 0; n < numVertices; n++) { - for (int socketId = 0; socketId < omp_get_num_places(); socketId++) { - ngh_sum[n] += local_ngh_sum[socketId][n]; - } - } -} else { - parallel_for (long i=0; i < m; i++) { - NodeID s = from_vertexset->dense_vertex_set_[i]; - int j = 0; - for(NodeID d : g.out_neigh(s)){ - push_apply_func ( s , d ); - } //end of for loop on neighbors - } -} //end of else -} //end of edgeset apply function -struct updateEdge_push_ver -{ - void operator() (NodeID src, NodeID dst) - { - writeAdd( 
&ngh_sum[dst], (array_of_struct_delta_out_degree[src].delta / array_of_struct_delta_out_degree[src].out_degree ) ); - }; -}; -struct generated_vector_op_apply_func_4 -{ - void operator() (NodeID v) - { - array_of_struct_delta_out_degree[v].out_degree = generated_tmp_vector_3[v]; - }; -}; -struct delta_generated_vector_op_apply_func_2 -{ - void operator() (NodeID v) - { - array_of_struct_delta_out_degree[v].delta = (((float) 1) / builtin_getVertices(edges) ); - }; -}; -struct ngh_sum_generated_vector_op_apply_func_1 -{ - void operator() (NodeID v) - { - ngh_sum[v] = ((float) 0) ; - }; -}; -struct cur_rank_generated_vector_op_apply_func_0 -{ - void operator() (NodeID v) - { - cur_rank[v] = (0) ; - }; -}; -struct updateEdge -{ - void operator() (NodeID src, NodeID dst, int socketId) - { - local_ngh_sum[socketId][dst] += (array_of_struct_delta_out_degree[src].delta / array_of_struct_delta_out_degree[src].out_degree ); - }; -}; -struct updateVertexFirstRound -{ - bool operator() (NodeID v) - { - bool output ; - array_of_struct_delta_out_degree[v].delta = ((damp * ngh_sum[v]) + beta_score); - cur_rank[v] += array_of_struct_delta_out_degree[v].delta ; - array_of_struct_delta_out_degree[v].delta = (array_of_struct_delta_out_degree[v].delta - (((float) 1) / builtin_getVertices(edges) )); - output = (fabs(array_of_struct_delta_out_degree[v].delta ) ) > ((epsilon2 * cur_rank[v])); - ngh_sum[v] = (0) ; - return output; - }; -}; -struct updateVertex -{ - bool operator() (NodeID v) - { - bool output ; - array_of_struct_delta_out_degree[v].delta = (ngh_sum[v] * damp); - cur_rank[v] += array_of_struct_delta_out_degree[v].delta ; - ngh_sum[v] = (0) ; - output = (fabs(array_of_struct_delta_out_degree[v].delta ) ) > ((epsilon2 * cur_rank[v])); - return output; - }; -}; -struct printRank -{ - void operator() (NodeID v) - { - std::cout << cur_rank[v]<< std::endl; - }; -}; -struct reset -{ - void operator() (NodeID v) - { - cur_rank[v] = (0) ; - ngh_sum[v] = ((float) 0) ; - 
array_of_struct_delta_out_degree[v].delta = (((float) 1) / builtin_getVertices(edges) ); - }; -}; -int main(int argc, char * argv[]) -{ - edges = builtin_loadEdgesFromFile ( argv[(1) ]) ; - edges.buildPullSegmentedGraphs("s1", atoi(argv[2]), true); - cur_rank = new double [ builtin_getVertices(edges) ]; - ngh_sum = new double [ builtin_getVertices(edges) ]; - array_of_struct_delta_out_degree = new struct_delta_out_degree [ builtin_getVertices(edges) ]; - generated_tmp_vector_3 = new int [ builtin_getVertices(edges) ]; - damp = ((float) 0.85) ; - beta_score = ((((float) 1) - damp) / builtin_getVertices(edges) ); - epsilon2 = ((float) 0.1) ; - epsilon = ((float) 1e-07) ; - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - cur_rank_generated_vector_op_apply_func_0()(vertexsetapply_iter); - }; - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - ngh_sum_generated_vector_op_apply_func_1()(vertexsetapply_iter); - }; - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - delta_generated_vector_op_apply_func_2()(vertexsetapply_iter); - }; - generated_tmp_vector_3 = builtin_getOutDegrees(edges) ; - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - generated_vector_op_apply_func_4()(vertexsetapply_iter); - }; - local_ngh_sum = new double *[omp_get_num_places()]; - for (int socketId = 0; socketId < omp_get_num_places(); socketId++) { - local_ngh_sum[socketId] = (double *)numa_alloc_onnode(sizeof(double ) * builtin_getVertices(edges) , socketId); - parallel_for (int n = 0; n < builtin_getVertices(edges) ; n++) { - local_ngh_sum[socketId][n] = ngh_sum[n]; - } - } - omp_set_nested(1); - int n = builtin_getVertices(edges) ; - for ( int trail = (0) ; trail < (10) ; trail++ ) - { - startTimer() ; - 
VertexSubset * frontier = new VertexSubset ( builtin_getVertices(edges) , n); - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - reset()(vertexsetapply_iter); - }; - for ( int i = (1) ; i < (11) ; i++ ) - { - edgeset_apply_hybrid_dense_parallel_from_vertexset_pull_frontier_bitvector(edges, frontier, updateEdge(), updateEdge_push_ver()); - if ((i) == ((1) )) - { - auto ____graphit_tmp_out = new VertexSubset ( builtin_getVertices(edges) , 0 ); -bool * next5 = newA(bool, builtin_getVertices(edges) ); - parallel_for (int v = 0; v < builtin_getVertices(edges) ; v++) { - next5[v] = 0; -if ( updateVertexFirstRound()( v ) ) - next5[v] = 1; - } //end of loop -____graphit_tmp_out->num_vertices_ = sequence::sum( next5, builtin_getVertices(edges) ); -____graphit_tmp_out->bool_map_ = next5; - - frontier = ____graphit_tmp_out; - } - else - { - auto ____graphit_tmp_out = new VertexSubset ( builtin_getVertices(edges) , 0 ); -bool * next6 = newA(bool, builtin_getVertices(edges) ); - parallel_for (int v = 0; v < builtin_getVertices(edges) ; v++) { - next6[v] = 0; -if ( updateVertex()( v ) ) - next6[v] = 1; - } //end of loop -____graphit_tmp_out->num_vertices_ = sequence::sum( next6, builtin_getVertices(edges) ); -____graphit_tmp_out->bool_map_ = next6; - - frontier = ____graphit_tmp_out; - - } - } - double elapsed_time = stopTimer() ; - std::cout << "elapsed time: "<< std::endl; - std::cout << elapsed_time<< std::endl; - } - for (int socketId = 0; socketId < omp_get_num_places(); socketId++) { - numa_free(local_ngh_sum[socketId], sizeof(double ) * builtin_getVertices(edges) ); - } -}; - diff --git a/graphit_eval/eval/table7/cpps/pagerankdelta_hybrid_dense_bitvec_segment.cpp b/graphit_eval/eval/table7/cpps/pagerankdelta_hybrid_dense_bitvec_segment.cpp deleted file mode 100755 index 899846db..00000000 --- a/graphit_eval/eval/table7/cpps/pagerankdelta_hybrid_dense_bitvec_segment.cpp +++ /dev/null @@ -1,223 +0,0 @@ 
-#include -#include -#include -#include "intrinsics.h" -Graph edges; -typedef struct struct_delta_out_degree { - double delta; - int out_degree; -} struct_delta_out_degree; -double * __restrict cur_rank; -double * __restrict ngh_sum; -struct_delta_out_degree * __restrict array_of_struct_delta_out_degree; -int * __restrict generated_tmp_vector_3; -double damp; -double beta_score; -double epsilon2; -double epsilon; -template void edgeset_apply_hybrid_dense_parallel_from_vertexset_pull_frontier_bitvector(Graph & g , VertexSubset* from_vertexset, APPLY_FUNC apply_func, PUSH_APPLY_FUNC push_apply_func) -{ - int64_t numVertices = g.num_nodes(), numEdges = g.num_edges(); - from_vertexset->toSparse(); - long m = from_vertexset->size(); - // used to generate nonzero indices to get degrees - uintT *degrees = newA(uintT, m); - // We probably need this when we get something that doesn't have a dense set, not sure - // We can also write our own, the eixsting one doesn't quite work for bitvectors - //from_vertexset->toSparse(); - { - parallel_for (long i = 0; i < m; i++) { - NodeID v = from_vertexset->dense_vertex_set_[i]; - degrees[i] = g.out_degree(v); - } - } - uintT outDegrees = sequence::plusReduce(degrees, m); - if (m + outDegrees > numEdges / 20) { - from_vertexset->toDense(); - Bitmap bitmap(numVertices); - bitmap.reset(); - parallel_for(int i = 0; i < numVertices; i+=32){ - int start = i; - int end = (((i + 32) < numVertices)? 
(i+32):numVertices); - for(int j = start; j < end; j++){ - if (from_vertexset->bool_map_[j]) - bitmap.set_bit(j); - } - } - for (int segmentId = 0; segmentId < g.getNumSegments("s1"); segmentId++) { - auto sg = g.getSegmentedGraph(std::string("s1"), segmentId); -parallel_for ( NodeID localId=0; localId < sg->numVertices; localId++) { - NodeID d = sg->graphId[localId]; - for (int64_t ngh = sg->vertexArray[localId]; ngh < sg->vertexArray[localId+1]; ngh++) { - NodeID s = sg->edgeArray[ngh]; - if (bitmap.get_bit(s)) { - apply_func ( s , d ); - } - } //end of loop on in neighbors - } //end of outer for loop - } // end of segment for loop -} else { - parallel_for (long i=0; i < m; i++) { - NodeID s = from_vertexset->dense_vertex_set_[i]; - int j = 0; - for(NodeID d : g.out_neigh(s)){ - push_apply_func ( s , d ); - } //end of for loop on neighbors - } -} //end of else -} //end of edgeset apply function -struct updateEdge_push_ver -{ - void operator() (NodeID src, NodeID dst) - { - writeAdd( &ngh_sum[dst], (array_of_struct_delta_out_degree[src].delta / array_of_struct_delta_out_degree[src].out_degree ) ); - }; -}; -struct generated_vector_op_apply_func_4 -{ - void operator() (NodeID v) - { - array_of_struct_delta_out_degree[v].out_degree = generated_tmp_vector_3[v]; - }; -}; -struct delta_generated_vector_op_apply_func_2 -{ - void operator() (NodeID v) - { - array_of_struct_delta_out_degree[v].delta = (((float) 1) / builtin_getVertices(edges) ); - }; -}; -struct ngh_sum_generated_vector_op_apply_func_1 -{ - void operator() (NodeID v) - { - ngh_sum[v] = ((float) 0) ; - }; -}; -struct cur_rank_generated_vector_op_apply_func_0 -{ - void operator() (NodeID v) - { - cur_rank[v] = (0) ; - }; -}; -struct updateEdge -{ - void operator() (NodeID src, NodeID dst) - { - ngh_sum[dst] += (array_of_struct_delta_out_degree[src].delta / array_of_struct_delta_out_degree[src].out_degree ); - }; -}; -struct updateVertexFirstRound -{ - bool operator() (NodeID v) - { - bool output ; - 
array_of_struct_delta_out_degree[v].delta = ((damp * ngh_sum[v]) + beta_score); - cur_rank[v] += array_of_struct_delta_out_degree[v].delta ; - array_of_struct_delta_out_degree[v].delta = (array_of_struct_delta_out_degree[v].delta - (((float) 1) / builtin_getVertices(edges) )); - output = (fabs(array_of_struct_delta_out_degree[v].delta ) ) > ((epsilon2 * cur_rank[v])); - ngh_sum[v] = (0) ; - return output; - }; -}; -struct updateVertex -{ - bool operator() (NodeID v) - { - bool output ; - array_of_struct_delta_out_degree[v].delta = (ngh_sum[v] * damp); - cur_rank[v] += array_of_struct_delta_out_degree[v].delta ; - ngh_sum[v] = (0) ; - output = (fabs(array_of_struct_delta_out_degree[v].delta ) ) > ((epsilon2 * cur_rank[v])); - return output; - }; -}; -struct printRank -{ - void operator() (NodeID v) - { - std::cout << cur_rank[v]<< std::endl; - }; -}; -struct reset -{ - void operator() (NodeID v) - { - cur_rank[v] = (0) ; - ngh_sum[v] = ((float) 0) ; - array_of_struct_delta_out_degree[v].delta = (((float) 1) / builtin_getVertices(edges) ); - }; -}; -int main(int argc, char * argv[]) -{ - edges = builtin_loadEdgesFromFile ( argv[(1) ]) ; - edges.buildPullSegmentedGraphs("s1", atoi(argv[2])); - cur_rank = new double [ builtin_getVertices(edges) ]; - ngh_sum = new double [ builtin_getVertices(edges) ]; - array_of_struct_delta_out_degree = new struct_delta_out_degree [ builtin_getVertices(edges) ]; - generated_tmp_vector_3 = new int [ builtin_getVertices(edges) ]; - damp = ((float) 0.85) ; - beta_score = ((((float) 1) - damp) / builtin_getVertices(edges) ); - epsilon2 = ((float) 0.1) ; - epsilon = ((float) 1e-07) ; - parallel_for (int i = 0; i < builtin_getVertices(edges) ; i++) { - cur_rank_generated_vector_op_apply_func_0()(i); - }; - parallel_for (int i = 0; i < builtin_getVertices(edges) ; i++) { - ngh_sum_generated_vector_op_apply_func_1()(i); - }; - parallel_for (int i = 0; i < builtin_getVertices(edges) ; i++) { - delta_generated_vector_op_apply_func_2()(i); - }; 
- generated_tmp_vector_3 = builtin_getOutDegrees(edges) ; - parallel_for (int i = 0; i < builtin_getVertices(edges) ; i++) { - generated_vector_op_apply_func_4()(i); - }; - for ( int trail = (0) ; trail < (10) ; trail++ ) - { - int n = builtin_getVertices(edges) ; - VertexSubset * frontier = new VertexSubset ( builtin_getVertices(edges) , n); - parallel_for (int i = 0; i < builtin_getVertices(edges) ; i++) { - reset()(i); - }; - startTimer() ; - for ( int i = (1) ; i < (11) ; i++ ) - { - edgeset_apply_hybrid_dense_parallel_from_vertexset_pull_frontier_bitvector(edges, frontier, updateEdge(), updateEdge_push_ver()); - if ((i) == ((1) )) - { - auto ____graphit_tmp_out = new VertexSubset ( builtin_getVertices(edges) , 0 ); -bool * next5 = newA(bool, builtin_getVertices(edges) ); - parallel_for (int v = 0; v < builtin_getVertices(edges) ; v++) { - next5[v] = 0; -if ( updateVertexFirstRound()( v ) ) - next5[v] = 1; - } //end of loop -____graphit_tmp_out->num_vertices_ = sequence::sum( next5, builtin_getVertices(edges) ); -____graphit_tmp_out->bool_map_ = next5; - - frontier = ____graphit_tmp_out; - } - else - { - auto ____graphit_tmp_out = new VertexSubset ( builtin_getVertices(edges) , 0 ); -bool * next6 = newA(bool, builtin_getVertices(edges) ); - parallel_for (int v = 0; v < builtin_getVertices(edges) ; v++) { - next6[v] = 0; -if ( updateVertex()( v ) ) - next6[v] = 1; - } //end of loop -____graphit_tmp_out->num_vertices_ = sequence::sum( next6, builtin_getVertices(edges) ); -____graphit_tmp_out->bool_map_ = next6; - - frontier = ____graphit_tmp_out; - - } - } - double elapsed_time = stopTimer() ; - std::cout << "elapsed time: "<< std::endl; - std::cout << elapsed_time<< std::endl; - } -}; - diff --git a/graphit_eval/eval/table7/cpps/pagerankdelta_sparse_push.cpp b/graphit_eval/eval/table7/cpps/pagerankdelta_sparse_push.cpp deleted file mode 100644 index f48800f6..00000000 --- a/graphit_eval/eval/table7/cpps/pagerankdelta_sparse_push.cpp +++ /dev/null @@ -1,163 +0,0 
@@ -#include -#include -#include -#include "intrinsics.h" -Graph edges; -typedef struct struct_delta_out_degree { - double delta; - int out_degree; -} struct_delta_out_degree; -double * __restrict cur_rank; -double * __restrict ngh_sum; -struct_delta_out_degree * __restrict array_of_struct_delta_out_degree; -int * __restrict generated_tmp_vector_3; -double damp; -double beta_score; -double epsilon2; -double epsilon; -template void edgeset_apply_push_parallel_from_vertexset(Graph & g , VertexSubset* from_vertexset, APPLY_FUNC apply_func) -{ - int64_t numVertices = g.num_nodes(), numEdges = g.num_edges(); - - from_vertexset->toSparse(); - long m = from_vertexset->size(); - parallel_for (long i=0; i < m; i++) { - NodeID s = from_vertexset->dense_vertex_set_[i]; - int j = 0; - for(NodeID d : g.out_neigh(s)){ - apply_func ( s , d ); - } //end of for loop on neighbors - } -} //end of edgeset apply function -struct generated_vector_op_apply_func_4 -{ - void operator() (NodeID v) - { - array_of_struct_delta_out_degree[v].out_degree = generated_tmp_vector_3[v]; - }; -}; -struct delta_generated_vector_op_apply_func_2 -{ - void operator() (NodeID v) - { - array_of_struct_delta_out_degree[v].delta = (((float) 1) / builtin_getVertices(edges) ); - }; -}; -struct ngh_sum_generated_vector_op_apply_func_1 -{ - void operator() (NodeID v) - { - ngh_sum[v] = ((float) 0) ; - }; -}; -struct cur_rank_generated_vector_op_apply_func_0 -{ - void operator() (NodeID v) - { - cur_rank[v] = (0) ; - }; -}; -struct updateEdge -{ - void operator() (NodeID src, NodeID dst) - { - writeAdd( &ngh_sum[dst], (array_of_struct_delta_out_degree[src].delta / array_of_struct_delta_out_degree[src].out_degree ) ); - }; -}; -struct updateVertexFirstRound -{ - bool operator() (NodeID v) - { - bool output ; - array_of_struct_delta_out_degree[v].delta = ((damp * ngh_sum[v]) + beta_score); - cur_rank[v] += array_of_struct_delta_out_degree[v].delta ; - array_of_struct_delta_out_degree[v].delta = 
(array_of_struct_delta_out_degree[v].delta - (((float) 1) / builtin_getVertices(edges) )); - output = (fabs(array_of_struct_delta_out_degree[v].delta ) ) > ((epsilon2 * cur_rank[v])); - ngh_sum[v] = (0) ; - return output; - }; -}; -struct updateVertex -{ - bool operator() (NodeID v) - { - bool output ; - array_of_struct_delta_out_degree[v].delta = (ngh_sum[v] * damp); - cur_rank[v] += array_of_struct_delta_out_degree[v].delta ; - ngh_sum[v] = (0) ; - output = (fabs(array_of_struct_delta_out_degree[v].delta ) ) > ((epsilon2 * cur_rank[v])); - return output; - }; -}; -struct printRank -{ - void operator() (NodeID v) - { - std::cout << cur_rank[v]<< std::endl; - }; -}; -int main(int argc, char * argv[]) -{ - edges = builtin_loadEdgesFromFile ( argv[(1) ]) ; - cur_rank = new double [ builtin_getVertices(edges) ]; - ngh_sum = new double [ builtin_getVertices(edges) ]; - array_of_struct_delta_out_degree = new struct_delta_out_degree [ builtin_getVertices(edges) ]; - generated_tmp_vector_3 = new int [ builtin_getVertices(edges) ]; - damp = ((float) 0.85) ; - beta_score = ((((float) 1) - damp) / builtin_getVertices(edges) ); - epsilon2 = ((float) 0.1) ; - epsilon = ((float) 1e-07) ; - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - cur_rank_generated_vector_op_apply_func_0()(vertexsetapply_iter); - }; - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - ngh_sum_generated_vector_op_apply_func_1()(vertexsetapply_iter); - }; - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - delta_generated_vector_op_apply_func_2()(vertexsetapply_iter); - }; - generated_tmp_vector_3 = builtin_getOutDegrees(edges) ; - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - 
generated_vector_op_apply_func_4()(vertexsetapply_iter); - }; - int n = builtin_getVertices(edges) ; - VertexSubset * frontier = new VertexSubset ( builtin_getVertices(edges) , n); - startTimer() ; - for ( int i = (1) ; i < (11) ; i++ ) - { - edgeset_apply_push_parallel_from_vertexset(edges, frontier, updateEdge()); - if ((i) == ((1) )) - { - auto ____graphit_tmp_out = new VertexSubset ( builtin_getVertices(edges) , 0 ); -bool * next5 = newA(bool, builtin_getVertices(edges) ); - parallel_for (int v = 0; v < builtin_getVertices(edges) ; v++) { - next5[v] = 0; -if ( updateVertexFirstRound()( v ) ) - next5[v] = 1; - } //end of loop -____graphit_tmp_out->num_vertices_ = sequence::sum( next5, builtin_getVertices(edges) ); -____graphit_tmp_out->bool_map_ = next5; - - frontier = ____graphit_tmp_out; - } - else - { - auto ____graphit_tmp_out = new VertexSubset ( builtin_getVertices(edges) , 0 ); -bool * next6 = newA(bool, builtin_getVertices(edges) ); - parallel_for (int v = 0; v < builtin_getVertices(edges) ; v++) { - next6[v] = 0; -if ( updateVertex()( v ) ) - next6[v] = 1; - } //end of loop -____graphit_tmp_out->num_vertices_ = sequence::sum( next6, builtin_getVertices(edges) ); -____graphit_tmp_out->bool_map_ = next6; - - frontier = ____graphit_tmp_out; - - } - } - double elapsed_time = stopTimer() ; - std::cout << "elapsed time: "<< std::endl; - std::cout << elapsed_time<< std::endl; -}; - diff --git a/graphit_eval/eval/table7/cpps/sssp_hybrid_denseforward.cpp b/graphit_eval/eval/table7/cpps/sssp_hybrid_denseforward.cpp deleted file mode 100755 index 0ca00d67..00000000 --- a/graphit_eval/eval/table7/cpps/sssp_hybrid_denseforward.cpp +++ /dev/null @@ -1,144 +0,0 @@ -#include -#include -#include -#include "intrinsics.h" -WGraph edges; -int * __restrict SP; -template VertexSubset* edgeset_apply_hybrid_denseforward_parallel_weighted_deduplicatied_from_vertexset_with_frontier(WGraph & g , VertexSubset* from_vertexset, APPLY_FUNC apply_func) -{ - int64_t numVertices = 
g.num_nodes(), numEdges = g.num_edges(); - from_vertexset->toSparse(); - long m = from_vertexset->size(); - // used to generate nonzero indices to get degrees - uintT *degrees = newA(uintT, m); - // We probably need this when we get something that doesn't have a dense set, not sure - // We can also write our own, the eixsting one doesn't quite work for bitvectors - //from_vertexset->toSparse(); - { - parallel_for (long i = 0; i < m; i++) { - NodeID v = from_vertexset->dense_vertex_set_[i]; - degrees[i] = g.out_degree(v); - } - } - uintT outDegrees = sequence::plusReduce(degrees, m); - if (m + outDegrees > numEdges / 20) { - VertexSubset *next_frontier = new VertexSubset(g.num_nodes(), 0); - bool * next = newA(bool, g.num_nodes()); - parallel_for (int i = 0; i < numVertices; i++)next[i] = 0; - from_vertexset->toDense(); - parallel_for ( NodeID s=0; s < g.num_nodes(); s++) { - if (from_vertexset->bool_map_[s] ) { - for(WNode d : g.out_neigh(s)){ - if( apply_func ( s , d.v, d.w ) ) { - next[d.v] = 1; - } - } // end of inner for loop - } // end of if for from func or from vertexset - } //end of outer for loop - next_frontier->num_vertices_ = sequence::sum(next, numVertices); - free(next_frontier->bool_map_); - next_frontier->bool_map_ = next; - free(degrees); - return next_frontier; -} else { - if (g.flags_ == nullptr){ - g.flags_ = new int[numVertices](); - parallel_for(int i = 0; i < numVertices; i++) g.flags_[i]=0; - } - VertexSubset *next_frontier = new VertexSubset(g.num_nodes(), 0); - if (numVertices != from_vertexset->getVerticesRange()) { - cout << "edgeMap: Sizes Don't match" << endl; - abort(); - } - if (outDegrees == 0) { - free(degrees); - return next_frontier; - } - uintT *offsets = degrees; - long outEdgeCount = sequence::plusScan(offsets, degrees, m); - uintE *outEdges = newA(uintE, outEdgeCount); - parallel_for (long i=0; i < m; i++) { - NodeID s = from_vertexset->dense_vertex_set_[i]; - int j = 0; - uintT offset = offsets[i]; - for(WNode d : 
g.out_neigh(s)){ - if( apply_func ( s , d.v, d.w ) && CAS(&(g.flags_[d.v]), 0, 1) ) { - outEdges[offset + j] = d.v; - } else { outEdges[offset + j] = UINT_E_MAX; } - j++; - } //end of for loop on neighbors - } - uintE *nextIndices = newA(uintE, outEdgeCount); - long nextM = sequence::filter(outEdges, nextIndices, outEdgeCount, nonMaxF()); - free(outEdges); - free(degrees); - next_frontier->num_vertices_ = nextM; - delete[] next_frontier->dense_vertex_set_; - next_frontier->dense_vertex_set_ = nextIndices; - parallel_for(int i = 0; i < nextM; i++){ - g.flags_[nextIndices[i]] = 0; - } - return next_frontier; -} //end of else -} //end of edgeset apply function -struct SP_generated_vector_op_apply_func_0 -{ - void operator() (NodeID v) - { - SP[v] = (2147483647) ; - }; -}; -struct updateEdge -{ - bool operator() (NodeID src, NodeID dst, int weight) - { - bool output2 ; - bool SP_trackving_var_1 = (bool) 0; - SP_trackving_var_1 = writeMin( &SP[dst], (SP[src] + weight) ); - output2 = SP_trackving_var_1; - return output2; - }; -}; -struct reset -{ - void operator() (NodeID v) - { - SP[v] = (2147483647) ; - }; -}; -int main(int argc, char * argv[]) -{ - edges = builtin_loadWeightedEdgesFromFile ( argv[(1) ]) ; - SP = new int [ builtin_getVertices(edges) ]; - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - SP_generated_vector_op_apply_func_0()(vertexsetapply_iter); - }; - for ( int trail = (0) ; trail < (10) ; trail++ ) - { - startTimer() ; - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - reset()(vertexsetapply_iter); - }; - int n = builtin_getVertices(edges) ; - VertexSubset * frontier = new VertexSubset ( builtin_getVertices(edges) , (0) ); - builtin_addVertex(frontier, atoi(argv[2]) ) ; - SP[atoi(argv[2]) ] = atoi(argv[2]) ; - int rounds = (0) ; - while ( (builtin_getVertexSetSize(frontier) ) != ((0) )) - { - frontier = 
edgeset_apply_hybrid_denseforward_parallel_weighted_deduplicatied_from_vertexset_with_frontier(edges, frontier, updateEdge()); - rounds = (rounds + (1) ); - if ((rounds) == (n)) - { - std::cout << "negative cycle"<< std::endl; - break; - } - } - float elapsed_time = stopTimer() ; - std::cout << "elapsed time: "<< std::endl; - std::cout << elapsed_time<< std::endl; - std::cout << "rounds"<< std::endl; - std::cout << rounds<< std::endl; - } -}; - diff --git a/graphit_eval/eval/table7/cpps/sssp_hybrid_denseforward_numa.cpp b/graphit_eval/eval/table7/cpps/sssp_hybrid_denseforward_numa.cpp deleted file mode 100755 index 25a089bd..00000000 --- a/graphit_eval/eval/table7/cpps/sssp_hybrid_denseforward_numa.cpp +++ /dev/null @@ -1,180 +0,0 @@ -#include -#include -#include -#include "intrinsics.h" -WGraph edges; -int * __restrict SP; -template VertexSubset* edgeset_apply_hybrid_denseforward_parallel_weighted_deduplicatied_from_vertexset_with_frontier(WGraph & g , VertexSubset* from_vertexset, APPLY_FUNC apply_func) -{ - int64_t numVertices = g.num_nodes(), numEdges = g.num_edges(); - from_vertexset->toSparse(); - long m = from_vertexset->size(); - // used to generate nonzero indices to get degrees - uintT *degrees = newA(uintT, m); - // We probably need this when we get something that doesn't have a dense set, not sure - // We can also write our own, the eixsting one doesn't quite work for bitvectors - //from_vertexset->toSparse(); - { - parallel_for (long i = 0; i < m; i++) { - NodeID v = from_vertexset->dense_vertex_set_[i]; - degrees[i] = g.out_degree(v); - } - } - uintT outDegrees = sequence::plusReduce(degrees, m); - if (m + outDegrees > numEdges / 20) { - VertexSubset *next_frontier = new VertexSubset(g.num_nodes(), 0); - bool * next = newA(bool, g.num_nodes()); - parallel_for (int i = 0; i < numVertices; i++)next[i] = 0; - from_vertexset->toDense(); - - - // parallel_for ( NodeID s=0; s < g.num_nodes(); s++) { - // if (from_vertexset->bool_map_[s] ) { - // for(WNode d 
: g.out_neigh(s)){ - // if( apply_func ( s , d.v, d.w ) ) { - // next[d.v] = 1; - // } - // } // end of inner for loop - // } // end of if for from func or from vertexset - // } //end of outer for loop - int numPlaces = omp_get_num_places(); - int numSegments = g.getNumSegments("s1"); - int segmentsPerSocket = (numSegments + numPlaces - 1) / numPlaces; -#pragma omp parallel num_threads(numPlaces) proc_bind(spread) - { - int socketId = omp_get_place_num(); - for (int i = 0; i < segmentsPerSocket; i++) { - int segmentId = socketId + i * numPlaces; - if (segmentId >= numSegments) break; - auto sg = g.getSegmentedGraph(std::string("s1"), segmentId); -#pragma omp parallel num_threads(omp_get_place_num_procs(socketId)) proc_bind(close) - { -#pragma omp for schedule(dynamic, 1024) - for ( NodeID localId=0; localId < sg->numVertices; localId++) { - NodeID s = sg->graphId[localId]; - if (from_vertexset->bool_map_[s] ) { - for (int64_t ngh = sg->vertexArray[localId]; ngh < sg->vertexArray[localId+1]; ngh++) { - WNode d = sg->edgeArray[ngh]; - if( apply_func ( s , d.v, d.w ) ) { - next[d.v] = 1; - } - } //end of loop on in neighbors - } //end of outer for loop - } // end of per-socket parallel_for - } // end of segment for loop - }// end of per-socket parallel region - } - - // for (int segmentId = 0; segmentId < g.getNumSegments("s1"); segmentId++) { - // auto sg = g.getSegmentedGraph(std::string("s1"), segmentId); - // parallel_for ( NodeID localId=0; localId < sg->numVertices; localId++) { - // NodeID s = sg->graphId[localId]; - // if (from_vertexset->bool_map_[s] ) { - // for (int64_t ngh = sg->vertexArray[localId]; ngh < sg->vertexArray[localId+1]; ngh++) { - // WNode d = sg->edgeArray[ngh]; - // if( apply_func ( s , d.v, d.w ) ) { - // next[d.v] = 1; - // } - // } //end of loop on in neighbors - // } - // } //end of outer for loop - // } - - next_frontier->num_vertices_ = sequence::sum(next, numVertices); - next_frontier->bool_map_ = next; - return next_frontier; - } 
else { - if (g.flags_ == nullptr){ - g.flags_ = new int[numVertices](); - parallel_for(int i = 0; i < numVertices; i++) g.flags_[i]=0; - } - VertexSubset *next_frontier = new VertexSubset(g.num_nodes(), 0); - if (numVertices != from_vertexset->getVerticesRange()) { - cout << "edgeMap: Sizes Don't match" << endl; - abort(); - } - if (outDegrees == 0) return next_frontier; - uintT *offsets = degrees; - long outEdgeCount = sequence::plusScan(offsets, degrees, m); - uintE *outEdges = newA(uintE, outEdgeCount); - parallel_for (long i=0; i < m; i++) { - NodeID s = from_vertexset->dense_vertex_set_[i]; - int j = 0; - uintT offset = offsets[i]; - for(WNode d : g.out_neigh(s)){ - if( apply_func ( s , d.v, d.w ) && CAS(&(g.flags_[d.v]), 0, 1) ) { - outEdges[offset + j] = d.v; - } else { outEdges[offset + j] = UINT_E_MAX; } - j++; - } //end of for loop on neighbors - } - uintE *nextIndices = newA(uintE, outEdgeCount); - long nextM = sequence::filter(outEdges, nextIndices, outEdgeCount, nonMaxF()); - free(outEdges); - free(degrees); - next_frontier->num_vertices_ = nextM; - next_frontier->dense_vertex_set_ = nextIndices; - parallel_for(int i = 0; i < nextM; i++){ - g.flags_[nextIndices[i]] = 0; - } - return next_frontier; - } //end of else - } //end of edgeset apply function -struct SP_generated_vector_op_apply_func_0 -{ - void operator() (NodeID v) - { - SP[v] = (2147483647) ; - }; -}; -struct updateEdge -{ - bool operator() (NodeID src, NodeID dst, int weight) - { - bool output2 ; - bool SP_trackving_var_1 = (bool) 0; - SP_trackving_var_1 = writeMin( &SP[dst], (SP[src] + weight) ); - output2 = SP_trackving_var_1; - return output2; - }; -}; -int main(int argc, char * argv[]) -{ - edges = builtin_loadWeightedEdgesFromFile ( argv[(1) ]) ; - edges.buildPushSegmentedGraphs("s1", atoi(argv[3]), true); - std::cout<<"success"< * frontier = new VertexSubset ( builtin_getVertices(edges) , (0) ); - builtin_addVertex(frontier, atoi(argv[2]) ) ; - SP[atoi(argv[2]) ] = atoi(argv[2]) ; - 
//int rounds = (0) ; - while ( (builtin_getVertexSetSize(frontier) ) != ((0) )) - { - frontier = edgeset_apply_hybrid_denseforward_parallel_weighted_deduplicatied_from_vertexset_with_frontier(edges, frontier, updateEdge()); - // int result = 0; - // for (NodeID n=0; n < builtin_getVertices(edges); n++) { - // result += SP[n]; - // } - // std::cout << "result=" << result << std::endl; - //rounds = (rounds + (1) ); - // if ((rounds) == (n)) - // { - // std::cout << "negative cycle"<< std::endl; - // break; - // } - } - float elapsed_time = stopTimer() ; - std::cout << "elapsed time: "<< std::endl; - std::cout << elapsed_time<< std::endl; - //std::cout << "rounds"<< std::endl; - //std::cout << rounds<< std::endl; - } -}; - diff --git a/graphit_eval/eval/table7/cpps/sssp_hybrid_denseforward_segment.cpp b/graphit_eval/eval/table7/cpps/sssp_hybrid_denseforward_segment.cpp deleted file mode 100755 index 5b07c192..00000000 --- a/graphit_eval/eval/table7/cpps/sssp_hybrid_denseforward_segment.cpp +++ /dev/null @@ -1,152 +0,0 @@ -#include -#include -#include -#include "intrinsics.h" -WGraph edges; -int * __restrict SP; -template VertexSubset* edgeset_apply_hybrid_denseforward_parallel_weighted_deduplicatied_from_vertexset_with_frontier(WGraph & g , VertexSubset* from_vertexset, APPLY_FUNC apply_func) -{ - int64_t numVertices = g.num_nodes(), numEdges = g.num_edges(); - from_vertexset->toSparse(); - long m = from_vertexset->size(); - // used to generate nonzero indices to get degrees - uintT *degrees = newA(uintT, m); - // We probably need this when we get something that doesn't have a dense set, not sure - // We can also write our own, the eixsting one doesn't quite work for bitvectors - //from_vertexset->toSparse(); - { - parallel_for (long i = 0; i < m; i++) { - NodeID v = from_vertexset->dense_vertex_set_[i]; - degrees[i] = g.out_degree(v); - } - } - uintT outDegrees = sequence::plusReduce(degrees, m); - if (m + outDegrees > numEdges / 20) { - VertexSubset *next_frontier = 
new VertexSubset(g.num_nodes(), 0); - bool * next = newA(bool, g.num_nodes()); - parallel_for (int i = 0; i < numVertices; i++)next[i] = 0; - from_vertexset->toDense(); - - - // parallel_for ( NodeID s=0; s < g.num_nodes(); s++) { - // if (from_vertexset->bool_map_[s] ) { - // for(WNode d : g.out_neigh(s)){ - // if( apply_func ( s , d.v, d.w ) ) { - // next[d.v] = 1; - // } - // } // end of inner for loop - // } // end of if for from func or from vertexset - // } //end of outer for loop - - for (int segmentId = 0; segmentId < g.getNumSegments("s1"); segmentId++) { - auto sg = g.getSegmentedGraph(std::string("s1"), segmentId); - parallel_for ( NodeID localId=0; localId < sg->numVertices; localId++) { - NodeID s = sg->graphId[localId]; - if (from_vertexset->bool_map_[s] ) { - for (int64_t ngh = sg->vertexArray[localId]; ngh < sg->vertexArray[localId+1]; ngh++) { - WNode d = sg->edgeArray[ngh]; - if( apply_func ( s , d.v, d.w ) ) { - next[d.v] = 1; - } - } //end of loop on in neighbors - } - } //end of outer for loop - } - - next_frontier->num_vertices_ = sequence::sum(next, numVertices); - next_frontier->bool_map_ = next; - return next_frontier; - } else { - if (g.flags_ == nullptr){ - g.flags_ = new int[numVertices](); - parallel_for(int i = 0; i < numVertices; i++) g.flags_[i]=0; - } - VertexSubset *next_frontier = new VertexSubset(g.num_nodes(), 0); - if (numVertices != from_vertexset->getVerticesRange()) { - cout << "edgeMap: Sizes Don't match" << endl; - abort(); - } - if (outDegrees == 0) return next_frontier; - uintT *offsets = degrees; - long outEdgeCount = sequence::plusScan(offsets, degrees, m); - uintE *outEdges = newA(uintE, outEdgeCount); - parallel_for (long i=0; i < m; i++) { - NodeID s = from_vertexset->dense_vertex_set_[i]; - int j = 0; - uintT offset = offsets[i]; - for(WNode d : g.out_neigh(s)){ - if( apply_func ( s , d.v, d.w ) && CAS(&(g.flags_[d.v]), 0, 1) ) { - outEdges[offset + j] = d.v; - } else { outEdges[offset + j] = UINT_E_MAX; } - j++; - 
} //end of for loop on neighbors - } - uintE *nextIndices = newA(uintE, outEdgeCount); - long nextM = sequence::filter(outEdges, nextIndices, outEdgeCount, nonMaxF()); - free(outEdges); - free(degrees); - next_frontier->num_vertices_ = nextM; - next_frontier->dense_vertex_set_ = nextIndices; - parallel_for(int i = 0; i < nextM; i++){ - g.flags_[nextIndices[i]] = 0; - } - return next_frontier; -} //end of else -} //end of edgeset apply function -struct SP_generated_vector_op_apply_func_0 -{ - void operator() (NodeID v) - { - SP[v] = (2147483647) ; - }; -}; -struct updateEdge -{ - bool operator() (NodeID src, NodeID dst, int weight) - { - bool output2 ; - bool SP_trackving_var_1 = (bool) 0; - SP_trackving_var_1 = writeMin( &SP[dst], (SP[src] + weight) ); - output2 = SP_trackving_var_1; - return output2; - }; -}; -int main(int argc, char * argv[]) -{ - edges = builtin_loadWeightedEdgesFromFile ( argv[(1) ]) ; - edges.buildPushSegmentedGraphs("s1", atoi(argv[3])); - SP = new int [ builtin_getVertices(edges) ]; - for ( int trail = (0) ; trail < (10) ; trail++ ) - { - parallel_for (int i = 0; i < builtin_getVertices(edges) ; i++) { - SP_generated_vector_op_apply_func_0()(i); - }; - startTimer() ; - int n = builtin_getVertices(edges) ; - VertexSubset * frontier = new VertexSubset ( builtin_getVertices(edges) , (0) ); - builtin_addVertex(frontier, atoi(argv[2]) ) ; - SP[atoi(argv[2]) ] = atoi(argv[2]) ; - //int rounds = (0) ; - while ( (builtin_getVertexSetSize(frontier) ) != ((0) )) - { - frontier = edgeset_apply_hybrid_denseforward_parallel_weighted_deduplicatied_from_vertexset_with_frontier(edges, frontier, updateEdge()); - int result = 0; - for (NodeID n=0; n < builtin_getVertices(edges); n++) { - result += SP[n]; - } - std::cout << "result=" << result << std::endl; - //rounds = (rounds + (1) ); - // if ((rounds) == (n)) - // { - // std::cout << "negative cycle"<< std::endl; - // break; - // } - } - float elapsed_time = stopTimer() ; - std::cout << "elapsed time: "<< 
std::endl; - std::cout << elapsed_time<< std::endl; - //std::cout << "rounds"<< std::endl; - //std::cout << rounds<< std::endl; - } -}; - diff --git a/graphit_eval/eval/table7/cpps/sssp_hybrid_segment.cpp b/graphit_eval/eval/table7/cpps/sssp_hybrid_segment.cpp deleted file mode 100644 index f82fe248..00000000 --- a/graphit_eval/eval/table7/cpps/sssp_hybrid_segment.cpp +++ /dev/null @@ -1,158 +0,0 @@ -#include -#include -#include -#include "intrinsics.h" -WGraph edges; -int * __restrict SP; -template VertexSubset* edgeset_apply_hybrid_dense_parallel_weighted_deduplicatied_from_vertexset_with_frontier(WGraph & g , VertexSubset* from_vertexset, APPLY_FUNC apply_func, PUSH_APPLY_FUNC push_apply_func) -{ - int64_t numVertices = g.num_nodes(), numEdges = g.num_edges(); - from_vertexset->toSparse(); - long m = from_vertexset->size(); - // used to generate nonzero indices to get degrees - uintT *degrees = newA(uintT, m); - // We probably need this when we get something that doesn't have a dense set, not sure - // We can also write our own, the eixsting one doesn't quite work for bitvectors - //from_vertexset->toSparse(); - { - parallel_for (long i = 0; i < m; i++) { - NodeID v = from_vertexset->dense_vertex_set_[i]; - degrees[i] = g.out_degree(v); - } - } - uintT outDegrees = sequence::plusReduce(degrees, m); - if (m + outDegrees > numEdges / 20) { - VertexSubset *next_frontier = new VertexSubset(g.num_nodes(), 0); - bool * next = newA(bool, g.num_nodes()); - parallel_for (int i = 0; i < numVertices; i++)next[i] = 0; - from_vertexset->toDense(); - for (int segmentId = 0; segmentId < g.getNumSegments("s1"); segmentId++) { - auto sg = g.getSegmentedGraph(std::string("s1"), segmentId); -parallel_for ( NodeID localId=0; localId < sg->numVertices; localId++) { - NodeID d = sg->graphId[localId]; - for (int64_t ngh = sg->vertexArray[localId]; ngh < sg->vertexArray[localId+1]; ngh++) { - WNode s = sg->edgeArray[ngh]; - if (from_vertexset->bool_map_[s.v] ) { - if( apply_func ( s.v 
, d, s.w ) ) { - next[d] = 1; - } - } - } //end of loop on in neighbors - } //end of outer for loop - } // end of segment for loop - next_frontier->num_vertices_ = sequence::sum(next, numVertices); - next_frontier->bool_map_ = next; - return next_frontier; -} else { - if (g.flags_ == nullptr){ - g.flags_ = new int[numVertices](); - parallel_for(int i = 0; i < numVertices; i++) g.flags_[i]=0; - } - VertexSubset *next_frontier = new VertexSubset(g.num_nodes(), 0); - if (numVertices != from_vertexset->getVerticesRange()) { - cout << "edgeMap: Sizes Don't match" << endl; - abort(); - } - if (outDegrees == 0) return next_frontier; - uintT *offsets = degrees; - long outEdgeCount = sequence::plusScan(offsets, degrees, m); - uintE *outEdges = newA(uintE, outEdgeCount); - parallel_for (long i=0; i < m; i++) { - NodeID s = from_vertexset->dense_vertex_set_[i]; - int j = 0; - uintT offset = offsets[i]; - for(WNode d : g.out_neigh(s)){ - if( push_apply_func ( s , d.v, d.w ) && CAS(&(g.flags_[d.v]), 0, 1) ) { - outEdges[offset + j] = d.v; - } else { outEdges[offset + j] = UINT_E_MAX; } - j++; - } //end of for loop on neighbors - } - uintE *nextIndices = newA(uintE, outEdgeCount); - long nextM = sequence::filter(outEdges, nextIndices, outEdgeCount, nonMaxF()); - free(outEdges); - free(degrees); - next_frontier->num_vertices_ = nextM; - next_frontier->dense_vertex_set_ = nextIndices; - parallel_for(int i = 0; i < nextM; i++){ - g.flags_[nextIndices[i]] = 0; - } - return next_frontier; - } //end of else -} //end of edgeset apply function -struct updateEdge_push_ver -{ - bool operator() (NodeID src, NodeID dst, int weight) - { - bool output4 ; - bool SP_trackving_var_3 = (bool) 0; - SP_trackving_var_3 = writeMin( &SP[dst], (SP[src] + weight) ); - output4 = SP_trackving_var_3; - return output4; - }; -}; -struct SP_generated_vector_op_apply_func_0 -{ - void operator() (NodeID v) - { - SP[v] = (2147483647) ; - }; -}; -struct updateEdge -{ - bool operator() (NodeID src, NodeID dst, int 
weight) - { - bool output2 ; - bool SP_trackving_var_1 = (bool) 0; - if ( ( SP[dst]) > ( (SP[src] + weight)) ) { - SP[dst]= (SP[src] + weight); - SP_trackving_var_1 = true ; - } - output2 = SP_trackving_var_1; - return output2; - }; -}; -struct reset -{ - void operator() (NodeID v) - { - SP[v] = (2147483647) ; - }; -}; -int main(int argc, char * argv[]) -{ - edges = builtin_loadWeightedEdgesFromFile ( argv[(1) ]) ; - edges.buildPullSegmentedGraphs("s1", 15); - SP = new int [ builtin_getVertices(edges) ]; - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - SP_generated_vector_op_apply_func_0()(vertexsetapply_iter); - }; - for ( int trail = (0) ; trail < (10) ; trail++ ) - { - parallel_for (int vertexsetapply_iter = 0; vertexsetapply_iter < builtin_getVertices(edges) ; vertexsetapply_iter++) { - reset()(vertexsetapply_iter); - }; - startTimer() ; - int n = builtin_getVertices(edges) ; - VertexSubset * frontier = new VertexSubset ( builtin_getVertices(edges) , (0) ); - builtin_addVertex(frontier, (14) ) ; - SP[(14) ] = (14) ; - int rounds = (0) ; - while ( (builtin_getVertexSetSize(frontier) ) != ((0) )) - { - frontier = edgeset_apply_hybrid_dense_parallel_weighted_deduplicatied_from_vertexset_with_frontier(edges, frontier, updateEdge(), updateEdge_push_ver()); - rounds = (rounds + (1) ); - if ((rounds) == (n)) - { - std::cout << "negative cycle"<< std::endl; - break; - } - } - float elapsed_time = stopTimer() ; - std::cout << "elapsed time: "<< std::endl; - std::cout << elapsed_time<< std::endl; - std::cout << "rounds"<< std::endl; - std::cout << rounds<< std::endl; - } -}; - diff --git a/graphit_eval/eval/table7/cpps/sssp_push_slq.cpp b/graphit_eval/eval/table7/cpps/sssp_push_slq.cpp deleted file mode 100755 index 37766006..00000000 --- a/graphit_eval/eval/table7/cpps/sssp_push_slq.cpp +++ /dev/null @@ -1,73 +0,0 @@ -#include -#include -#include "intrinsics.h" - -//#define VERIFY - -WGraph edges; 
-int * __restrict SP; - -void SP_generated_vector_op_apply_func_0(NodeID v) -{ - SP[v] = (2147483647) ; -}; -bool updateEdge(NodeID src, NodeID dst, int weight) -{ - bool output2 ; - bool SP_trackving_var_1 = (bool) 0; - SP_trackving_var_1 = writeMin( &SP[dst], (SP[src] + weight) ); - output2 = SP_trackving_var_1; - return output2; -}; -void printSP(NodeID v) -{ - std::cout << SP[v]<< std::endl; -}; -int main(int argc, char * argv[] ) -{ - edges = builtin_loadWeightedEdgesFromFile (argv[1]) ; - SP = new int [ builtin_getVertices(edges) ]; - - for (int trail = 0; trail < 10; trail++){ - - parallel_for (int i = 0; i < builtin_getVertices(edges) ; i++) { - SP_generated_vector_op_apply_func_0(i); - }; - startTimer() ; - int n = builtin_getVertices(edges) ; - VertexSubset * frontier = new VertexSubset ( builtin_getVertices(edges) , (0) ); - int sp = std::stoi(argv[2]); - builtin_addVertex(frontier, sp ) ; - SP[sp ] = (0) ; - int rounds = (0) ; - while ( (builtin_getVertexSetSize(frontier) ) != ((0) )) - { - frontier = edgeset_apply_push_parallel_sliding_queue_weighted_deduplicatied_from_vertexset_with_frontier(edges, frontier, updateEdge); - rounds = (rounds + (1) ); - if ((rounds) == (n)) - { - std::cout << "negative cycle"<< std::endl; - break; - } - } - float elapsed_time = stopTimer() ; - std::cout << "elapsed time: "<< std::endl; - std::cout << elapsed_time<< std::endl; - } - -#ifdef VERIFY - std::cout << "num_rounds: " << rounds << std::endl; - int sum = 0; - int count = 0; - for (int i = 0; i < n; i++){ - if (SP[i] < 2147483647) { - sum += SP[i]; - count++; - } - } - std::cout << "SP sum: " << sum << std::endl; - std::cout << "SP count: " << count << std::endl; -#endif - -}; - diff --git a/graphit_eval/eval/table7/table7_graphit.py b/graphit_eval/eval/table7/table7_graphit.py index 628f4840..65538464 100644 --- a/graphit_eval/eval/table7/table7_graphit.py +++ b/graphit_eval/eval/table7/table7_graphit.py @@ -9,14 +9,25 @@ def main(): default=["testGraph"], help = 
"enable graphs with \ socLive, road-usad, twitter, webGraph, friendster.Defaults to the test gra\ ph.") + parser.add_argument('-a', '--applications', nargs='+', + default=["bfs", "sssp", "pr", "cc", "prd"], + help="applications to benchmark. Defaults to all applications.") args = parser.parse_args() graphs_arg = '' + apps_args = '' + for graph in args.graphs: graphs_arg = graphs_arg + " " + graph - print("running benchmarks on: " + graphs_arg) - run_benchmark_cmd = "python benchmark.py --graph " + graphs_arg - parse_benchmark_results_cmd = "python parse.py --graph " + graphs_arg + print("running benchmarks on graphs: " + graphs_arg) + + for app in args.applications: + apps_args = apps_args + " " + app + print("running benchmarks for applications: " + graphs_arg) + + run_benchmark_cmd = "python benchmark.py --graph " + graphs_arg + " --applications " + apps_args + parse_benchmark_results_cmd = "python parse.py --graph " + graphs_arg + " --applications " + apps_args + out = subprocess.check_call(run_benchmark_cmd, stderr=subprocess.PIPE, shell=True) out = subprocess.check_call(parse_benchmark_results_cmd, stderr=subprocess.PIPE, shell=True) print ("Done parsing the run outputs") From 3d133bd95b0c5dda677d4bed91430afdfb17ed81 Mon Sep 17 00:00:00 2001 From: Yunming Date: Tue, 13 Nov 2018 16:21:23 -0500 Subject: [PATCH 02/19] adding support for using argv in configApplyNumSSG --- .../graphit/frontend/high_level_schedule.h | 9 ++- src/backend/codegen_cpp.cpp | 17 ++++- src/frontend/high_level_schedule.cpp | 71 +++++++++++++++++-- src/midend/apply_expr_lower.cpp | 6 +- test/c++/high_level_schedule_test.cpp | 19 +++++ test/c++/test.cpp | 4 +- .../pagerank_pull_parallel_segment_argv.gt | 4 ++ test/python/test_with_schedules.py | 13 +++- 8 files changed, 128 insertions(+), 15 deletions(-) create mode 100644 test/input_with_schedules/pagerank_pull_parallel_segment_argv.gt diff --git a/include/graphit/frontend/high_level_schedule.h 
b/include/graphit/frontend/high_level_schedule.h index 87b33020..1ee4a882 100644 --- a/include/graphit/frontend/high_level_schedule.h +++ b/include/graphit/frontend/high_level_schedule.h @@ -14,6 +14,8 @@ #include #include #include +#include + namespace graphit { namespace fir { @@ -129,10 +131,14 @@ namespace graphit { return setApply(apply_label, "num_segment", num_segment); } - + // High level API for specifying the number of segments for a particular direction high_level_schedule::ProgramScheduleNode::Ptr configApplyNumSSG(std::string apply_label, std::string config, int num_segment, std::string direction="all"); + // High level API for specifying the number of segments for a particular direction + // the user can specify a string "argv[x]" to use argv[x] as argument to number of segments at runtime + high_level_schedule::ProgramScheduleNode::Ptr + configApplyNumSSG(std::string apply_label, std::string config, string num_segment_argv, std::string direction="all"); // High level API for enabling NUMA optimization // Deprecated, to be replaced with configApplyNUMA @@ -174,6 +180,7 @@ namespace graphit { std::map parallelCompatibilityMap_; void initGraphIterationSpaceIfNeeded(string label); + int extractIntegerFromString(string input_string); }; diff --git a/src/backend/codegen_cpp.cpp b/src/backend/codegen_cpp.cpp index fb7b9336..47783d7d 100644 --- a/src/backend/codegen_cpp.cpp +++ b/src/backend/codegen_cpp.cpp @@ -387,9 +387,20 @@ namespace graphit { auto label_iter_first = (*label_iter).first; auto label_iter_second = (*label_iter).second; auto numa_aware_flag = mir_context_->edgeset_to_label_to_merge_reduce[edge_iter_first][label_iter_first]->numa_aware; - oss << " " << edgeset->name << ".buildPullSegmentedGraphs(\"" << label_iter_first - << "\", " << label_iter_second - << (numa_aware_flag ? ", true" : "") << ");" << std::endl; + + if (label_iter_second < 0) { + //do a specical case for negative number of segments. 
I + // in the case of negative integer, we use the number as argument to runtimve argument argv + // this is the only place in the generated code that we set the number of segments + oss << " " << edgeset->name << ".buildPullSegmentedGraphs(\"" << label_iter_first + << "\", " << "atoi(argv[" << -1*label_iter_second << "])" + << (numa_aware_flag ? ", true" : "") << ");" << std::endl; + } else { + // just use the positive integer as argument to number of segments + oss << " " << edgeset->name << ".buildPullSegmentedGraphs(\"" << label_iter_first + << "\", " << label_iter_second + << (numa_aware_flag ? ", true" : "") << ");" << std::endl; + } } } diff --git a/src/frontend/high_level_schedule.cpp b/src/frontend/high_level_schedule.cpp index d05d2dd3..30f2dd2c 100644 --- a/src/frontend/high_level_schedule.cpp +++ b/src/frontend/high_level_schedule.cpp @@ -284,7 +284,7 @@ namespace graphit { // If no schedule has been specified for the current label, create a new one if (schedule_->apply_schedules->find(apply_label) == schedule_->apply_schedules->end()) { - //Default schedule pull, serial + //Default schedule pull, serial, -100 for number of segments (we use -1 to -10 for argv) (*schedule_->apply_schedules)[apply_label] = {apply_label, ApplySchedule::DirectionType::PULL, ApplySchedule::ParType::Serial, @@ -292,7 +292,7 @@ namespace graphit { ApplySchedule::OtherOpt::QUEUE, ApplySchedule::PullFrontierType::BOOL_MAP, ApplySchedule::PullLoadBalance::VERTEX_BASED, - 0, -1, false}; + 0, -100, false}; } if (apply_schedule_str == "pull_edge_based_load_balance") { @@ -330,7 +330,7 @@ namespace graphit { // If no schedule has been specified for the current label, create a new one if (schedule_->apply_schedules->find(apply_label) == schedule_->apply_schedules->end()) { - //Default schedule pull, serial + //Default schedule pull, serial, -100 for number of segments (we use -1 to -10 for argv) (*schedule_->apply_schedules)[apply_label] = (*schedule_->apply_schedules)[apply_label] = 
{apply_label, ApplySchedule::DirectionType::PULL, @@ -339,7 +339,7 @@ namespace graphit { ApplySchedule::OtherOpt::QUEUE, ApplySchedule::PullFrontierType::BOOL_MAP, ApplySchedule::PullLoadBalance::VERTEX_BASED, - 0, -1, false}; + 0, -100, false}; } @@ -681,6 +681,68 @@ namespace graphit { } + // extract the integer from a string + int high_level_schedule::ProgramScheduleNode::extractIntegerFromString(string input_string){ + + std::size_t const n = input_string.find_first_of("0123456789"); + if (n != std::string::npos) + { + std::size_t const m = input_string.find_first_not_of("0123456789", n); + return stoi(input_string.substr(n, m != std::string::npos ? m-n : m)); + } + return -1; + + } + + + high_level_schedule::ProgramScheduleNode::Ptr + high_level_schedule::ProgramScheduleNode::configApplyNumSSG(std::string apply_label, std::string config, + string num_segment_argv, std::string direction) { + + initGraphIterationSpaceIfNeeded(apply_label); + auto gis_vec = (*schedule_->graph_iter_spaces)[apply_label]; + int argv_number; + + for (auto &gis : *gis_vec) { + if (gis.scheduling_api_direction == direction || direction == "all") { + if (config == "fixed-vertex-count"){ + if (gis.scheduling_api_direction != "DensePull"){ + //currently, we don't support any direction other than DensePull for graph partitioning + // push based partitioning is coming + std::cout << "unsupported direction for partition SSGs: " << gis.scheduling_api_direction << std::endl; + throw "Unsupported Schedule!"; + } + gis.setPTTag(GraphIterationSpace::Dimension::SSG, Tags::PT_Tag::FixedVertexCount); + } else if (config == "edge-aware-vertex-count"){ + gis.setPTTag(GraphIterationSpace::Dimension::SSG, Tags::PT_Tag::EdgeAwareVertexCount); + throw "Unsupported Schedule!"; + } else { + throw "Unsupported Schedule!"; + } + + regex argv_regex ("argv\\[(\\d)\\]"); + + // here we do a hack and uses a negative integer to denote the integer argument to argv + // the code generation will treat negative 
numbers differently by generating a argv[negative_integer) run time argument + // to use as number of segments + // the user input argv string has to match a pattern argv[integer] + if (regex_match(num_segment_argv, argv_regex)){ + argv_number = -1*extractIntegerFromString(num_segment_argv); + } else { + std::cerr << "Invalid string argument. It has to be of form argv[integer]" << std::endl; + throw "Unsupported Schedule!"; + } + + //gis is not really used right now + gis.num_ssg = argv_number; + } + } + + // for now, we still use the old setApply API. We will probably switch to full graph iteration space soon + return setApply(apply_label, "num_segment", argv_number); + + } + high_level_schedule::ProgramScheduleNode::Ptr high_level_schedule::ProgramScheduleNode::configApplyNumSSG(std::string apply_label, std::string config, int num_segment, std::string direction) { @@ -705,6 +767,7 @@ namespace graphit { } else { throw "Unsupported Schedule!"; } + assert(num_segment > 0); gis.num_ssg = num_segment; } } diff --git a/src/midend/apply_expr_lower.cpp b/src/midend/apply_expr_lower.cpp index 0c2f4bcc..a0f42cb5 100644 --- a/src/midend/apply_expr_lower.cpp +++ b/src/midend/apply_expr_lower.cpp @@ -85,8 +85,10 @@ namespace graphit { node = hybrid_dense_edgeset_apply; } - // Check for number of segment - if (apply_schedule->second.num_segment > 0) { + // No longer need this check as we moved the check to high-level scheduling API + // We use negative integers between -1 and -10 to denote argv numbers + // it can't be 0 as well, which indicates that this schedule is not needed + if (apply_schedule->second.num_segment > -10 && apply_schedule->second.num_segment != 0) { mir::to(node)->scope_label_name = apply_schedule->second.scope_label_name; mir_context_->edgeset_to_label_to_num_segment[edgeset_expr->var.getName()][apply_schedule->second.scope_label_name] = apply_schedule->second.num_segment; diff --git a/test/c++/high_level_schedule_test.cpp 
b/test/c++/high_level_schedule_test.cpp index 0a70f9cd..a1196cfb 100644 --- a/test/c++/high_level_schedule_test.cpp +++ b/test/c++/high_level_schedule_test.cpp @@ -992,6 +992,25 @@ TEST_F(HighLevelScheduleTest, PRPullParallelTwoSegments) { } +TEST_F(HighLevelScheduleTest, PRPullParallelRuntimeSegmentArgs) { + istringstream is (pr_str_); + fe_->parseStream(is, context_, errors_); + fir::high_level_schedule::ProgramScheduleNode::Ptr program + = std::make_shared(context_); + // Set the pull parameter to 2 segments + program->configApplyDirection("l1:s1", "DensePull")->configApplyParallelization("l1:s1", "dynamic-vertex-parallel"); + program->configApplyNumSSG("l1:s1", "fixed-vertex-count", "argv[1]"); + EXPECT_EQ (0, basicTestWithSchedule(program)); + + mir::FuncDecl::Ptr main_func_decl = mir_context_->getFunction("main"); + + mir::ForStmt::Ptr for_stmt = mir::to((*(main_func_decl->body->stmts))[0]); + mir::ExprStmt::Ptr expr_stmt = mir::to((*(for_stmt->body->stmts))[0]); + EXPECT_EQ(true, mir::isa(expr_stmt->expr)); + +} + + diff --git a/test/c++/test.cpp b/test/c++/test.cpp index 559501fa..f290c08a 100644 --- a/test/c++/test.cpp +++ b/test/c++/test.cpp @@ -31,14 +31,14 @@ int main(int argc, char **argv) { // // ::testing::GTEST_FLAG(filter) = "RuntimeLibTest.VertexSubsetSimpleTest"; // -// ::testing::GTEST_FLAG(filter) = "LowLevelScheduleTest.SimpleInsertNameNodeBefore"; +// ::testing::GTEST_FLAG(filter) = "LowLevelScheduleTest.SimpleEdgesetApplyPushSchedule"; // ::testing::GTEST_FLAG(filter) = "LowLevelScheduleTest.SimpleLoopFusion"; // ::testing::GTEST_FLAG(filter) = "LowLevelScheduleTest.SimpleLoopIndexSplit"; // ::testing::GTEST_FLAG(filter) = "LowLevelScheduleTest.SimpleLoopIndexSplit"; // // ::testing::GTEST_FLAG(filter) = "LowLevelScheduleTest.SimpleApplyFunctionFusion"; // -// ::testing::GTEST_FLAG(filter) = "HighLevelScheduleTest.ClosenessCentralityWeightedDefaultSchedule"; +// ::testing::GTEST_FLAG(filter) = "HighLevelScheduleTest.SimpleLabelForVarDecl"; 
// ::testing::GTEST_FLAG(filter) = "HighLevelScheduleTest.CCHybridDenseSchedule"; // diff --git a/test/input_with_schedules/pagerank_pull_parallel_segment_argv.gt b/test/input_with_schedules/pagerank_pull_parallel_segment_argv.gt new file mode 100644 index 00000000..5b3b1504 --- /dev/null +++ b/test/input_with_schedules/pagerank_pull_parallel_segment_argv.gt @@ -0,0 +1,4 @@ +schedule: + program->configApplyDirection("s1", "DensePull") + ->configApplyParallelization("s1","dynamic-vertex-parallel") + ->configApplyNumSSG("s1", "fixed-vertex-count", "argv[2]"); diff --git a/test/python/test_with_schedules.py b/test/python/test_with_schedules.py index 77a0d3da..8652e64b 100644 --- a/test/python/test_with_schedules.py +++ b/test/python/test_with_schedules.py @@ -181,12 +181,16 @@ def sssp_verified_test(self, input_file_name, use_separate_algo_file=True): self.assertEqual(test_flag, True) os.chdir("bin") - def pr_verified_test(self, input_file_name, use_separate_algo_file=False): + def pr_verified_test(self, input_file_name, use_separate_algo_file=False, use_segment_argv=False): if use_separate_algo_file: self.basic_compile_test_with_separate_algo_schedule_files("pagerank_with_filename_arg.gt", input_file_name) else: self.basic_compile_test(input_file_name) - cmd = "OMP_PLACES=sockets ./"+ self.executable_file_name + " "+GRAPHIT_SOURCE_DIRECTORY+"/test/graphs/test.el" + + if not use_segment_argv: + cmd = "OMP_PLACES=sockets ./"+ self.executable_file_name + " "+GRAPHIT_SOURCE_DIRECTORY+"/test/graphs/test.el" + else: + cmd = "OMP_PLACES=sockets ./"+ self.executable_file_name + " "+GRAPHIT_SOURCE_DIRECTORY+"/test/graphs/test.el 2" print (cmd) proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) #check the value printed to stdout is as expected @@ -376,6 +380,9 @@ def test_pagerank_parallel_pull_load_balance_expect(self): def test_pagerank_parallel_pull_segment_expect(self): self.pr_verified_test("pagerank_pull_parallel_segment.gt", True) + def 
test_pagerank_parallel_pull_segment_argv_expect(self): + self.pr_verified_test("pagerank_pull_parallel_segment_argv.gt", True, True) + def test_pagerank_parallel_pull_numa_expect(self): if self.numa_flags: self.pr_verified_test("pagerank_pull_parallel_numa.gt", True) @@ -465,7 +472,7 @@ def test_bc_SparsePush_basic(self): #used for enabling a specific test # suite = unittest.TestSuite() - # suite.addTest(TestGraphitCompiler('test_bc_SparsePushDensePull_bitvector_cache_basic')) + # suite.addTest(TestGraphitCompiler('test_pagerank_parallel_pull_segment_argv_expect')) # unittest.TextTestRunner(verbosity=2).run(suite) From ded0a02c42ec67f54c7e4ebb1d4eed4230842601 Mon Sep 17 00:00:00 2001 From: Yunming Zhang Date: Tue, 13 Nov 2018 16:43:19 -0500 Subject: [PATCH 03/19] adding one more schedule for bfs --- .../bfs_hybrid_dense_parallel_cas_bitvector.gt | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 test/input_with_schedules/bfs_hybrid_dense_parallel_cas_bitvector.gt diff --git a/test/input_with_schedules/bfs_hybrid_dense_parallel_cas_bitvector.gt b/test/input_with_schedules/bfs_hybrid_dense_parallel_cas_bitvector.gt new file mode 100644 index 00000000..4a731eb7 --- /dev/null +++ b/test/input_with_schedules/bfs_hybrid_dense_parallel_cas_bitvector.gt @@ -0,0 +1,4 @@ +schedule: + program->configApplyDirection("s1", "SparsePush-DensePull")->configApplyParallelization("s1", "dynamic-vertex-parallel"); + program->configApplyDenseVertexSet("s1","bitvector", "src-vertexset", "DensePull"); + program->configApplyParallelization("s2", "serial"); \ No newline at end of file From d415ee40c94b49d5c38db731915bb3caa72c39f5 Mon Sep 17 00:00:00 2001 From: Yunming Date: Tue, 13 Nov 2018 19:34:52 -0500 Subject: [PATCH 04/19] updating the binary generation for PageRank and CC in table 7 scripts --- graphit_eval/eval/table7/Makefile | 8 ++- .../cc_benchmark_cache.gt | 39 ++----------- .../pagerank_benchmark_cache.gt | 55 +------------------ .../pagerank_benchmark_gapbs.gt | 47 
---------------- .../pagerank_benchmark_pull_parallel.gt | 3 + 5 files changed, 13 insertions(+), 139 deletions(-) delete mode 100644 test/input_with_schedules/pagerank_benchmark_gapbs.gt create mode 100644 test/input_with_schedules/pagerank_benchmark_pull_parallel.gt diff --git a/graphit_eval/eval/table7/Makefile b/graphit_eval/eval/table7/Makefile index 2f81fcd3..21e716cc 100644 --- a/graphit_eval/eval/table7/Makefile +++ b/graphit_eval/eval/table7/Makefile @@ -38,15 +38,19 @@ cf_cilk_cpps = cf_pull_load_balance_segment all: graphit_files cpps graphit_files: + python ${GRAPHITC_PY} -a ${GRAPHIT_APP_DIR}/cc.gt -f ${GRAPHIT_SCHEDULE_DIR}/cc_benchmark_cache.gt -o cpps/cc_hybrid_dense_bitvec_segment.cpp + python ${GRAPHITC_PY} -a ${GRAPHIT_APP_DIR}/cc.gt -f ${GRAPHIT_SCHEDULE_DIR}/cc_hybrid_dense_parallel_cas.gt -o cpps/cc_hybrid_dense.cpp python ${GRAPHITC_PY} -a ${GRAPHIT_APP_DIR}/bfs.gt -f ${GRAPHIT_SCHEDULE_DIR}/bfs_hybrid_dense_parallel_cas.gt -o cpps/bfs_hybrid_dense.cpp python ${GRAPHITC_PY} -a ${GRAPHIT_APP_DIR}/bfs.gt -f ${GRAPHIT_SCHEDULE_DIR}/bfs_hybrid_dense_parallel_cas_bitvector.gt -o cpps/bfs_hybrid_dense_bitvec.cpp python ${GRAPHITC_PY} -a ${GRAPHIT_APP_DIR}/bfs.gt -f ${GRAPHIT_SCHEDULE_DIR}/bfs_push_sliding_queue_parallel_cas.gt -o cpps/bfs_push_slq.cpp python ${GRAPHITC_PY} -a ${GRAPHIT_APP_DIR}/sssp.gt -f ${GRAPHIT_SCHEDULE_DIR}/sssp_hybrid_denseforward_parallel_cas.gt -o cpps/sssp_hybrid_denseforward.cpp python ${GRAPHITC_PY} -a ${GRAPHIT_APP_DIR}/sssp.gt -f ${GRAPHIT_SCHEDULE_DIR}/sssp_push_parallel_sliding_queue.gt -o cpps/sssp_push_slq.cpp + python ${GRAPHITC_PY} -a ${GRAPHIT_APP_DIR}/pagerank.gt -f ${GRAPHIT_SCHEDULE_DIR}/pagerank_benchmark_pull_parallel.gt -o cpps/pagerank_pull.cpp + python ${GRAPHITC_PY} -a ${GRAPHIT_APP_DIR}/pagerank.gt -f ${GRAPHIT_SCHEDULE_DIR}/pagerank_benchmark_cache.gt -o cpps/pagerank_pull_segment.cpp cpps: $(prd_cilk_cpps) $(bfs_cilk_cpps) $(cc_cilk_cpps) $(sssp_cilk_cpps) $(pr_cilk_cpps) openmp_only_cpps -test: 
$(bfs_cilk_cpps) ${sssp_cilk_cpps} +test: ${cc_cilk_cpps} $(bfs_cilk_cpps) ${sssp_cilk_cpps} $(PCC) $(OPENMP_FLAGS) -o bin/bfs_push_slq cpps/bfs_push_slq.cpp $(PCC) $(OPENMP_FLAGS) -o bin/sssp_push_slq cpps/sssp_push_slq.cpp @@ -59,5 +63,3 @@ openmp_only_cpps: clean: rm bin/* - - diff --git a/test/input_with_schedules/cc_benchmark_cache.gt b/test/input_with_schedules/cc_benchmark_cache.gt index 3589f27d..210dc2f6 100644 --- a/test/input_with_schedules/cc_benchmark_cache.gt +++ b/test/input_with_schedules/cc_benchmark_cache.gt @@ -1,37 +1,6 @@ -element Vertex end -element Edge end - -const edges : edgeset{Edge}(Vertex,Vertex) = load (argv[1]); - -const vertices : vertexset{Vertex} = edges.getVertices(); -const IDs : vector{Vertex}(int) = 1; - -func updateEdge(src : Vertex, dst : Vertex) - IDs[dst] min= IDs[src]; -end - -func init(v : Vertex) - IDs[v] = v; -end - -func main() - - var n : int = edges.getVertices(); - for trail in 0:10 - startTimer(); - var frontier : vertexset{Vertex} = new vertexset{Vertex}(n); - vertices.apply(init); - while (frontier.getVertexSetSize() != 0) - #s1# frontier = edges.from(frontier).applyModified(updateEdge,IDs); - end - - var elapsed_time : float = stopTimer(); - print "elapsed time: "; - print elapsed_time; - end -end - schedule: - program->configApplyDirection("s1", "SparsePush-DensePull")->configApplyParallelization("s1", "dynamic-vertex-parallel")->configApplyDenseVertexSet("s1","bitvector", "src-vertexset", "DensePull"); - program->configApplyNumSSG("s1", "fixed-vertex-count", 15); + program->configApplyDirection("s1", "SparsePush-DensePull") + ->configApplyParallelization("s1", "dynamic-vertex-parallel") + ->configApplyDenseVertexSet("s1","bitvector", "src-vertexset", "DensePull"); + program->configApplyNumSSG("s1", "fixed-vertex-count", "argv[2]", "DensePull"); diff --git a/test/input_with_schedules/pagerank_benchmark_cache.gt b/test/input_with_schedules/pagerank_benchmark_cache.gt index 2ec6f099..987956af 100644 --- 
a/test/input_with_schedules/pagerank_benchmark_cache.gt +++ b/test/input_with_schedules/pagerank_benchmark_cache.gt @@ -1,56 +1,3 @@ -element Vertex end -element Edge end -const edges : edgeset{Edge}(Vertex,Vertex) = load (argv[1]); -const vertices : vertexset{Vertex} = edges.getVertices(); -const old_rank : vector{Vertex}(double) = 1.0/vertices.size(); -const new_rank : vector{Vertex}(double) = 0.0; -const out_degree : vector {Vertex}(int) = edges.getOutDegrees(); -const contrib : vector{Vertex}(double) = 0.0; -const error : vector{Vertex}(double) = 0.0; -const damp : double = 0.85; -const beta_score : double = (1.0 - damp) / vertices.size(); - -func computeContrib(v : Vertex) - contrib[v] = old_rank[v] / out_degree[v]; -end - -func updateEdge(src : Vertex, dst : Vertex) - new_rank[dst] += contrib[src]; -end - -func updateVertex(v : Vertex) - var old_score : double = old_rank[v]; - new_rank[v] = beta_score + damp*(new_rank[v]); - error[v] = fabs(new_rank[v] - old_rank[v]); - old_rank[v] = new_rank[v]; - new_rank[v] = 0.0; -end - -func printRank(v : Vertex) - print old_rank[v]; -end - -func reset(v: Vertex) - old_rank[v] = 1.0/vertices.size(); - new_rank[v] = 0.0; -end - -func main() - for trail in 0:10 - vertices.apply(reset); - startTimer(); - for i in 0:20 - vertices.apply(computeContrib); - #s1# edges.apply(updateEdge); - vertices.apply(updateVertex); - end - - var elapsed_time : double = stopTimer(); - print "elapsed time: "; - print elapsed_time; - end -end - schedule: program->configApplyDirection("s1", "DensePull")->configApplyParallelization("s1","dynamic-vertex-parallel"); - program->configApplyNumSSG("s1", "fixed-vertex-count", 15); + program->configApplyNumSSG("s1", "fixed-vertex-count", "argv[2]"); diff --git a/test/input_with_schedules/pagerank_benchmark_gapbs.gt b/test/input_with_schedules/pagerank_benchmark_gapbs.gt deleted file mode 100644 index a227db8a..00000000 --- a/test/input_with_schedules/pagerank_benchmark_gapbs.gt +++ /dev/null @@ -1,47 
+0,0 @@ -element Vertex end -element Edge end -const edges : edgeset{Edge}(Vertex,Vertex) = load (argv[1]); -const vertices : vertexset{Vertex} = edges.getVertices(); -const old_rank : vector{Vertex}(double) = 1.0/vertices.size(); -const new_rank : vector{Vertex}(double) = 0.0; -const out_degree : vector {Vertex}(int) = edges.getOutDegrees(); -const contrib : vector{Vertex}(double) = 0.0; -const error : vector{Vertex}(double) = 0.0; -const damp : double = 0.85; -const beta_score : double = (1.0 - damp) / vertices.size(); - -func computeContrib(v : Vertex) - contrib[v] = old_rank[v] / out_degree[v]; -end - -func updateEdge(src : Vertex, dst : Vertex) - new_rank[dst] = new_rank[dst] + contrib[src]; -end - -func updateVertex(v : Vertex) - var old_score : double = old_rank[v]; - new_rank[v] = beta_score + damp*(new_rank[v]); - error[v] = fabs(new_rank[v] - old_rank[v]); - old_rank[v] = new_rank[v]; - new_rank[v] = 0.0; -end - -func printRank(v : Vertex) - print old_rank[v]; -end - -func main() - startTimer(); - for i in 0:20 - vertices.apply(computeContrib); - #s1# edges.apply(updateEdge); - vertices.apply(updateVertex); - end - - var elapsed_time : double = stopTimer(); - print "elapsed time: "; - print elapsed_time; -end - -schedule: - program->configApplyDirection("s1", "DensePull")->configApplyParallelization("s1","dynamic-vertex-parallel"); diff --git a/test/input_with_schedules/pagerank_benchmark_pull_parallel.gt b/test/input_with_schedules/pagerank_benchmark_pull_parallel.gt new file mode 100644 index 00000000..68f0b202 --- /dev/null +++ b/test/input_with_schedules/pagerank_benchmark_pull_parallel.gt @@ -0,0 +1,3 @@ + +schedule: + program->configApplyDirection("s1", "DensePull")->configApplyParallelization("s1","dynamic-vertex-parallel"); From f78dc8065e0460d7282a92de082ffdc194064d3e Mon Sep 17 00:00:00 2001 From: Yunming Date: Tue, 13 Nov 2018 20:09:08 -0500 Subject: [PATCH 05/19] adding collaborative filtering and pagerankdelta to the test script --- 
apps/pagerankdelta.gt | 1 + graphit_eval/eval/table7/Makefile | 7 +- graphit_eval/eval/table7/cpps/.gitignore | 4 ++ .../pagerank_delta_benchmark_cache.gt | 64 +------------------ .../pagerank_delta_hybrid_dense.gt | 3 + ...k_delta_hybrid_dense_parallel_bitvector.gt | 4 +- .../pagerank_delta_sparse_push_parallel.gt | 3 +- 7 files changed, 18 insertions(+), 68 deletions(-) create mode 100644 graphit_eval/eval/table7/cpps/.gitignore create mode 100644 test/input_with_schedules/pagerank_delta_hybrid_dense.gt diff --git a/apps/pagerankdelta.gt b/apps/pagerankdelta.gt index 3a0f17a2..08427496 100644 --- a/apps/pagerankdelta.gt +++ b/apps/pagerankdelta.gt @@ -49,6 +49,7 @@ func main() for i in 1:11 #s1# edges.from(frontier).apply(updateEdge); + var output : vertexset{Vertex}; if i == 1 output = vertices.where(updateVertexFirstRound); else diff --git a/graphit_eval/eval/table7/Makefile b/graphit_eval/eval/table7/Makefile index 21e716cc..60322984 100644 --- a/graphit_eval/eval/table7/Makefile +++ b/graphit_eval/eval/table7/Makefile @@ -38,6 +38,10 @@ cf_cilk_cpps = cf_pull_load_balance_segment all: graphit_files cpps graphit_files: + python ${GRAPHITC_PY} -a ${GRAPHIT_APP_DIR}/cf.gt -f ${GRAPHIT_SCHEDULE_DIR}/cf_pull_parallel_load_balance_segment.gt -o cpps/cf_pull_load_balance_segment.cpp + python ${GRAPHITC_PY} -a ${GRAPHIT_APP_DIR}/pagerankdelta.gt -f ${GRAPHIT_SCHEDULE_DIR}/pagerank_delta_hybrid_dense.gt -o cpps/pagerankdelta_hybrid_dense.cpp + python ${GRAPHITC_PY} -a ${GRAPHIT_APP_DIR}/pagerankdelta.gt -f ${GRAPHIT_SCHEDULE_DIR}/pagerank_delta_benchmark_cache.gt -o cpps/pagerankdelta_hybrid_dense_bitvec_segment.cpp + python ${GRAPHITC_PY} -a ${GRAPHIT_APP_DIR}/pagerankdelta.gt -f ${GRAPHIT_SCHEDULE_DIR}/pagerank_delta_sparse_push_parallel.gt -o cpps/pagerankdelta_sparse_push.cpp python ${GRAPHITC_PY} -a ${GRAPHIT_APP_DIR}/cc.gt -f ${GRAPHIT_SCHEDULE_DIR}/cc_benchmark_cache.gt -o cpps/cc_hybrid_dense_bitvec_segment.cpp python ${GRAPHITC_PY} -a ${GRAPHIT_APP_DIR}/cc.gt 
-f ${GRAPHIT_SCHEDULE_DIR}/cc_hybrid_dense_parallel_cas.gt -o cpps/cc_hybrid_dense.cpp python ${GRAPHITC_PY} -a ${GRAPHIT_APP_DIR}/bfs.gt -f ${GRAPHIT_SCHEDULE_DIR}/bfs_hybrid_dense_parallel_cas.gt -o cpps/bfs_hybrid_dense.cpp @@ -50,9 +54,6 @@ graphit_files: cpps: $(prd_cilk_cpps) $(bfs_cilk_cpps) $(cc_cilk_cpps) $(sssp_cilk_cpps) $(pr_cilk_cpps) openmp_only_cpps -test: ${cc_cilk_cpps} $(bfs_cilk_cpps) ${sssp_cilk_cpps} - $(PCC) $(OPENMP_FLAGS) -o bin/bfs_push_slq cpps/bfs_push_slq.cpp - $(PCC) $(OPENMP_FLAGS) -o bin/sssp_push_slq cpps/sssp_push_slq.cpp # this two cpps files can only be compiled with openmp to get parallel performance openmp_only_cpps: diff --git a/graphit_eval/eval/table7/cpps/.gitignore b/graphit_eval/eval/table7/cpps/.gitignore new file mode 100644 index 00000000..86d0cb27 --- /dev/null +++ b/graphit_eval/eval/table7/cpps/.gitignore @@ -0,0 +1,4 @@ +# Ignore everything in this directory +* +# Except this file +!.gitignore \ No newline at end of file diff --git a/test/input_with_schedules/pagerank_delta_benchmark_cache.gt b/test/input_with_schedules/pagerank_delta_benchmark_cache.gt index 04597fe1..6b3aa6ca 100644 --- a/test/input_with_schedules/pagerank_delta_benchmark_cache.gt +++ b/test/input_with_schedules/pagerank_delta_benchmark_cache.gt @@ -1,69 +1,7 @@ -element Vertex end -element Edge end -const edges : edgeset{Edge}(Vertex,Vertex) = load (argv[1]); -const vertices : vertexset{Vertex} = edges.getVertices(); -const cur_rank : vector{Vertex}(double) = 0; -const ngh_sum : vector{Vertex}(double) = 0.0; -const delta : vector{Vertex}(double) = 1.0/vertices.size(); -const out_degree : vector {Vertex}(int) = edges.getOutDegrees(); -const damp : double = 0.85; -const beta_score : double = (1.0 - damp) / vertices.size(); -const epsilon2 : double = 0.1; -const epsilon : double = 0.0000001; -func updateEdge(src : Vertex, dst : Vertex) - ngh_sum[dst] += delta[src] /out_degree[src]; -end - -func updateVertexFirstRound(v : Vertex) -> output : bool - 
delta[v] = damp*(ngh_sum[v]) + beta_score; - cur_rank[v] += delta[v]; - delta[v] = delta[v] - 1.0/vertices.size(); - output = (fabs(delta[v]) > epsilon2*cur_rank[v]); - ngh_sum[v] = 0; -end - -func updateVertex(v : Vertex) -> output : bool - delta[v] = ngh_sum[v]*damp; - cur_rank[v]+= delta[v]; - ngh_sum[v] = 0; - output = fabs(delta[v]) > epsilon2*cur_rank[v]; -end - -func printRank(v : Vertex) - print cur_rank[v]; -end - -func reset(v: Vertex) - cur_rank[v] = 0; - ngh_sum[v] = 0.0; - delta[v] = 1.0/vertices.size(); -end - -func main() - var n : int = edges.getVertices(); - for trail in 0:10 - startTimer(); - var frontier : vertexset{Vertex} = new vertexset{Vertex}(n); - vertices.apply(reset); - - for i in 1:11 - #s1# edges.from(frontier).apply(updateEdge); - if i == 1 - frontier = vertices.where(updateVertexFirstRound); - else - frontier = vertices.where(updateVertex); - end - end - - var elapsed_time : double = stopTimer(); - print "elapsed time: "; - print elapsed_time; - end -end schedule: program->configApplyDirection("s1", "SparsePush-DensePull")->configApplyParallelization("s1","dynamic-vertex-parallel") ->configApplyDenseVertexSet("s1","bitvector", "src-vertexset", "DensePull"); program->fuseFields("delta", "out_degree"); - program->configApplyNumSSG("s1", "fixed-vertex-count", 10, "DensePull"); + program->configApplyNumSSG("s1", "fixed-vertex-count", "argv[2]", "DensePull"); diff --git a/test/input_with_schedules/pagerank_delta_hybrid_dense.gt b/test/input_with_schedules/pagerank_delta_hybrid_dense.gt new file mode 100644 index 00000000..47efd483 --- /dev/null +++ b/test/input_with_schedules/pagerank_delta_hybrid_dense.gt @@ -0,0 +1,3 @@ +schedule: + program->configApplyDirection("s1", "SparsePush-DensePull")->configApplyParallelization("s1","dynamic-vertex-parallel"); + program->fuseFields("delta", "out_degree"); diff --git a/test/input_with_schedules/pagerank_delta_hybrid_dense_parallel_bitvector.gt 
b/test/input_with_schedules/pagerank_delta_hybrid_dense_parallel_bitvector.gt index dc8fd86c..b8ea64b5 100644 --- a/test/input_with_schedules/pagerank_delta_hybrid_dense_parallel_bitvector.gt +++ b/test/input_with_schedules/pagerank_delta_hybrid_dense_parallel_bitvector.gt @@ -1,5 +1,7 @@ schedule: - program->configApplyDirection("s1", "SparsePush-DensePull")->configApplyParallelization("s1","dynamic-vertex-parallel")->configApplyDenseVertexSet("s1","bitvector", "src-vertexset", "DensePull"); + program->configApplyDirection("s1", "SparsePush-DensePull") + ->configApplyParallelization("s1","dynamic-vertex-parallel") + ->configApplyDenseVertexSet("s1","bitvector", "src-vertexset", "DensePull"); program->fuseFields("delta", "out_degree"); diff --git a/test/input_with_schedules/pagerank_delta_sparse_push_parallel.gt b/test/input_with_schedules/pagerank_delta_sparse_push_parallel.gt index 7832ddf5..1bf87acd 100644 --- a/test/input_with_schedules/pagerank_delta_sparse_push_parallel.gt +++ b/test/input_with_schedules/pagerank_delta_sparse_push_parallel.gt @@ -1,5 +1,6 @@ schedule: - program->configApplyDirection("s1", "SparsePush")->configApplyParallelization("s1","dynamic-vertex-parallel")->configApplyDenseVertexSet("s1","bitvector", "src-vertexset", "DensePull"); + program->configApplyDirection("s1", "SparsePush") + ->configApplyParallelization("s1","dynamic-vertex-parallel"); program->fuseFields("delta", "out_degree"); From 0052b11b52c0df61a7d07457ea2c27cf54437f2b Mon Sep 17 00:00:00 2001 From: Yunming Date: Tue, 13 Nov 2018 21:10:27 -0500 Subject: [PATCH 06/19] switching from regular expression check to a weaker substring check because gcc 4.8.1 did not support regular expression --- src/frontend/high_level_schedule.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/frontend/high_level_schedule.cpp b/src/frontend/high_level_schedule.cpp index 30f2dd2c..e8eea411 100644 --- a/src/frontend/high_level_schedule.cpp +++
b/src/frontend/high_level_schedule.cpp @@ -720,13 +720,15 @@ namespace graphit { throw "Unsupported Schedule!"; } - regex argv_regex ("argv\\[(\\d)\\]"); + // use string rfind instead of regular expression because gcc older than 4.9.0 does not support regular expression + //regex argv_regex ("argv\\[(\\d)\\]"); // here we do a hack and use a negative integer to denote the integer argument to argv // the code generation will treat negative numbers differently by generating an argv[negative_integer] run time argument // to use as number of segments // the user input argv string has to match a pattern argv[integer] - if (regex_match(num_segment_argv, argv_regex)){ + //if (regex_match(num_segment_argv, argv_regex)){ + if (num_segment_argv.rfind("argv[", 0) == 0){ argv_number = -1*extractIntegerFromString(num_segment_argv); } else { std::cerr << "Invalid string argument. It has to be of form argv[integer]" << std::endl; From 1a8d3a3a63d444181613519681feb9eafae73a9e Mon Sep 17 00:00:00 2001 From: Yunming Date: Tue, 13 Nov 2018 21:41:43 -0500 Subject: [PATCH 07/19] a fix to the pagerankdelta --- apps/pagerankdelta.gt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/pagerankdelta.gt b/apps/pagerankdelta.gt index 08427496..aa465b4a 100644 --- a/apps/pagerankdelta.gt +++ b/apps/pagerankdelta.gt @@ -55,8 +55,8 @@ func main() else output = vertices.where(updateVertex); end - delete frontier; - delete output; + delete frontier; + frontier = output; end delete frontier; From e8647aaf9806edcbc9edea3600b8e3ef6aa3b834 Mon Sep 17 00:00:00 2001 From: Yunming Zhang Date: Wed, 14 Nov 2018 10:13:17 -0500 Subject: [PATCH 08/19] fixing a bug in applyModified flag not working --- src/midend/apply_expr_lower.cpp | 4 +++- test/c++/high_level_schedule_test.cpp | 3 ++- test/c++/test.cpp | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/midend/apply_expr_lower.cpp b/src/midend/apply_expr_lower.cpp index a0f42cb5..77d5757c 100644 ---
a/src/midend/apply_expr_lower.cpp +++ b/src/midend/apply_expr_lower.cpp @@ -117,8 +117,10 @@ namespace graphit { } } + //if this is applyModified with a tracking field if (edgeset_apply->tracking_field != "") { - if (apply_schedule->second.deduplication_type == ApplySchedule::DeduplicationType::Enable) { + // only enable deduplication when the argument to ApplyModified is True (disable deduplication), or the user manually set disable + if (edgeset_apply->enable_deduplication && apply_schedule->second.deduplication_type == ApplySchedule::DeduplicationType::Enable) { //only enable deduplication if there is needed for tracking mir::to(node)->enable_deduplication = true; } diff --git a/test/c++/high_level_schedule_test.cpp b/test/c++/high_level_schedule_test.cpp index a1196cfb..693a2672 100644 --- a/test/c++/high_level_schedule_test.cpp +++ b/test/c++/high_level_schedule_test.cpp @@ -759,7 +759,8 @@ TEST_F(HighLevelScheduleTest, BFSPushSlidingQueueSchedule) { = std::make_shared(context_); program->configApplyDirection("s1", "SparsePush"); - program->setApply("s1", "sliding_queue")->configApplyParallelization("s1", "dynamic-vertex-parallel")->setApply("s1", "disable_deduplication"); + program->setApply("s1", "sliding_queue") + ->configApplyParallelization("s1", "dynamic-vertex-parallel"); //generate c++ code successfully EXPECT_EQ (0, basicTestWithSchedule(program)); mir::FuncDecl::Ptr main_func_decl = mir_context_->getFunction("main"); diff --git a/test/c++/test.cpp b/test/c++/test.cpp index f290c08a..e13ea0d7 100644 --- a/test/c++/test.cpp +++ b/test/c++/test.cpp @@ -38,7 +38,7 @@ int main(int argc, char **argv) { // // ::testing::GTEST_FLAG(filter) = "LowLevelScheduleTest.SimpleApplyFunctionFusion"; // -// ::testing::GTEST_FLAG(filter) = "HighLevelScheduleTest.SimpleLabelForVarDecl"; +// ::testing::GTEST_FLAG(filter) = "HighLevelScheduleTest.BFSPushSlidingQueueSchedule"; // ::testing::GTEST_FLAG(filter) = "HighLevelScheduleTest.CCHybridDenseSchedule"; // From 
a30c742610ae1f5c879fdde59aeb9bbcd300b386 Mon Sep 17 00:00:00 2001 From: "zhangyunming1990@gmail.com" Date: Wed, 14 Nov 2018 19:29:07 -0500 Subject: [PATCH 09/19] fixing a wrong binary used in the benchmark script --- graphit_eval/eval/table7/benchmark.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/graphit_eval/eval/table7/benchmark.py b/graphit_eval/eval/table7/benchmark.py index 0a8d9389..0a61c138 100755 --- a/graphit_eval/eval/table7/benchmark.py +++ b/graphit_eval/eval/table7/benchmark.py @@ -31,12 +31,12 @@ "webGraph" : {"pr":"pagerank_pull_segment", "sssp" : "sssp_hybrid_denseforward", "cc" : "cc_hybrid_dense_bitvec_segment", - "bfs" :"bfs_hybrid_dense_bitvec_segment", + "bfs" :"bfs_hybrid_dense_bitvec", "prd" : "pagerankdelta_hybrid_dense_bitvec_segment"}, "friendster" : {"pr":"pagerank_pull_segment", "sssp" : "sssp_hybrid_denseforward", "cc" : "cc_hybrid_dense_bitvec_segment", - "bfs" :"bfs_hybrid_dense_bitvec_segment", + "bfs" :"bfs_hybrid_dense_bitvec", "prd" : "pagerankdelta_hybrid_dense_bitvec_segment"}, "road-usad" : {"pr":"pagerank_pull", "sssp" : "sssp_hybrid_denseforward", From 4197093e76932b0b08e8d9219a1581be88f63939 Mon Sep 17 00:00:00 2001 From: "zhangyunming1990@gmail.com" Date: Wed, 14 Nov 2018 19:45:25 -0500 Subject: [PATCH 10/19] updating the benchmark script with the binary --- graphit_eval/eval/table7/Makefile | 1 + graphit_eval/eval/table7/benchmark.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/graphit_eval/eval/table7/Makefile b/graphit_eval/eval/table7/Makefile index 60322984..0fd2c255 100644 --- a/graphit_eval/eval/table7/Makefile +++ b/graphit_eval/eval/table7/Makefile @@ -63,4 +63,5 @@ openmp_only_cpps: $(PCC) $(CILK_FLAGS) -o bin/$@ $< clean: + rm cpps/*.cpp rm bin/* diff --git a/graphit_eval/eval/table7/benchmark.py b/graphit_eval/eval/table7/benchmark.py index 0a61c138..c400a080 100755 --- a/graphit_eval/eval/table7/benchmark.py +++ b/graphit_eval/eval/table7/benchmark.py @@ -39,7 
+39,7 @@ "bfs" :"bfs_hybrid_dense_bitvec", "prd" : "pagerankdelta_hybrid_dense_bitvec_segment"}, "road-usad" : {"pr":"pagerank_pull", - "sssp" : "sssp_hybrid_denseforward", + "sssp" : "sssp_push_slq", "cc" : "cc_hybrid_dense", "bfs" :"bfs_push_slq", "prd" : "pagerankdelta_sparse_push"}, From 87bf1495992f1c0c3c1f647c764957638902fd63 Mon Sep 17 00:00:00 2001 From: "zhangyunming1990@gmail.com" Date: Wed, 14 Nov 2018 21:56:25 -0500 Subject: [PATCH 11/19] fixing up the benchmarking script for collaborative filtering --- graphit_eval/eval/table7/Makefile | 8 ++++++-- graphit_eval/eval/table7/benchmark.py | 11 +++++++++++ .../cf_pull_parallel_load_balance_segment_argv.gt | 4 ++++ 3 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 test/input_with_schedules/cf_pull_parallel_load_balance_segment_argv.gt diff --git a/graphit_eval/eval/table7/Makefile b/graphit_eval/eval/table7/Makefile index 0fd2c255..a2695505 100644 --- a/graphit_eval/eval/table7/Makefile +++ b/graphit_eval/eval/table7/Makefile @@ -33,12 +33,16 @@ sssp_cilk_cpps = sssp_hybrid_denseforward cc_cilk_cpps = cc_hybrid_dense cc_hybrid_dense_bitvec_segment bfs_cilk_cpps = bfs_hybrid_dense bfs_hybrid_dense_bitvec prd_cilk_cpps = pagerankdelta_hybrid_dense pagerankdelta_hybrid_dense_bitvec_segment pagerankdelta_sparse_push + +#this can only be compiled with icpc for now, not sure why cf_cilk_cpps = cf_pull_load_balance_segment -all: graphit_files cpps +all: + make graphit_files + make cpps graphit_files: - python ${GRAPHITC_PY} -a ${GRAPHIT_APP_DIR}/cf.gt -f ${GRAPHIT_SCHEDULE_DIR}/cf_pull_parallel_load_balance_segment.gt -o cpps/cf_pull_load_balance_segment.cpp + python ${GRAPHITC_PY} -a ${GRAPHIT_APP_DIR}/cf.gt -f ${GRAPHIT_SCHEDULE_DIR}/cf_pull_parallel_load_balance_segment_argv.gt -o cpps/cf_pull_load_balance_segment.cpp python ${GRAPHITC_PY} -a ${GRAPHIT_APP_DIR}/pagerankdelta.gt -f ${GRAPHIT_SCHEDULE_DIR}/pagerank_delta_hybrid_dense.gt -o cpps/pagerankdelta_hybrid_dense.cpp python 
${GRAPHITC_PY} -a ${GRAPHIT_APP_DIR}/pagerankdelta.gt -f ${GRAPHIT_SCHEDULE_DIR}/pagerank_delta_benchmark_cache.gt -o cpps/pagerankdelta_hybrid_dense_bitvec_segment.cpp python ${GRAPHITC_PY} -a ${GRAPHIT_APP_DIR}/pagerankdelta.gt -f ${GRAPHIT_SCHEDULE_DIR}/pagerank_delta_sparse_push_parallel.gt -o cpps/pagerankdelta_sparse_push.cpp diff --git a/graphit_eval/eval/table7/benchmark.py b/graphit_eval/eval/table7/benchmark.py index c400a080..e7fde8c3 100755 --- a/graphit_eval/eval/table7/benchmark.py +++ b/graphit_eval/eval/table7/benchmark.py @@ -105,6 +105,17 @@ def get_cmd_graphit(g, p, point): command += " 30" else: command += " 16" + + if g in ["netflix", "netflix_2x"]: + if use_NUMACTL: + command = "numactl -i all " + command + + #set the number of segments + if g == "netflix": + command += " 5" + else: + command += " 15" + return command diff --git a/test/input_with_schedules/cf_pull_parallel_load_balance_segment_argv.gt b/test/input_with_schedules/cf_pull_parallel_load_balance_segment_argv.gt new file mode 100644 index 00000000..8af3af77 --- /dev/null +++ b/test/input_with_schedules/cf_pull_parallel_load_balance_segment_argv.gt @@ -0,0 +1,4 @@ + +schedule: + program->configApplyDirection("s1", "DensePull")->configApplyParallelization("s1","dynamic-vertex-parallel")->setApply("s1", "pull_edge_based_load_balance"); + program->configApplyNumSSG("s1", "fixed-vertex-count", "argv[2]", "DensePull"); From 2ecf9672be3b88eafba270cd81f025bcf148be64 Mon Sep 17 00:00:00 2001 From: Yunming Zhang Date: Wed, 14 Nov 2018 22:33:24 -0500 Subject: [PATCH 12/19] starting to update the documentation for reproducing GraphIt performance --- ...verview.md => GraphIt_Evaluation_Guide.md} | 138 +----------------- graphit_eval/README.md | 1 - 2 files changed, 4 insertions(+), 135 deletions(-) rename graphit_eval/{artifact_eval_overview.md => GraphIt_Evaluation_Guide.md} (52%) delete mode 100644 graphit_eval/README.md diff --git a/graphit_eval/artifact_eval_overview.md 
b/graphit_eval/GraphIt_Evaluation_Guide.md similarity index 52% rename from graphit_eval/artifact_eval_overview.md rename to graphit_eval/GraphIt_Evaluation_Guide.md index b7d3d110..d3cad6cc 100644 --- a/graphit_eval/artifact_eval_overview.md +++ b/graphit_eval/GraphIt_Evaluation_Guide.md @@ -1,139 +1,9 @@ -Overview for OOPSLA 2018 Artifact Evaluation +#GraphIt Code Generation and Performance Evaluation Guide -**GraphIt - A High-Performance Graph DSL** +The following overview consists of a Step by Step Instructions explaining how to reproduce Figure 6 (PageRankDelta with different schedules) and Table 8 (GraphIt performance on our 2-socket machine) in the paper. We refer users to the [Getting Started Guide](https://github.com/GraphIt-DSL/graphit/blob/master/README.md ) for instructions to set up GraphIt. NOTE: the schedules we used here are almost certainly **NOT** the fastest schedules for your machine. **Please only use the instructions here as examples for writing and compiling different schedules, and tune schedules to best fit your machine's features, such as cache size, number of sockets, and number of cores.** -The following overview consists of two parts: a Getting Started Guide that contains instructions for setting up GraphIt, running tests, compiling and running GraphIt programs; a Step by Step Instructions explaining how to reproduce some tables and figures in the paper. -# Getting Started Guide - -The GraphIt compiler is available as an open source project under the MIT license at [github](https://github.com/yunmingzhang17/graphit) with documentation of the language available at [graphit-lang.org](http://graphit-lang.org/). It currently supports Linux and MacOS, but not Windows. - -To start, please download and uncompress the `oopsla_artifact.tar.gz` file. 
The `oopsla18_artifact` directory contains a VM with GraphIt installed, a compressed version of the master branch of GraphIt Github repository and two GraphIt graph files (socLive.sg and socLive.wsg for the Live Journal file). There is also an uncompressed version (`oopsla_artifact`) in the Dropbox directory in case there are issues with decompression. We have included a md5 hash of the compressed file in the directory. - -The GraphIt github repo (`graphit-master.zip`) and graph files (`socLive.sg` and `socLive.wsg`) in `oopsla18_artifact` are **not required** to complete the artifact evaluation. They can be used by reviewers who are interested in setting up GraphIt on a more powerful machine to replicate performance numbers from the paper. In addition to `oopsla_artifact.tar.gz`, we have also included an `additional_graphit_graphs` folder that contains larger test graphs. The `additional_graphit_graphs` is also **not necessary** for the artifact evaluation and is very large (30GB). - -## Set up the virtual machine - -For convenience, we provide a Linux VirtualBox VM image (OOPSLA2018) with GraphIt pre-installed, as well as the benchmarks we used to evaluate GraphIt in the paper. Instructions for downloading, installing, and using VirtualBox can be found at [virtualbox.org](http://virtualbox.org). The virtual machine needs 4GB of memory. - -Import the VM using the `Machine -> Add` menu in the VirtualBox application. When the VM boots, log in with the `graphit` username. The **password is oopsla2018** (lower case). Once you have logged in you will see the directory under the home directory `~/OOPSLA_Artifact/graphit`. This directory contains a prebuilt version of GraphIt. - -## Manually download, build, and test GraphIt (optional) - -If you choose not to use the GraphIt version already installed in the VM, you can install GraphIt on your own machine. 
Simply clone the directory from [the github repository](https://github.com/yunmingzhang17/graphit), or use `graphit-master.zip` (a snapshot of the repository) in the artifact evaluation folder. - -### Dependencies - -To build GraphIt you need -[CMake 3.5.0 or greater](http://www.cmake.org/cmake/resources/software.html). This dependency alone will allow you to build GraphIt and generate high-performance C++ implementations. Currently, we use Python 2.7 for the end-to-end tests. - -To compile the generated C++ implementations with support for parallelism, you need Cilk and OpenMP. One easy way to set up both Cilk and OpenMP is to use intel parallel compiler (icpc). The compiler is free for [students](https://software.intel.com/en-us/qualify-for-free-software/student). There are also open source Cilk (g++ >= 5.3.0 with support for Cilk Plus), and [OpenMP](https://www.openmp.org/resources/openmp-compilers-tools/) implementations. - -To use NUMA optimizations on multi-socket machines, libnuma needs to be installed (on Ubuntu, sudo apt-get install libnuma-dev). We do note, a good number of optimized implementations do not require enabling NUMA optimizations. You can give GraphIt a try even if you do not have libnuma installed. - -### Build GraphIt - -To perform an out-of-tree build of GraphIt do: - -After you have cloned the directory: - -``` - cd graphit - mkdir build - cd build - cmake .. - make -``` -Currently, we do require you to name the build directory `build` for the unit tests to work. - -## Basic Evaluation of GraphIt - -### Run Test Programs - - -Once you have GraphIt set up (through the VM or manually installed), You can run the following test to verify the basic functionalities of GraphIt. 
- -To run the C++ test suite do (all tests should pass): - -``` - cd build/bin - ./graphit_test -``` - -To run the Python end-to-end test suite: - -start at the top level graphit directory cloned from Github, NOT the build directory -(All tests would pass, but some would generate error messages from the g++ compiler. This is expected.) -Currently the project supports Python 2.x and not Python 3.x (the print syntax is different) - -``` - cd graphit/test/python - python test.py - python test_with_schedules.py -``` - -When running `test_with_schedules.py`, commands used for compiling GraphIt files, compiling the generated C++ file, and running the compiled binary file are printed in stdout. You can reproduce each test and examine the generated C++ files by typing the printed commands in the shell (make sure you are in the `build/bin` directory). You can also selectively enable a specific test using the TestSuite commands. We provide examples of enabling a subset of Python tests in the comments of the main function of `test_with_schedules.py`. - -Note when running `test.py`, some error message may be printed during the run and they are **expected**. Please check the final output to see if tests passed. **Running `test.py` and `test_with_schedules.py` in the VM might take 15-20 minutes**. - -### Compile GraphIt Programs - -GraphIt compiler currently generates a C++ output file from the .gt input GraphIt programs. -To compile an input GraphIt file with schedules in the same file (assuming the build directory is in the root project directory). For now, graphitc.py ONLY works in the `build/bin` directory. 
- -``` - cd build/bin - python graphitc.py -f ../../test/input_with_schedules/pagerank_benchmark_cache.gt -o test.cpp - -``` -To compile an input algorithm file and another separate schedule file (some of the test files have hardcoded paths to test inputs, be sure to modify that or change the directory you run the compiled files) - -The example below compiles the algorithm file (../../test/input/pagerank.gt), with a separate schedule file (../../test/input_with_schedules/pagerank_pull_parallel.gt) - -``` - cd build/bin - python graphitc.py -a ../../test/input/pagerank_with_filename_arg.gt -f ../../test/input_with_schedules/pagerank_pull_parallel.gt -o test.cpp -``` - -### Compile and Run Generated C++ Programs - -To compile a serial version of the C++ program, you can use g++ with support of c++11 standard to compile the generated C++ file (assuming it is named test.cpp). - -``` - #assuming you are still in the bin directory under build/bin. If not, just do cd build/bin from the root of the directory - g++ -std=c++11 -I ../../src/runtime_lib/ -O3 test.cpp -o test - ./test ../../test/graphs/4.el -``` - -To compile a parallel version of the C++ program, you will need both Cilk and OpenMP. OpenMP is required for programs using NUMA optimized schedule (configApplyNUMA enabled) and static parallel optimizations (static-vertex-parallel option in configApplyParallelization). All other programs can be compiled with Cilk. For analyzing large graphs (e.g., Twitter, Friendster, WebGraph) on NUMA machines, `numactl -i all` improves the parallel performance. For smaller graphs, such as LiveJournal and Road graphs, not using numactl can be faster. - -``` - #assuming you are still in the bin directory under build/bin. 
If not, just do cd build/bin from the root of the directory - - #compile and run with Cilk - #icpc - icpc -std=c++11 -I ../../src/runtime_lib/ -DCILK -O3 test.cpp -o test - #g++ (gcc) with cilk support - g++ -std=c++11 -I ../../src/runtime_lib/ -DCILK -fcilkplus -lcilkrts -O3 test.cpp -o test - #run the compiled binary on a small test graph 4.el - numactl -i all ./test ../../test/graphs/4.el - - #compile and run with OpenMP - #icpc - icpc -std=c++11 -I ../../src/runtime_lib/ -DOPENMP -qopenmp -O3 test.cpp -o test - #g++ (gcc) with openmp support - g++ -std=c++11 -I ../../src/runtime_lib/ -DOPENMP -fopenmp -O3 test.cpp -o test - #run the compiled binary on a small test graph 4.el - numactl -i all ./test ../../test/graphs/4.el - -``` - -You should see some running times printed. The pagerank example files require a command-line argument for the input graph file. If you see a segfault, then it probably means you did not specify an input graph. - - -# Step By Step Instructions - -## Reproducing Figure 6 +## Reproducing Figure 6 (PageRankDelta with different schedules) Figure 6 in the paper shows the different C++ code generated by applying different schedules to PageRankDelta. We have build a script to generate the code for PageRankDelta with different schedules and make sure the generated C++ code compiles successfully. **This script might run for 4-5 minutes as compiling GraphIt code currently is a bit slow. Please wait for a few minutes for the compilation process to finish.** @@ -147,7 +17,7 @@ Figure 6 in the paper shows the different C++ code generated by applying differe The program should output the information on each schedule, print the generated C++ file to stdout, and save the generated file in .cpp files in the directory. The schedules we used are stored in `pagerankdelta_example/schedules`. We added a cache optimized schedule that was not included in the paper due to space constraints. 
This experiment demonstrates GraphIt's ability to compose together cache, direction, parallelization and data structure optimizations. -## Reproducing Table 7 for GraphIt +## Reproducing Table 8 for GraphIt Table 7 in the paper shows the performance numbers of GraphIt and other frameworks on 6 applications. Here we provide a script that can produce GraphIt's performance for PageRank, PageRankDelta, Breadth-First Search, Single Source Shortest Paths, and Connected Components. Collaborative Filtering is not included in the script as we do not have the right to distribute the Netflix dataset, but we leave instructions for reviewers to compile collaborative filtering in case Netflix dataset is available. Reviewers can download the other frameworks from their github repositories to replicate the performance of the other frameworks. ### Running GraphIt generated programs diff --git a/graphit_eval/README.md b/graphit_eval/README.md deleted file mode 100644 index 8ca92802..00000000 --- a/graphit_eval/README.md +++ /dev/null @@ -1 +0,0 @@ -This folder contains scripts to evaluate different aspects of the GraphIt compiler, such as code generation and performance. 
\ No newline at end of file From 48806abf80cfaef770236bec0a6350522021e17f Mon Sep 17 00:00:00 2001 From: "zhangyunming1990@gmail.com" Date: Thu, 15 Nov 2018 10:15:14 -0500 Subject: [PATCH 13/19] adding a check for invalid log file --- graphit_eval/eval/table7/parse.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/graphit_eval/eval/table7/parse.py b/graphit_eval/eval/table7/parse.py index 8a8680ef..0f6f3b86 100644 --- a/graphit_eval/eval/table7/parse.py +++ b/graphit_eval/eval/table7/parse.py @@ -30,7 +30,10 @@ def parse_result(log_file_name, app, time_key, delimiter, index, strip_end, divi with open(log_file_name) as f: content = f.readlines() content = [x.strip() for x in content] - + # if the file is empty, don't try to parse it + if (len(content) < 3): + print "invalid log file" + log_file_name + return -1 initial_inner_cnt = inner_cnt min_time = 10000000 sum_time = 0 From f6b1bb45cbe729ba272973988f642c3ef8102408 Mon Sep 17 00:00:00 2001 From: Yunming Date: Thu, 15 Nov 2018 11:51:56 -0500 Subject: [PATCH 14/19] updating the evaluation guide with the new scripts --- graphit_eval/GraphIt_Evaluation_Guide.md | 103 ++++++++++++++--------- 1 file changed, 65 insertions(+), 38 deletions(-) diff --git a/graphit_eval/GraphIt_Evaluation_Guide.md b/graphit_eval/GraphIt_Evaluation_Guide.md index d3cad6cc..ac8e9dbc 100644 --- a/graphit_eval/GraphIt_Evaluation_Guide.md +++ b/graphit_eval/GraphIt_Evaluation_Guide.md @@ -1,9 +1,9 @@ -#GraphIt Code Generation and Performance Evaluation Guide +# GraphIt Code Generation and Performance Evaluation Guide -The following overview consists of a Step by Step Instructions explaining how to reproduce Figure 6 (PageRankDelta with different schedules) and Table 8 (GraphIt performance on our 2-socket machine) in the paper. We refer users to the [Getting Started Guide](https://github.com/GraphIt-DSL/graphit/blob/master/README.md ) for instructions to set up GraphIt. 
NOTE: the schedules we used here are almost certainly **NOT** the fastest schedules for your machine. **Please only use the instructions here as examples for writing and compiling different schedules, and tune schedules to best fit your machine's features, such as cache size, number of sockets, and number of cores.** +The following overview consists of a Step by Step Instructions explaining how to reproduce Figure 6 (PageRankDelta with different schedules) and Table 8 (GraphIt performance on our 2-socket machine) in the paper. We refer users to the [Getting Started Guide](https://github.com/GraphIt-DSL/graphit/blob/master/README.md ) for instructions to set up GraphIt. **NOTE:** the schedules we used here are almost certainly **NOT** the fastest schedules for your machine. **Please only use the instructions here as examples for writing and compiling different schedules, and tune schedules to best fit your machine's features, such as cache size, number of sockets, and number of cores.** -## Reproducing Figure 6 (PageRankDelta with different schedules) +## Reproducing PageRankDelta with different schedules Figure 6 in the paper shows the different C++ code generated by applying different schedules to PageRankDelta. We have build a script to generate the code for PageRankDelta with different schedules and make sure the generated C++ code compiles successfully. **This script might run for 4-5 minutes as compiling GraphIt code currently is a bit slow. Please wait for a few minutes for the compilation process to finish.** @@ -17,25 +17,68 @@ Figure 6 in the paper shows the different C++ code generated by applying differe The program should output the information on each schedule, print the generated C++ file to stdout, and save the generated file in .cpp files in the directory. The schedules we used are stored in `pagerankdelta_example/schedules`. We added a cache optimized schedule that was not included in the paper due to space constraints. 
This experiment demonstrates GraphIt's ability to compose together cache, direction, parallelization and data structure optimizations. -## Reproducing Table 8 for GraphIt -Table 7 in the paper shows the performance numbers of GraphIt and other frameworks on 6 applications. Here we provide a script that can produce GraphIt's performance for PageRank, PageRankDelta, Breadth-First Search, Single Source Shortest Paths, and Connected Components. Collaborative Filtering is not included in the script as we do not have the right to distribute the Netflix dataset, but we leave instructions for reviewers to compile collaborative filtering in case Netflix dataset is available. Reviewers can download the other frameworks from their github repositories to replicate the performance of the other frameworks. +## Reproducing GraphIt Performance on 2-socket Intel Xeon E5-2695 v3 CPUs with 30 MB LLC, THP enabled. +Table 8 in the paper shows the performance numbers of GraphIt for a few applications. Here we provide a script that can produce GraphIt's performance for PageRank, PageRankDelta, Breadth-First Search, Single Source Shortest Paths, and Connected Components. Collaborative Filtering currently only works +with Intel ICPC compiler. **NOTE:** the schedules we used here are almost certainly **NOT** the fastest schedules for your machine. **Please only use the instructions here as examples for writing and compiling different schedules, and tune schedules to best fit your machine's features, such as cache size, number of sockets, and number of cores.** -### Running GraphIt generated programs +### Generating the C++ files from GraphIt programs -The following commands run the serial version of GraphIt on a small test graph (both the unweighted and weighted versions are in `graphit/graphit_eval/data/testGraph `) that is included in the repository. We have included the generated optimized C++ files for our dual-socket machine in the `graphit_eval/eval/table7/cpps` directory.
+The algorithms we used for benchmarking, such as PageRank, PageRankDelta, BFS, Connected Components, Single Source Shortest Paths and Collaborative Filtering are in the [apps](https://github.com/GraphIt-DSL/graphit/tree/master/apps) directory. +These files include **ONLY** the algorithm and **NO** schedule. You need to use the appropriate schedules for the specific algorithm and input graph to get the best performance. + +In the [arxiv paper](https://arxiv.org/abs/1805.00923) (Table 8), we described the schedules used for each algorithm on each graph on a dual socket system with Intel Xeon E5-2695 v3 CPUs with 12 cores +each for a total of 24 cores and 48 hyper-threads. The system has 128GB of DDR3-1600 memory +and 30 MB last level cache on each socket, and runs with Transparent Huge Pages (THP) enabled. The best schedule for a different machine can be different. You might need to try a few different set of schedules for the best performance. + +In the schedules shown in Table 8, the keyword ’Program’ and the continuation symbol ’->’ are omitted. ’ca’ is the abbreviation for ’configApply’. Note that configApplyNumSSG uses an integer parameter (X) which is dependent on the graph size and the cache size of a system. For example, the complete schedule used for CC on Twitter graph is the following (X is tuned based on the cache size) + +``` +schedule: + program->configApplyDirection("s1", "SparsePush-DensePull")->configApplyParallelization("s1", "dynamic-vertex-parallel")->configApplyDenseVertexSet("s1","bitvector", "src-vertexset", "DensePull"); + program->configApplyNumSSG("s1", "fixed-vertex-count", X, "DensePull"); +``` + +The **test/input** and **test/input\_with\_schedules** directories contain many examples of the algorithm and schedule files. Use them as references when writing your own schedule and generate C++ implementations. 
+ +Here we provide a script that will compile (displaying the commands used) the graphit programs (with .gt extensions) into C++ programs using the schedules shown in the paper. + +``` +#start from graphit root directory +cd graphit/graphit_eval/eval/table7 + +#automatically compile the graphit files (.gt) into C++ files with schedules used in the paper (from graphit/test/input_with_schedules directory) +make graphit_files + +``` + +Here we show the abbreviated output of the script below. These are essentially the commands we used to compile the graphit files using schedules in the test directory. The output cpp files are stored in graphit/graphit_eval/eval/table7/cpps. You can look at the schedule files here to figure out the schedules we used to get high performance for our machines. +``` +yunming:table7$ make graphit_files + +python ../../../build/bin/graphitc.py -a ../../../apps/cf.gt -f ../../../test/input_with_schedules/cf_pull_parallel_load_balance_segment_argv.gt -o cpps/cf_pull_load_balance_segment.cpp +python ../../../build/bin/graphitc.py -a ../../../apps/pagerankdelta.gt -f ../../../test/input_with_schedules/pagerank_delta_hybrid_dense.gt -o cpps/pagerankdelta_hybrid_dense.cpp +python ../../../build/bin/graphitc.py -a ../../../apps/pagerankdelta.gt -f ../../../test/input_with_schedules/pagerank_delta_benchmark_cache.gt -o cpps/pagerankdelta_hybrid_dense_bitvec_segment.cpp +python ../../../build/bin/graphitc.py -a ../../../apps/pagerankdelta.gt -f ../../../test/input_with_schedules/pagerank_delta_sparse_push_parallel.gt -o cpps/pagerankdelta_sparse_push.cpp +python ../../../build/bin/graphitc.py -a ../../../apps/cc.gt -f ../../../test/input_with_schedules/cc_benchmark_cache.gt -o cpps/cc_hybrid_dense_bitvec_segment.cpp +...
+``` + +### Running GraphIt generated programs with a single thread on a tiny graph + +The following commands run the **serial** version of GraphIt on a small test graph (both the unweighted and weighted versions are in `graphit/graphit_eval/data/testGraph `) that is included in the repository. We assume that you have already generated optimized C++ files for our dual-socket machine in the `graphit_eval/eval/table7/cpps` directory following the last step. (The names of the files generated need to match those used in the last step). Please use the Makefile here to figure out the commands we used to compile the serial C++ files. ``` #start from graphit root directory cd graphit_eval/eval/table7 #first compile the generated cpp files -make +make cpps #run and benchmark the performance python table7_graphit.py ``` -The script first runs the benchmarks and then saves the outputs to the `graphit_eval/eval/table7/outputs/` directory. The benchmark script choose the binary based on the graph. Then a separate script parses the outputs to generate the final table of performance in the following form. The application and graph information are shown in the leftmost column, and the running times are shown in the second column in seconds. +The script table7_graphit.py first runs the benchmarks and then saves the outputs to the `graphit_eval/eval/table7/outputs/` directory. The benchmark script choose the binary based on the graph. Then a separate script parses the outputs to generate the final table of performance in the following form. The application and graph information are shown in the leftmost column, and the running times are shown in the second column in seconds. ``` {'graphit': {'testGraph': {'bfs': 3.3333333333333335e-07, 'pr': 1e-06, 'sssp': 5e-07, 'cc': 1e-06, 'prd': 3e-06}}} @@ -53,11 +96,9 @@ Done parsing the run outputs ``` These runs should complete very quickly. 
-The performance in the VM does not reflect the actual performance because the VM has a single core and has limited memory. This script shows the ability for users / reviewers to replicate the performance on some hardware. Optional instructions on replicating the performance in a physical machine are described in later sections. - -### Running on additional graphs (optional) +### Testing on larger graphs -We have provided a few slightly larger graphs for testing. In the folder we have socLive.sg (unweighted binary Live Journal graph), socLive.wsg (weighted binary Live Journal graph). Outside of the compressed file, we have road graph and Twitter graph in the `additional_graphit_graphs` directory. The VM has **insufficient memory** to run Live Journal and the additional graphs. We recommend running these graphs on a machine with at least 8 GB memory. Running the experiments on Twitter graph can potentially take a significant amount of time if your machine does not have a 100 GB memory and many cores. Running these other graphs with serial C++ implementations are very slow. Try to use the parallel implementations if possible (instructions given in later sections). +We have provided a few slightly larger graphs for testing. In the folder we have socLive.sg (unweighted binary Live Journal graph), socLive.wsg (weighted binary Live Journal graph). Outside of the compressed file, we have road graph and Twitter graph in the [`additional_graphit_graphs`](https://www.dropbox.com/sh/1vm9guw2oudb37x/AADzVnBQ6dFnCaPOiwa_FnRNa?dl=0) link. Running the experiments on Twitter graph can potentially take a significant amount of time if your machine does not have a 100 GB memory and many cores. Running these other graphs with serial C++ implementations are very slow. **Please try to use the parallel implementations if possible (instructions given in later sections).** Below we first show the instructions for running the socLive (Live Journal) graph. 
@@ -72,8 +113,9 @@ cp socLive.wsg graphit/graphit_eval/eval/data/socLive #start from graphit root directory cd graphit_eval/eval/table7 -#first compile the generated cpp files -make +#first compile the graphit files and the generated cpp files +make graphit_files +make cpps #run and benchmark the performance python table7_graphit.py --graph socLive @@ -107,30 +149,32 @@ python table7_graphit.py --graph road-usad twitter ``` -### Running parallel versions and replicating performance (optional) +### Running parallel versions and replicating performance on our machine -Here we list the instructions for compiling the generated C++ files using icpc or gcc with Cilk and OpenMP. The user mostly need to define a few variables for the Makefile. +Here we list the instructions for compiling the generated C++ files using icpc or gcc with Cilk and OpenMP. The user mostly need to define a few variables for the Makefile. **We used CILK for most of the files because the work-stealing performs bettern than untuned OPENMP schedule dynamic. For sssp_push_slq.cpp and bfs_push_slq.cpp, we had to use OPENMP for compilation as we needed features specific to OPENMP. The user can look at the Makefile, or the output of the Makefile to figure out the exact commands to compile each individual cpp file.** ``` #start from graphit root directory cd graphit_eval/eval/table7 #compile with icpc if you installed the intel compiler -make ICPC_PAR=1 +make ICPC_PAR=1 cpps #compile with gcc with Cilk and OpenMP -make GCC_PAR=1 +make GCC_PAR=1 cpps #run and benchmark the performance python table7_graphit.py --graph socLive ``` -As we mentioned earlier, the VM is not a good place to replicate the performance numbers we reported in the paper. To replicate the performance, you will need to 1) use the parallel versions of the generated C++ programs 2) run them on a machine with similar configurations as ours. 
We used Intel Xeon E5-2695 v3 CPUs with 12 cores +As we mentioned earlier, to replicate the performance, you will need to 1) use the parallel versions of the generated C++ programs 2) run them on a machine with similar configurations as ours. We used Intel Xeon E5-2695 v3 CPUs with 12 cores each for a total of 24 cores and 48 hyper-threads. The system has 128GB of DDR3-1600 memory and 30 MB last level cache on each socket, and runs with Transparent Huge Pages (THP) enabled. The generated C++ files are also not optimized for single-socket or 4-socket machines. -### Generating, converting and testing graphs (optional) +**If you are trying to evaluate GraphIt on a different machine, you need to tune the schedules to best fit your machine's features, such as cache size, number of sockets, and number of cores. These schedules and scripts are only meant to be examples of the compilation and running commands for you to look at.** + +### Generating, converting and testing graphs GraphIt reuses [GAPBS input formats](https://github.com/sbeamer/gapbs). Specifically, we have tested with edge list file (.el), weighted edge list file (.wel), binary edge list (.sg), and weighted binary edge list (.wsg) formats. Users can use the converter in GAPBS (GAPBS/src/converter.cc) to convert other graph formats into the supported formats, or convert weighted and unweighted edge list files into their respective binary formats. @@ -139,21 +183,4 @@ For the additional graphs, you can use the compiled binaries in the `graphit_eva To use the script for additional graphs, follow the example of socLive on creating directories in `graphit/graphit_eval/eval/data/`. However, certain graphs have to be named in a certain way in order to use our provided script. For example, road graph and Twitter graph need to be named as `road-usad` and `twitter`. Please take a look at `graphit_eval/eval/table7/benchmark.py` for more details. 
-### Generating the C++ files from GraphIt programs (optional) - -The algorithms we used for benchmarking, such as PageRank, PageRankDelta, BFS, Connected Components, Single Source Shortest Paths and Collaborative Filtering are in the **apps** directory. -These files include ONLY the algorithm and NO schedule. You need to use the appropriate schedules for the specific algorithm and input graph to get the best performance. - -In the [arxiv paper](https://arxiv.org/abs/1805.00923) (Table 8), we described the schedules used for each algorithm on each graph on a dual socket system with Intel Xeon E5-2695 v3 CPUs with 12 cores -each for a total of 24 cores and 48 hyper-threads. The system has 128GB of DDR3-1600 memory -and 30 MB last level cache on each socket, and runs with Transparent Huge Pages (THP) enabled. The best schedule for a different machine can be different. You might need to try a few different set of schedules for the best performance. - -In the schedules shown in Table 8, the keyword ’Program’ and the continuation symbol ’->’ are omitted. ’ca’ is the abbreviation for ’configApply’. Note that configApplyNumSSG uses an integer parameter (X) which is dependent on the graph size and the cache size of a system. For example, the complete schedule used for CC on Twitter graph is the following (X is tuned based on the cache size) - -``` -schedule: - program->configApplyDirection("s1", "SparsePush-DensePull")->configApplyParallelization("s1", "dynamic-vertex-parallel")->configApplyDenseVertexSet("s1","bitvector", "src-vertexset", "DensePull"); - program->configApplyNumSSG("s1", "fixed-vertex-count", X, "DensePull"); -``` -The **test/input** and **test/input\_with\_schedules** directories contain many examples of the algorithm and schedule files. Use them as references when writing your own schedule and generate C++ implementations. 
From 72c85670ae9a2b2a9566514282e15b0949ce6018 Mon Sep 17 00:00:00 2001 From: Yunming Date: Thu, 15 Nov 2018 11:58:02 -0500 Subject: [PATCH 15/19] adding a reference to the more detailed evaluation instruction --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 4a08d784..a1a6c106 100644 --- a/README.md +++ b/README.md @@ -147,6 +147,8 @@ schedule: The **test/input** and **test/input\_with\_schedules** directories contain many examples of the algorithm and schedule files. Use them as references when writing your own schedule. + We provide **more detailed instructions on evaluating the code generation and performance capability** of GraphIt in **graphit/graphit_eval/GraphIt_Evaluation_Guide.md**. In the guide, we provide instructions for using a series of scripts that make it easier for people to evaluate GraphIt. + Input Graph Formats =========== From e32f46f59fea8b24d10383647ccc9ec65c60843f Mon Sep 17 00:00:00 2001 From: Yunming Date: Thu, 15 Nov 2018 16:43:02 -0500 Subject: [PATCH 16/19] starting to update the documentation for autotuning --- autotune/apps/bfs_benchmark.gt | 2 +- autotune/graphit_autotuner.py | 11 ++++--- autotune/main.cpp | 56 ---------------------------------- 3 files changed, 8 insertions(+), 61 deletions(-) delete mode 100644 autotune/main.cpp diff --git a/autotune/apps/bfs_benchmark.gt b/autotune/apps/bfs_benchmark.gt index ed8cb796..74764eed 100644 --- a/autotune/apps/bfs_benchmark.gt +++ b/autotune/apps/bfs_benchmark.gt @@ -27,7 +27,7 @@ func main() parent[14] = 14; while (frontier.getVertexSetSize() != 0) - #s1# frontier = edges.from(frontier).to(toFilter).applyModified(updateEdge,parent); + #s1# frontier = edges.from(frontier).to(toFilter).applyModified(updateEdge,parent, true); end var elapsed_time : float = stopTimer(); diff --git a/autotune/graphit_autotuner.py b/autotune/graphit_autotuner.py index 1c5c34bb..ba16a926 100644 --- a/autotune/graphit_autotuner.py +++ b/autotune/graphit_autotuner.py
@@ -13,6 +13,9 @@ from sys import exit import argparse +py_graphitc_file = "../build/bin/graphitc.py" +serial_compiler = "icc" +par_compiler = "icpc" class GraphItTuner(MeasurementInterface): new_schedule_file_name = '' @@ -167,15 +170,15 @@ def compile(self, cfg, id): #compile the schedule file along with the original algorithm file - compile_graphit_cmd = 'python graphitc.py -a {algo_file} -f {schedule_file} -i ../include/ -l ../build/lib/libgraphitlib.a -o test.cpp'.format(algo_file=self.args.algo_file, schedule_file=self.new_schedule_file_name) + compile_graphit_cmd = 'python ' + py_graphitc_file + ' -a {algo_file} -f {schedule_file} -i ../include/ -l ../build/lib/libgraphitlib.a -o test.cpp'.format(algo_file=self.args.algo_file, schedule_file=self.new_schedule_file_name) if not self.use_NUMA: if not self.enable_parallel_tuning: # if parallel icpc compiler is not needed (only tuning serial schedules) - compile_cpp_cmd = 'g++ -std=c++11 -I ../src/runtime_lib/ -O3 test.cpp -o test' + compile_cpp_cmd = serial_compiler + ' -std=c++11 -I ../src/runtime_lib/ -O3 test.cpp -o test' else: # if parallel icpc compiler is supported and needed - compile_cpp_cmd = 'icpc -std=c++11 -DCILK -I ../src/runtime_lib/ -O3 test.cpp -o test' + compile_cpp_cmd = par_compiler + ' -std=c++11 -DCILK -I ../src/runtime_lib/ -O3 test.cpp -o test' else: #add the additional flags for NUMA compile_cpp_cmd = 'icpc -std=c++11 -DOPENMP -lnuma -DNUMA -qopenmp -I ../src/runtime_lib/ -O3 test.cpp -o test' @@ -307,7 +310,7 @@ def save_final_config(self, configuration): parser = argparse.ArgumentParser(parents=opentuner.argparsers()) parser.add_argument('--graph', type=str, default="../test/graphs/4.sg", help='the graph to tune on') - parser.add_argument('--enable_NUMA_tuning', type=int, default=1, help='enable tuning NUMA-aware schedules. 1 for enable (default), 0 for disable') + parser.add_argument('--enable_NUMA_tuning', type=int, default=0, help='enable tuning NUMA-aware schedules. 
1 for enable (default), 0 for disable') parser.add_argument('--enable_parallel_tuning', type=int, default=1, help='enable tuning paralleliation schedules. 1 for enable (default), 0 for disable') parser.add_argument('--enable_denseVertexSet_tuning', type=int, default=1, help='enable tuning denseVertexSet schedules. 1 for enable (default), 0 for disable') parser.add_argument('--algo_file', type=str, required=True, help='input algorithm file') diff --git a/autotune/main.cpp b/autotune/main.cpp deleted file mode 100644 index aeaf9e54..00000000 --- a/autotune/main.cpp +++ /dev/null @@ -1,56 +0,0 @@ -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -using namespace graphit; - -int main(int argc, char* argv[]) { - // Set up various data structures - CLBase cli(argc, argv, "graphit compiler"); - graphit::FIRContext* context = new graphit::FIRContext(); - std::vector * errors = new std::vector(); - Frontend * fe = new Frontend(); - graphit::MIRContext* mir_context = new graphit::MIRContext(); - - //parse the arguments - if (!cli.ParseArgs()) - return -1; - - //read input file into buffer - std::ifstream file(cli.input_filename()); - std::stringstream buffer; - if(!file) { - std::cout << "error reading the input file" << std::endl; - } - buffer << file.rdbuf(); - file.close(); - - //set up the output file - std::ofstream output_file; - output_file.open(cli.output_filename()); - - //compile the input file - fe->parseStream(buffer, context, errors); - - fir::high_level_schedule::ProgramScheduleNode::Ptr program - = std::make_shared(context); - //insert schedule here - - graphit::Midend* me = new graphit::Midend(context, program->getSchedule()); - me->emitMIR(mir_context); - graphit::Backend* be = new graphit::Backend(mir_context); - be->emitCPP(output_file); - - output_file.close(); - - return 0; - -} - From c144c83fb0b9ef9acd3e6aead88d2bb4cdae09cf Mon Sep 17 00:00:00 2001 From: Yunming Date: Thu, 15 Nov 2018 18:02:03 -0500 Subject: 
[PATCH 17/19] adding pointers in the main README file, updating the autotune readme file a bit more --- README.md | 5 ++++ autotune/README.md | 44 ++++++++++++++++++++++++++++++++++ autotune/apps/bfs_benchmark.gt | 2 +- 3 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 autotune/README.md diff --git a/README.md b/README.md index a1a6c106..2126fdd3 100644 --- a/README.md +++ b/README.md @@ -155,3 +155,8 @@ Input Graph Formats GraphIt reuses [GAPBS input formats](https://github.com/sbeamer/gapbs). Specifically, we have tested with edge list file (.el), weighted edge list file (.wel), binary edge list (.sg), and weighted binary edge list (.wsg) formats. Users can use the converters in GAPBS (GAPBS/src/converter.cc) to convert other graph formats into the supported formats, or convert weighted and unweighted edge list files into their respective binary formats. We have provided sample input graph files in the `graphit/test/graphs/` directory. The python tests use the sample input files. + +Autotuning GraphIt Schedules +=========== +Please refer to **README.md** in **graphit/autotune** for more details. +The autotuner is still somewhat experimental. Please read the instructions carefully before trying it out. \ No newline at end of file diff --git a/autotune/README.md b/autotune/README.md new file mode 100644 index 00000000..00237bd9 --- /dev/null +++ b/autotune/README.md @@ -0,0 +1,44 @@ +GraphIt Autotuner +========= +The autotuner for GraphIt aims to automatically find the best schedules for a given input algorithm specification and input graph. The autotuner is built on top of OpenTuner to efficiently search for the best combination of schedules (optimizations). **Currently, the autotuner still requires the user to place a label "s1" on the operator that needs to be tuned.** + +The autotuner is still very experimental. Please read the instructions below carefully before using it.
+ +Dependencies +------------------- + + +Please first install [OpenTuner](https://github.com/jansel/opentuner). GraphIt's autotuner is built on top of OpenTuner. + +Autotune Schedules +------------------- + +Currently it is the easiest to use the graphit files under autotune/apps because we need to have hardcoded starting points for the BFS and SSSP. If you want to change the starting point for BFS or SSSP, simply edit bfs_benchmark.gt or sssp_benchmark.gt. + + +To tune the performance of bfs on the small test graph 4.el, use the following command. `--algo_file` specifies the algorithm file, and `--graph` specifies the input graph to tune on. The autotuner has options that enable/disable parallelization, enable/disable NUMA optimizations (by default we disabled NUMA optimization), and other options such as setting an upper bound on the number of segments to use for configNumSSG. We usually put a **time limit** on tuning with the `--stop-after` option. Even for programs that run on a large input graph, we can usually finish within 5000 seconds. +``` +#change into the directory for autotuner +cd graphit/autotune + +#autotune serial pagerank with 4.el graph for 10 seconds +python graphit_autotuner.py --enable_parallel_tuning 0 --algo_file apps/pagerank_benchmark.gt --graph ../test/graphs/4.el --stop-after 10 + +#autotune parallel pagerank with 4.el graph for 10 seconds +python graphit_autotuner.py --enable_parallel_tuning 1 --algo_file apps/pagerank_benchmark.gt --graph ../test/graphs/4.el --stop-after 10 + +``` + +To see all the options +``` +python graphit_autotuner.py -h +``` + +The final result will be displayed as a configuration in the standard output, and also in `final_config.json`. One example is shown below. You can then translate that into the scheduling commands. At this time, this translation step still has to be done manually.
+``` +('Final Configuration:', {'parallelization': 'serial', 'direction': 'SparsePush', 'numSSG': 5, 'DenseVertexSet': 'boolean-array'}) +``` + +You can also set the serial and parallel compiler used for the C++ files in the graphit_autotuner.py file by modifying the variables `serial_compiler` and `par_compiler`. + +Currently, the autotuner cannot tune the data layout. So the user would need to provide schedules for fusing together different vectors. The user can provide some scheduling commands in a file as a necessary schedule using the `--default_schedule_file` option. \ No newline at end of file diff --git a/autotune/apps/bfs_benchmark.gt b/autotune/apps/bfs_benchmark.gt index 74764eed..147d47ae 100644 --- a/autotune/apps/bfs_benchmark.gt +++ b/autotune/apps/bfs_benchmark.gt @@ -24,7 +24,7 @@ func main() vertices.apply(reset); var frontier : vertexset{Vertex} = new vertexset{Vertex}(0); frontier.addVertex(14); - parent[14] = 14; + parent[5] = 5; while (frontier.getVertexSetSize() != 0) #s1# frontier = edges.from(frontier).to(toFilter).applyModified(updateEdge,parent, true); From 445d166909f8c5bfd49dc600a902a89b719c3df3 Mon Sep 17 00:00:00 2001 From: Yunming Date: Thu, 15 Nov 2018 18:25:50 -0500 Subject: [PATCH 18/19] fixing a bug when generating DensePush-SparsePush schedule for BFS --- src/backend/gen_edge_apply_func_decl.cpp | 10 +++++----- test/c++/high_level_schedule_test.cpp | 20 +++++++++++++++++++ test/c++/test.cpp | 2 +- .../bfs_hybrid_denseforward_serial.gt | 3 +++ 4 files changed, 29 insertions(+), 6 deletions(-) create mode 100644 test/input_with_schedules/bfs_hybrid_denseforward_serial.gt diff --git a/src/backend/gen_edge_apply_func_decl.cpp b/src/backend/gen_edge_apply_func_decl.cpp index 89fa3618..9b93d617 100644 --- a/src/backend/gen_edge_apply_func_decl.cpp +++ b/src/backend/gen_edge_apply_func_decl.cpp @@ -790,16 +790,16 @@ namespace graphit { oss_ << "next[" << dst_type << "] = 1; " << std::endl; dedent(); printIndent(); - oss_ << "}" <<
std::endl; + oss_ << "} //end of generating the next frontier" << std::endl; } - // end of from filtering - if (apply->from_func != "" && !from_vertexset_specified) { + // end of to filtering + if (apply->to_func != "") { dedent(); printIndent(); - oss_ << "}" << std::endl; + oss_ << "} // end of if to_func filtering" << std::endl; } @@ -810,7 +810,7 @@ namespace graphit { if (apply->from_func != "") { dedent(); printIndent(); - oss_ << "} // end of if for from func or from vertexset" << std::endl; + oss_ << "} // end of if for from_func or from vertexset" << std::endl; } dedent(); diff --git a/test/c++/high_level_schedule_test.cpp b/test/c++/high_level_schedule_test.cpp index 693a2672..bb2330fa 100644 --- a/test/c++/high_level_schedule_test.cpp +++ b/test/c++/high_level_schedule_test.cpp @@ -1355,6 +1355,26 @@ TEST_F(HighLevelScheduleTest, SimpleBFSWithHyrbidDenseParallelCASSchedule){ EXPECT_EQ(true, apply_expr->is_parallel); } +TEST_F(HighLevelScheduleTest, SimpleBFSWithHyrbidDenseForwardSerialSchedule){ + fir::high_level_schedule::ProgramScheduleNode::Ptr program_schedule_node + = std::make_shared(context_); + program_schedule_node->configApplyDirection("s1", "DensePush-SparsePush") + ->configApplyParallelization("s1", "serial"); + istringstream is (bfs_str_); + fe_->parseStream(is, context_, errors_); + + EXPECT_EQ (0, basicTestWithSchedule(program_schedule_node)); + + mir::FuncDecl::Ptr main_func_decl = mir_context_->getFunction("main"); + mir::WhileStmt::Ptr while_stmt = mir::to((*(main_func_decl->body->stmts))[2]); + mir::AssignStmt::Ptr assign_stmt = mir::to((*(while_stmt->body->stmts))[0]); + + //check that the apply expr is push and parallel + EXPECT_EQ(true, mir::isa(assign_stmt->expr)); + mir::HybridDenseForwardEdgeSetApplyExpr::Ptr apply_expr = mir::to(assign_stmt->expr); + EXPECT_EQ(false, apply_expr->is_parallel); +} + TEST_F(HighLevelScheduleTest, BFSWithPullParallelSchedule){ fir::high_level_schedule::ProgramScheduleNode::Ptr program_schedule_node 
= std::make_shared(context_); diff --git a/test/c++/test.cpp b/test/c++/test.cpp index e13ea0d7..3a94176b 100644 --- a/test/c++/test.cpp +++ b/test/c++/test.cpp @@ -38,7 +38,7 @@ int main(int argc, char **argv) { // // ::testing::GTEST_FLAG(filter) = "LowLevelScheduleTest.SimpleApplyFunctionFusion"; // -// ::testing::GTEST_FLAG(filter) = "HighLevelScheduleTest.BFSPushSlidingQueueSchedule"; +// ::testing::GTEST_FLAG(filter) = "HighLevelScheduleTest.SimpleBFSWithHyrbidDenseForwardSerialSchedule"; // ::testing::GTEST_FLAG(filter) = "HighLevelScheduleTest.CCHybridDenseSchedule"; // diff --git a/test/input_with_schedules/bfs_hybrid_denseforward_serial.gt b/test/input_with_schedules/bfs_hybrid_denseforward_serial.gt new file mode 100644 index 00000000..f730891b --- /dev/null +++ b/test/input_with_schedules/bfs_hybrid_denseforward_serial.gt @@ -0,0 +1,3 @@ +schedule: + program->configApplyDirection("s1", "DensePush-SparsePush"); + program->configApplyParallelization("s1", "serial"); \ No newline at end of file From 862dcb18a93620a496e9dad04af431673764197d Mon Sep 17 00:00:00 2001 From: Yunming Zhang Date: Sat, 17 Nov 2018 15:04:10 -0500 Subject: [PATCH 19/19] updating the evaluation instructions based on Ajay's feedback --- graphit_eval/GraphIt_Evaluation_Guide.md | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/graphit_eval/GraphIt_Evaluation_Guide.md b/graphit_eval/GraphIt_Evaluation_Guide.md index ac8e9dbc..1ba231dd 100644 --- a/graphit_eval/GraphIt_Evaluation_Guide.md +++ b/graphit_eval/GraphIt_Evaluation_Guide.md @@ -1,12 +1,10 @@ # GraphIt Code Generation and Performance Evaluation Guide -The following overview consists of a Step by Step Instructions explaining how to reproduce Figure 6 (PageRankDelta with different schedules) and Table 8 (GraphIt performance on our 2-socket machine) in the paper. 
We refer users to the [Getting Started Guide](https://github.com/GraphIt-DSL/graphit/blob/master/README.md ) for instructions to set up GraphIt. **NOTE:** the schedules we used here are almost certainly **NOT** the fastest schedules for your machine. **Please only use the instructions here as examples for writing and compiling different schedules, and tune schedules to best fit your machine's features, such as cache size, number of sockets, and number of cores.** +**This instruction assumes that the users have followed the [Getting Started Guide](https://github.com/GraphIt-DSL/graphit/blob/master/README.md ) to set up GraphIt.** The following overview consists of a Step by Step Instructions explaining how to reproduce Figure 6 (PageRankDelta with different schedules) and Table 8 (GraphIt performance on our 2-socket machine) in the paper. **NOTE:** the schedules we used here are almost certainly **NOT** the fastest schedules for your machine. **Please only use the instructions here as examples for writing and compiling different schedules, and tune schedules to best fit your machine's features, such as cache size, number of sockets, and number of cores.** ## Reproducing PageRankDelta with different schedules -Figure 6 in the paper shows the different C++ code generated by applying different schedules to PageRankDelta. We have build a script to generate the code for PageRankDelta with different schedules and make sure the generated C++ code compiles successfully. - -**This script might run for 4-5 minutes as compiling GraphIt code currently is a bit slow. Please wait for a few minutes for the compilation process to finish.** +Figure 6 in the paper shows the different C++ code generated by applying different schedules to PageRankDelta. We have built a script to generate the code for PageRankDelta with different schedules and make sure the generated C++ code compiles successfully. 
``` #start from graphit root directory @@ -51,7 +49,7 @@ make graphit_files ``` -Here we show the abbreviated output of the script below. These are essentially the commands we used to compiler the graphit files using schedules in the test directory. The output cpp files are stored in graphit/graphit_eval/eval/table7/cpps. You can look at the schedules files here to figure out the schedules we used to get high performance for our machines. +Here we show the abbreviated output of the script below. These are essentially the commands we used to compile the graphit files using schedules in the test directory. The output cpp files are stored in graphit/graphit_eval/eval/table7/cpps. You can look at the schedules files here to figure out the schedules we used to get high performance for our machines. ``` yunming:table7$ make graphit_files @@ -98,7 +96,7 @@ These runs should complete very quickly. ### Testing on larger graphs -We have provided a few slightly larger graphs for testing. In the folder we have socLive.sg (unweighted binary Live Journal graph), socLive.wsg (weighted binary Live Journal graph). Outside of the compressed file, we have road graph and Twitter graph in the [`additional_graphit_graphs`](https://www.dropbox.com/sh/1vm9guw2oudb37x/AADzVnBQ6dFnCaPOiwa_FnRNa?dl=0) link. Running the experiments on Twitter graph can potentially take a significant amount of time if your machine does not have a 100 GB memory and many cores. Running these other graphs with serial C++ implementations are very slow. **Please try to use the parallel implementations if possible (instructions given in later sections).** +We have provided a few slightly larger graphs for testing in the Dropbox folder [`additional_graphit_graphs`](https://www.dropbox.com/sh/1vm9guw2oudb37x/AADzVnBQ6dFnCaPOiwa_FnRNa?dl=0). 
In the folder we have socLive.sg (unweighted binary Live Journal graph), socLive.wsg (weighted binary Live Journal graph), road graph (binary unweighted and weighted), and Twitter graph (binary unweighted and weighted). The weights are currently just set to 1. Running the experiments on Twitter graph can potentially take a significant amount of time if your machine does not have 100 GB of memory and many cores. Running these other graphs with serial C++ implementations is very slow. **Please try to use the parallel implementations if possible (instructions given in later sections).** Below we first show the instructions for running the socLive (Live Journal) graph. @@ -149,7 +147,7 @@ python table7_graphit.py --graph road-usad twitter ``` -### Running parallel versions and replicating performance on our machine +### Running parallel versions and replicating performance on a machine similar to our configuration Here we list the instructions for compiling the generated C++ files using icpc or gcc with Cilk and OpenMP. The user mostly needs to define a few variables for the Makefile. **We used CILK for most of the files because the work-stealing performs better than untuned OPENMP schedule dynamic. For sssp_push_slq.cpp and bfs_push_slq.cpp, we had to use OPENMP for compilation as we needed features specific to OPENMP. The user can look at the Makefile, or the output of the Makefile to figure out the exact commands to compile each individual cpp file.** @@ -158,6 +156,9 @@ Here we list the instructions for compiling the generated C++ files using icpc o #start from graphit root directory cd graphit_eval/eval/table7 +#remove previously compiled binaries +make clean + #compile with icpc if you installed the intel compiler make ICPC_PAR=1 cpps