sort files and separate factor with datagen (#111)
* sort files

* update tools markdown doc

* separate factor and generate

* add note
qishipengqsp authored Sep 29, 2024
1 parent 7599200 commit 946fc48
Showing 117 changed files with 281 additions and 266 deletions.
24 changes: 24 additions & 0 deletions scripts/run_local.sh
@@ -3,12 +3,19 @@
LDBC_FINBENCH_DATAGEN_JAR=target/ldbc_finbench_datagen-0.2.0-SNAPSHOT-jar-with-dependencies.jar
OUTPUT_DIR=out

# Note: generate factor tables with --generate-factors

# run locally with the python script
# time python3 scripts/run.py --jar $LDBC_FINBENCH_DATAGEN_JAR --main-class ldbc.finbench.datagen.LdbcDatagen --memory 500g -- --scale-factor 30 --output-dir ${OUTPUT_DIR}

# run locally with spark-submit command
# **({'spark.driver.extraJavaOptions': '-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=5005'}), # Debug
# **({'spark.executor.extraJavaOptions': '-verbose:gc -XX:+UseG1GC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps'}),
# --conf "spark.memory.offHeap.enabled=true" \
# --conf "spark.memory.offHeap.size=100g" \
# --conf "spark.storage.memoryFraction=0" \
# --conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \

time spark-submit --master local[*] \
--class ldbc.finbench.datagen.LdbcDatagen \
--driver-memory 480g \
@@ -24,3 +31,20 @@ time spark-submit --master local[*] \
${LDBC_FINBENCH_DATAGEN_JAR} \
--scale-factor 10 \
--output-dir ${OUTPUT_DIR}

# currently works on SF100
#time spark-submit --master local[*] \
# --class ldbc.finbench.datagen.LdbcDatagen \
# --driver-memory 400g \
# --conf "spark.default.parallelism=800" \
# --conf "spark.shuffle.compress=true" \
# --conf "spark.shuffle.spill.compress=true" \
# --conf "spark.kryoserializer.buffer.max=512m" \
# --conf "spark.driver.maxResultSize=0" \
# --conf "spark.driver.extraJavaOptions=-Xss512m" \
# --conf "spark.executor.extraJavaOptions=-Xss512m -XX:+UseG1GC" \
# --conf "spark.kryo.referenceTracking=false" \
# ${LDBC_FINBENCH_DATAGEN_JAR} \
# --scale-factor 100 \
# --output-dir ${OUTPUT_DIR}
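
With factor generation now disabled by default (the `generateFactors` default flips to `false` in `LdbcDatagen.scala` below), factor tables would come from a second, separate run that passes `--generate-factors`, as the note at the top of this script suggests. A minimal sketch, reusing the script's options; whether the factor run needs the same driver memory is an assumption:

```bash
# Sketch only: a separate factor-generation run, after the data-generation run above.
time spark-submit --master local[*] \
    --class ldbc.finbench.datagen.LdbcDatagen \
    --driver-memory 480g \
    ${LDBC_FINBENCH_DATAGEN_JAR} \
    --scale-factor 10 \
    --output-dir ${OUTPUT_DIR} \
    --generate-factors
```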

34 changes: 16 additions & 18 deletions src/main/scala/ldbc/finbench/datagen/LdbcDatagen.scala
@@ -1,9 +1,7 @@
package ldbc.finbench.datagen

import ldbc.finbench.datagen.factors.FactorGenerationStage
import ldbc.finbench.datagen.generation.dictionary.Dictionaries
import ldbc.finbench.datagen.generation.GenerationStage
import ldbc.finbench.datagen.transformation.TransformationStage
import ldbc.finbench.datagen.util.{Logging, SparkApp}
import shapeless.lens

@@ -24,7 +22,7 @@ object LdbcDatagen extends SparkApp with Logging {
format: String = "csv",
formatOptions: Map[String, String] = Map.empty,
epochMillis: Boolean = false,
-      generateFactors: Boolean = true,
+      generateFactors: Boolean = false,
factorFormat: String = "parquet"
)

@@ -121,22 +119,22 @@ object LdbcDatagen extends SparkApp with Logging {
}

  override def run(args: ArgsType): Unit = {
-    val generationArgs = GenerationStage.Args(
-      scaleFactor = args.scaleFactor,
-      outputDir = args.outputDir,
-      format = args.format,
-      partitionsOpt = args.numPartitions
-    )
-    log.info("[Main] Starting generation stage")
-    GenerationStage.run(generationArgs)
-
-    if (args.generateFactors) {
-      val factorArgs = FactorGenerationStage.Args(
-        outputDir = args.outputDir,
-        format = args.factorFormat
-      )
-      log.info("[Main] Starting factoring stage")
-      // FactorGenerationStage.run(factorArgs)
+    if (!args.generateFactors) {
+      GenerationStage.run(
+        GenerationStage.Args(
+          scaleFactor = args.scaleFactor,
+          outputDir = args.outputDir,
+          format = args.format,
+          partitionsOpt = args.numPartitions
+        )
+      )
+    } else {
+      FactorGenerationStage.run(
+        FactorGenerationStage.Args(
+          outputDir = args.outputDir,
+          format = args.factorFormat
+        )
+      )
    }
  }
}
src/main/scala/ldbc/finbench/datagen/factors/FactorGenerationStage.scala
@@ -1,5 +1,6 @@
package ldbc.finbench.datagen.factors

import ldbc.finbench.datagen.LdbcDatagen.log
import ldbc.finbench.datagen.util.DatagenStage
import org.apache.spark.sql.functions._
import org.apache.spark.sql.{DataFrame, SparkSession, functions => F}
@@ -10,7 +11,6 @@ import shapeless.lens
import scala.util.matching.Regex

object FactorGenerationStage extends DatagenStage {

@transient lazy val log: Logger = LoggerFactory.getLogger(this.getClass)

case class Args(
@@ -64,14 +64,14 @@ object FactorGenerationStage extends DatagenStage {
run(parsedArgs)
}

-  // execute factorization process
+  // TODO: finish all

  override def run(args: Args) = {
-    parameterCuration(args)
+    factortables(args)
  }

-  def parameterCuration(args: Args)(implicit spark: SparkSession) = {
+  def factortables(args: Args)(implicit spark: SparkSession) = {
    import spark.implicits._
+    log.info("[Main] Starting factoring stage")

val transferRDD = spark.read
.format("org.apache.spark.sql.execution.datasources.csv.CSVFileFormat")
@@ -533,6 +533,5 @@ object FactorGenerationStage extends DatagenStage {
.option("delimiter", "|")
.format("org.apache.spark.sql.execution.datasources.csv.CSVFileFormat")
.save(s"${args.outputDir}/factor_table/upstream_amount")

}
}
src/main/scala/ldbc/finbench/datagen/generation/GenerationStage.scala
@@ -1,5 +1,6 @@
package ldbc.finbench.datagen.generation

import ldbc.finbench.datagen.LdbcDatagen.log
import ldbc.finbench.datagen.config.{ConfigParser, DatagenConfiguration}
import ldbc.finbench.datagen.io.raw.{Csv, Parquet, RawSink}
import ldbc.finbench.datagen.util._
@@ -25,6 +26,7 @@ object GenerationStage extends DatagenStage with Logging {
override type ArgsType = Args

override def run(args: Args): Unit = {
log.info("[Main] Starting generation stage")
// build and initialize the configs
val config = buildConfig(args)
// OPT: It is called in each SparkGenerator in Spark to initialize the context on the executors.
4 changes: 0 additions & 4 deletions tools/DataProfiler/result/db139/profile.log

This file was deleted.

2 changes: 0 additions & 2 deletions tools/DataProfiler/result/db177/edges.log

This file was deleted.

4 changes: 0 additions & 4 deletions tools/DataProfiler/result/db177/profile.log

This file was deleted.

4 changes: 0 additions & 4 deletions tools/DataProfiler/result/db184/profile.log

This file was deleted.

2 changes: 0 additions & 2 deletions tools/DataProfiler/result/transfer/edges.log

This file was deleted.

4 changes: 0 additions & 4 deletions tools/DataProfiler/result/transfer/profile.log

This file was deleted.

64 changes: 60 additions & 4 deletions tools/README.md
@@ -1,6 +1,62 @@
# Tools

Here are some tools for graph data processing.
- dataprofiler: a tool for profiling graph data, including degree distribution, etc.
- graphgen: a simple tool/example code to generate power-law distributed graph data.
- paramgen: a Parameter Search tool to generate parameters for queries using TuGraph.
- paramgen:
  - parameter_curation: a tool for generating parameters for FinBench queries
- check_*.py: Python scripts used to check data features such as consistency and distribution
- merge_cluster_output.py: a Python script to merge the output produced in cluster mode
- statistic.py: a Python script to calculate statistics of the generated data
- legacy: some legacy tools
  - dataprofiler: a tool for profiling graph data, including degree distribution, etc.
  - graphgen: a simple tool/example code to generate power-law distributed graph data.
  - factorgen: factor table generators implemented in Python


## ParamsGen

`params_gen.py` uses the CREATE_VALIDATION feature to generate parameters.

The specific steps are as follows:

1. Select vertices of type Account, Person, and Loan from the dataset, and generate a parameter file that meets the input specifications for ldbc_finbench_driver.
2. Execute CREATE_VALIDATION to generate validation_params.csv.
3. Select non-empty results from validation_params.csv.

Example:

```bash
python3 params_gen.py 1 # gen tcr1 params
```
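
Step 3 can be scripted. A minimal sketch, assuming `validation_params.csv` is `|`-delimited and that an empty last field marks an empty result (both are assumptions about the file layout):

```bash
# Keep only rows whose last '|'-delimited field is non-empty.
# Delimiter and result-column position are assumptions; adjust to the actual layout.
awk -F'|' '$NF != ""' validation_params.csv > validation_params.nonempty.csv
```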

Other notes:

1. The generated start_timestamp and end_timestamp in the current version are fixed values.
2. For tcr4 and tcr10, this method is not efficient enough. Use the following Cypher queries to search for parameters instead:

```Cypher
// tcr4
MATCH
(n1:Account)-[:transfer]->
(n2:Account)-[:transfer]->
(n3:Account)-[:transfer]->(n4:Account)
WHERE
n1.id = n4.id AND n1.id > n2.id AND n2.id > n3.id
WITH
n1.id as n1id,
n2.id as n2id,
n3.id as n3id,
n4.id as n4id
LIMIT 1000
RETURN DISTINCT toString(n1id)+"|"+toString(n2id)
// tcr10
MATCH
(c:Company)<-[:invest]-(p:Person)
WITH
c.id as cid,
count(p.id) as num,
collect(p.id) as person
WHERE num >= 2
RETURN
  toString(person[0])+"|"+toString(person[1])
LIMIT 1000
```
28 files renamed without changes.
49 changes: 0 additions & 49 deletions tools/paramgen/README.md

This file was deleted.


