From 570c508fb2a9f3386ce256566d7c33e2bc430bf8 Mon Sep 17 00:00:00 2001 From: Shipeng Qi Date: Wed, 16 Oct 2024 20:12:09 +0800 Subject: [PATCH 1/4] move param curation scripts and update scale factor parameters (#113) --- .gitignore | 1 + .../parameter_curation.py | 2 +- {tools/paramgen => paramgen}/search_params.py | 0 {tools/paramgen => paramgen}/time_select.py | 0 scripts/run_paramgen.sh | 18 ++++++++ src/main/resources/scale_factors.xml | 45 +++++-------------- tools/statistic.py | 14 +++++- 7 files changed, 43 insertions(+), 37 deletions(-) rename {tools/paramgen => paramgen}/parameter_curation.py (99%) rename {tools/paramgen => paramgen}/search_params.py (100%) rename {tools/paramgen => paramgen}/time_select.py (100%) create mode 100644 scripts/run_paramgen.sh diff --git a/.gitignore b/.gitignore index 0640ec27..53852d66 100644 --- a/.gitignore +++ b/.gitignore @@ -25,4 +25,5 @@ sf*/ sf*.tar sf*.tar.gz +paramgen/__pycache__/ tools/paramgen/__pycache__/ \ No newline at end of file diff --git a/tools/paramgen/parameter_curation.py b/paramgen/parameter_curation.py similarity index 99% rename from tools/paramgen/parameter_curation.py rename to paramgen/parameter_curation.py index 6a4de363..fbf9c8b8 100644 --- a/tools/paramgen/parameter_curation.py +++ b/paramgen/parameter_curation.py @@ -24,7 +24,7 @@ THRESH_HOLD = 0 THRESH_HOLD_6 = 0 -TRUNCATION_LIMIT = 10000 +TRUNCATION_LIMIT = 500 BATCH_SIZE = 5000 TIME_TRUNCATE = True diff --git a/tools/paramgen/search_params.py b/paramgen/search_params.py similarity index 100% rename from tools/paramgen/search_params.py rename to paramgen/search_params.py diff --git a/tools/paramgen/time_select.py b/paramgen/time_select.py similarity index 100% rename from tools/paramgen/time_select.py rename to paramgen/time_select.py diff --git a/scripts/run_paramgen.sh b/scripts/run_paramgen.sh new file mode 100644 index 00000000..67138c61 --- /dev/null +++ b/scripts/run_paramgen.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +LDBC_FINBENCH_DATAGEN_JAR=target/ldbc_finbench_datagen-0.2.0-SNAPSHOT-jar-with-dependencies.jar +OUTPUT_DIR=out/sf3/ + +# Note: generate factor tables with --generate-factors + +echo "start factor table generation" + +time spark-submit --master local[*] \ + --class ldbc.finbench.datagen.LdbcDatagen \ + --driver-memory 480g \ + ${LDBC_FINBENCH_DATAGEN_JAR} \ + --output-dir ${OUTPUT_DIR} \ + --factor-format csv \ + --generate-factors + +echo "start parameter curation" \ No newline at end of file diff --git a/src/main/resources/scale_factors.xml b/src/main/resources/scale_factors.xml index 18d70cdb..192a223e 100644 --- a/src/main/resources/scale_factors.xml +++ b/src/main/resources/scale_factors.xml @@ -1,40 +1,17 @@ - - - generator.numPersons - 800 - - - generator.numCompanies - 400 - - - generator.numMediums - 1000 - - - transfer.minNumDegree - 1 - - - transfer.maxNumDegree - 1000 - - - generator.numPersons - 8000 + 1000 generator.numCompanies - 4000 + 1000 generator.numMediums - 10000 + 2000 transfer.minNumDegree @@ -49,15 +26,15 @@ generator.numPersons - 24000 + 3000 generator.numCompanies - 12000 + 3000 generator.numMediums - 30000 + 6000 transfer.minNumDegree @@ -92,18 +69,18 @@ - + diff --git a/tools/statistic.py b/tools/statistic.py index 39623a03..45e5a2c9 100644 --- a/tools/statistic.py +++ b/tools/statistic.py @@ -4,10 +4,18 @@ import collections -def print_counts(counts): +labels = ["person","personOwnAccount","personApplyLoan","personGuarantee","personInvest","blank","company","companyOwnAccount","companyApplyLoan","companyGuarantee","companyInvest","blank","account","transfer","withdraw","blank","loan","loantransfer","deposit","repay","blank","medium","signIn"] + +def print_original_counts(counts): for key, value in collections.OrderedDict(sorted(counts.items())).items(): print("{}:{}".format(key, value)) +def print_formatted_counts(counts): + for label in labels: + if label == "blank": + print("================================") + else: + print("{}:{}".format(label, counts[label])) def count_entites(path): counts = {} @@ -18,7 +26,9 @@ def count_entites(path): for file in glob.glob(os.path.join(subdir_path, "*.csv")): num_entites += sum(1 for _ in open(file)) - 1 counts[subdir] = num_entites - print_counts(counts) + print_original_counts(counts) + print("\n========== Formatted Output ============\n") + print_formatted_counts(counts) if __name__ == "__main__": From f6287be5b594d5fb198e41f25b57b3a0a1e50e83 Mon Sep 17 00:00:00 2001 From: qishipengqsp Date: Thu, 17 Oct 2024 11:01:43 +0800 Subject: [PATCH 2/4] rename output param csv filenames --- paramgen/parameter_curation.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/paramgen/parameter_curation.py b/paramgen/parameter_curation.py index fbf9c8b8..baa29fda 100644 --- a/paramgen/parameter_curation.py +++ b/paramgen/parameter_curation.py @@ -345,7 +345,7 @@ def process_iter_queries(query_id): else: amount_bucket_path = os.path.join(table_dir, 'trans_withdraw_bucket') time_bucket_path = os.path.join(table_dir, 'trans_withdraw_month') - output_path = os.path.join(out_dir, 'tcr8.txt') + output_path = os.path.join(out_dir, 'complex_8_param.csv') steps = 2 elif query_id == 1: @@ -356,7 +356,7 @@ def process_iter_queries(query_id): else: amount_bucket_path = os.path.join(table_dir, 'transfer_out_bucket') time_bucket_path = os.path.join(table_dir, 'transfer_out_month') - output_path = os.path.join(out_dir, 'tcr1.txt') + output_path = os.path.join(out_dir, 'complex_1_param.csv') steps = 2 elif query_id == 5: @@ -378,7 +378,7 @@ def process_iter_queries(query_id): else: amount_bucket_path = os.path.join(table_dir, 'transfer_in_bucket') time_bucket_path = os.path.join(table_dir, 'transfer_in_month') - output_path = os.path.join(out_dir, 'tcr2.txt') + output_path = os.path.join(out_dir, 'complex_2_param.csv') steps = 3 elif query_id == 3: @@ -394,7 +394,7 @@ def process_iter_queries(query_id): account_account_path = os.path.join(table_dir, 'person_guarantee_list') amount_bucket_path = os.path.join(table_dir, 'person_guarantee_count') time_bucket_path = os.path.join(table_dir, 'person_guarantee_month') - output_path = os.path.join(out_dir, 'tcr11.txt') + output_path = os.path.join(out_dir, 'complex_11_param.csv') steps = 3 first_account_df = process_csv(first_account_path) @@ -474,14 +474,14 @@ def process_iter_queries(query_id): elif query_id == 5: csvWriter_5 = CSVSerializer() - csvWriter_5.setOutputFile(output_path + 'tcr5.txt') + csvWriter_5.setOutputFile(output_path + 'complex_5_param.csv') csvWriter_5.registerHandler(hendleIdParam, final_first_items, "id") csvWriter_5.registerHandler(handleTimeDurationParam, time_list, "startTime|endTime") csvWriter_5.registerHandler(handleTruncateLimitParam, truncate_limit_list, "truncationLimit") csvWriter_5.registerHandler(handleTruncateOrderParam, truncate_order_list, "truncationOrder") csvWriter_12 = CSVSerializer() - csvWriter_12.setOutputFile(output_path + 'tcr12.txt') + csvWriter_12.setOutputFile(output_path + 'complex_12_param.csv') csvWriter_12.registerHandler(hendleIdParam, final_first_items, "id") csvWriter_12.registerHandler(handleTimeDurationParam, time_list, "startTime|endTime") csvWriter_12.registerHandler(handleTruncateLimitParam, truncate_limit_list, "truncationLimit") @@ -519,7 +519,7 @@ def process_iter_queries(query_id): final_second_items_4.append(final_first_items[k]) csvWriter_3 = CSVSerializer() - csvWriter_3.setOutputFile(output_path + 'tcr3.txt') + csvWriter_3.setOutputFile(output_path + 'complex_3_param.csv') csvWriter_3.registerHandler(hendleIdParam, final_first_items, "id1") csvWriter_3.registerHandler(hendleIdParam, final_second_items_3, "id2") csvWriter_3.registerHandler(handleTimeDurationParam, time_list, "startTime|endTime") @@ -527,7 +527,7 @@ def process_iter_queries(query_id): csvWriter_3.registerHandler(handleTruncateOrderParam, truncate_order_list, "truncationOrder") csvWriter_4 = CSVSerializer() - csvWriter_4.setOutputFile(output_path + 'tcr4.txt') + csvWriter_4.setOutputFile(output_path + 'complex_4_param.csv') csvWriter_4.registerHandler(hendleIdParam, final_first_items, "id1") csvWriter_4.registerHandler(hendleIdParam, final_second_items_4, "id2") csvWriter_4.registerHandler(handleTimeDurationParam, time_list, "startTime|endTime") @@ -559,7 +559,7 @@ def process_1_hop_query(query_id): elif query_id == 10: first_count_path = os.path.join(table_dir, 'person_invest_company') time_bucket_path = os.path.join(table_dir, 'invest_month') - output_path = os.path.join(out_dir, 'tcr10.txt') + output_path = os.path.join(out_dir, 'complex_10_param.csv') first_count_df = process_csv(first_count_path) time_bucket_df = process_csv(time_bucket_path) @@ -579,7 +579,7 @@ def process_1_hop_query(query_id): if query_id == 7: csvWriter_7 = CSVSerializer() - csvWriter_7.setOutputFile(output_path + 'tcr7.txt') + csvWriter_7.setOutputFile(output_path + 'complex_7_param.csv') csvWriter_7.registerHandler(hendleIdParam, final_first_items, "id") csvWriter_7.registerHandler(handleThresholdParam, thresh_list, "threshold") csvWriter_7.registerHandler(handleTimeDurationParam, time_list, "startTime|endTime") @@ -587,7 +587,7 @@ def process_1_hop_query(query_id): csvWriter_7.registerHandler(handleTruncateOrderParam, truncate_order_list, "truncationOrder") csvWriter_9 = CSVSerializer() - csvWriter_9.setOutputFile(output_path + 'tcr9.txt') + csvWriter_9.setOutputFile(output_path + 'complex_9_param.csv') csvWriter_9.registerHandler(hendleIdParam, final_first_items, "id") csvWriter_9.registerHandler(handleThresholdParam, thresh_list, "threshold") csvWriter_9.registerHandler(handleTimeDurationParam, time_list, "startTime|endTime") @@ -627,7 +627,7 @@ def process_withdraw_query(): else: withdraw_bucket_path = os.path.join(table_dir, 'withdraw_in_bucket') transfer_bucket_path = os.path.join(table_dir, 'transfer_in_bucket') - output_path = os.path.join(out_dir, 'tcr6.txt') + output_path = os.path.join(out_dir, 'complex_6_param.csv') first_account_df = process_csv(first_account_path) time_bucket_df = process_csv(time_bucket_path) From e2c9fedc66fea74e90108ac8c2a10ceb84b86737 Mon Sep 17 00:00:00 2001 From: hujiatao <50831728+hujiatao0@users.noreply.github.com> Date: Sun, 24 Nov 2024 13:44:07 +0800 Subject: [PATCH 3/4] fix random and split time in paramgen (#115) * fix random and split time * fix time convert --------- Co-authored-by: hujiatao Co-authored-by: hujiatao --- paramgen/parameter_curation.py | 23 ++++---- scripts/run_paramgen.sh | 2 +- .../factors/FactorGenerationStage.scala | 58 +++++++++++++++---- 3 files changed, 62 insertions(+), 21 deletions(-) diff --git a/paramgen/parameter_curation.py b/paramgen/parameter_curation.py index baa29fda..01782076 100644 --- a/paramgen/parameter_curation.py +++ b/paramgen/parameter_curation.py @@ -21,6 +21,7 @@ table_dir = sys.argv[1] out_dir = sys.argv[2] +random.seed(42) THRESH_HOLD = 0 THRESH_HOLD_6 = 0 @@ -676,19 +677,21 @@ def process_withdraw_query(): def main(): - queries = [3, 1, 8, 7, 10, 11, 2, 5, 6] + queries = [6, 2, 3, 5, 7, 11, 8, 10, 1] # queries = [3] multiprocessing.set_start_method('forkserver') - processes = [] - - for query_id in queries: - p = multiprocessing.Process(target=process_query, args=(query_id,)) - p.start() - processes.append(p) - - for p in processes: - p.join() + + batch_size = 5 + for i in range(0, len(queries), batch_size): + processes = [] + for query_id in queries[i:i + batch_size]: + p = multiprocessing.Process(target=process_query, args=(query_id,)) + p.start() + processes.append(p) + + for p in processes: + p.join() if __name__ == "__main__": diff --git a/scripts/run_paramgen.sh b/scripts/run_paramgen.sh index 67138c61..1f830204 100644 --- a/scripts/run_paramgen.sh +++ b/scripts/run_paramgen.sh @@ -1,7 +1,7 @@ #!/bin/bash LDBC_FINBENCH_DATAGEN_JAR=target/ldbc_finbench_datagen-0.2.0-SNAPSHOT-jar-with-dependencies.jar -OUTPUT_DIR=out/sf3/ +OUTPUT_DIR=out/ # Note: generate factor tables with --generate-factors diff --git a/src/main/scala/ldbc/finbench/datagen/factors/FactorGenerationStage.scala b/src/main/scala/ldbc/finbench/datagen/factors/FactorGenerationStage.scala index 993348d0..82dc85fb 100644 --- a/src/main/scala/ldbc/finbench/datagen/factors/FactorGenerationStage.scala +++ b/src/main/scala/ldbc/finbench/datagen/factors/FactorGenerationStage.scala @@ -77,43 +77,81 @@ object FactorGenerationStage extends DatagenStage { .format("org.apache.spark.sql.execution.datasources.csv.CSVFileFormat") .option("header", "true") .option("delimiter", "|") - .load(s"${args.outputDir}/raw/transfer/*.csv") - .select($"fromId", $"toId", $"amount".cast("double"), $"createTime") + .load(s"${args.outputDir}/snapshot/AccountTransferAccount.csv") + .select( + $"fromId", + $"toId", + $"amount".cast("double"), + (unix_timestamp( + coalesce( + to_timestamp($"createTime", "yyyy-MM-dd HH:mm:ss.SSS"), + to_timestamp($"createTime", "yyyy-MM-dd HH:mm:ss") + ) + ) * 1000).alias("createTime") + ) val withdrawRDD = spark.read .format("org.apache.spark.sql.execution.datasources.csv.CSVFileFormat") .option("header", "true") .option("delimiter", "|") - .load(s"${args.outputDir}/raw/withdraw/*.csv") - .select($"fromId", $"toId", $"amount".cast("double"), $"createTime") + .load(s"${args.outputDir}/snapshot/AccountWithdrawAccount.csv") + .select( + $"fromId", + $"toId", + $"amount".cast("double"), + (unix_timestamp( + coalesce( + to_timestamp($"createTime", "yyyy-MM-dd HH:mm:ss.SSS"), + to_timestamp($"createTime", "yyyy-MM-dd HH:mm:ss") + ) + ) * 1000).alias("createTime") + ) val depositRDD = spark.read .format("org.apache.spark.sql.execution.datasources.csv.CSVFileFormat") .option("header", "true") .option("delimiter", "|") - .load(s"${args.outputDir}/raw/deposit/*.csv") + .load(s"${args.outputDir}/snapshot/LoanDepositAccount.csv") .select($"accountId", $"loanId") val personInvestRDD = spark.read .format("org.apache.spark.sql.execution.datasources.csv.CSVFileFormat") .option("header", "true") .option("delimiter", "|") - .load(s"${args.outputDir}/raw/personInvest/*.csv") - .select($"investorId", $"companyId", $"createTime") + .load(s"${args.outputDir}/snapshot/PersonInvestCompany.csv") + .select( + $"investorId", + $"companyId", + (unix_timestamp( + coalesce( + to_timestamp($"createTime", "yyyy-MM-dd HH:mm:ss.SSS"), + to_timestamp($"createTime", "yyyy-MM-dd HH:mm:ss") + ) + ) * 1000).alias("createTime") + ) val OwnRDD = spark.read .format("org.apache.spark.sql.execution.datasources.csv.CSVFileFormat") .option("header", "true") .option("delimiter", "|") - .load(s"${args.outputDir}/raw/personOwnAccount/*.csv") + .load(s"${args.outputDir}/snapshot/PersonOwnAccount.csv") .select($"personId", $"accountId") val personGuaranteeRDD = spark.read .format("org.apache.spark.sql.execution.datasources.csv.CSVFileFormat") .option("header", "true") .option("delimiter", "|") - .load(s"${args.outputDir}/raw/personGuarantee/*.csv") - .select($"fromId", $"toId", $"createTime") + .load(s"${args.outputDir}/snapshot/PersonGuaranteePerson.csv") + .select( + $"fromId", + $"toId", + (unix_timestamp( + coalesce( + to_timestamp($"createTime", "yyyy-MM-dd HH:mm:ss.SSS"), + to_timestamp($"createTime", "yyyy-MM-dd HH:mm:ss") + ) + ) * 1000).alias("createTime") + ) def transformItems( df: DataFrame, From 9f10c0020a60ea6bc05e2bb13b200e5a130514ad Mon Sep 17 00:00:00 2001 From: Shipeng Qi Date: Sun, 24 Nov 2024 17:20:10 +0800 Subject: [PATCH 4/4] add zero-padding to fix millisecond string format in dataset (#116) --- transformation/snapshot.sql | 219 ++++++++++++++++++------------------ 1 file changed, 109 insertions(+), 110 deletions(-) diff --git a/transformation/snapshot.sql b/transformation/snapshot.sql index 5d4b10f3..8ebe6ea9 100644 --- a/transformation/snapshot.sql +++ b/transformation/snapshot.sql @@ -1,14 +1,13 @@ --- Person COPY ( -SELECT Person.id AS personId, - Person.name AS personName, - Person.isBlocked AS isBlocked, - epoch_ms(Person.createTime) AS createTime, - Person.gender AS gender, - epoch_ms(Person.birthday)::DATE AS birthday, - Person.country AS country, - Person.city AS city +SELECT Person.id AS personId, + Person.name AS personName, + Person.isBlocked AS isBlocked, + strftime(epoch_ms(Person.createTime), '%Y-%m-%d %H:%M:%S.%g') AS createTime, + Person.gender AS gender, + epoch_ms(Person.birthday)::DATE AS birthday, Person.country AS country, + Person.city AS city FROM Person WHERE Person.createTime <= :start_date_long ORDER BY Person.createTime ) @@ -17,15 +16,15 @@ TO ':output_dir/snapshot/Person.:output_format'; --- Company COPY ( -SELECT Company.id AS companyId, - Company.name AS companyName, - Company.isBlocked AS isBlocked, - epoch_ms(Company.createTime) AS createTime, - Company.country AS country, - Company.city AS city, - Company.business AS business, - Company.description AS description, - Company.url AS url +SELECT Company.id AS companyId, + Company.name AS companyName, + Company.isBlocked AS isBlocked, + strftime(epoch_ms(Company.createTime), '%Y-%m-%d %H:%M:%S.%g') AS createTime, + Company.country AS country, + Company.city AS city, + Company.business AS business, + Company.description AS description, + Company.url AS url FROM Company WHERE Company.createTime <= :start_date_long ORDER BY Company.createTime ) @@ -34,31 +33,31 @@ TO ':output_dir/snapshot/Company.:output_format'; -- Account. It has deletes. COPY ( -SELECT Account.id AS accountId, - epoch_ms(Account.createTime) AS createTime, - Account.isBlocked AS isBlocked, - Account.type AS accountType, - Account.nickname AS nickname, - Account.phonenum AS phonenum, - Account.email AS email, - Account.freqLoginType AS freqLoginType, - epoch_ms(Account.lastLoginTime) AS lastLoginTime, - Account.accountLevel AS accountLevel, +SELECT Account.id AS accountId, + strftime(epoch_ms(Account.createTime), '%Y-%m-%d %H:%M:%S.%g') AS createTime, + Account.isBlocked AS isBlocked, + Account.type AS accountType, + Account.nickname AS nickname, + Account.phonenum AS phonenum, + Account.email AS email, + Account.freqLoginType AS freqLoginType, + strftime(epoch_ms(Account.lastLoginTime), '%Y-%m-%d %H:%M:%S.%g') AS lastLoginTime, + Account.accountLevel AS accountLevel, FROM Account WHERE Account.createTime <= :start_date_long - -- AND Account.deleteTime > :start_date_long +-- AND Account.deleteTime > :start_date_long ORDER BY Account.createTime ) TO ':output_dir/snapshot/Account.:output_format'; -- Loan. COPY ( -SELECT Loan.id AS loanId, - Loan.loanAmount AS loanAmount, - Loan.balance AS balance, - epoch_ms(Loan.createTime) AS createTime, - Loan.usage AS loanUsage, - Loan.interestRate AS interestRate +SELECT Loan.id AS loanId, + Loan.loanAmount AS loanAmount, + Loan.balance AS balance, + strftime(epoch_ms(Loan.createTime), '%Y-%m-%d %H:%M:%S.%g') AS createTime, + Loan.usage AS loanUsage, + Loan.interestRate AS interestRate FROM Loan WHERE Loan.createTime <= :start_date_long ORDER BY Loan.createTime ) @@ -67,12 +66,12 @@ ORDER BY Loan.createTime ) -- Medium. COPY ( -SELECT Medium.id AS mediumId, - Medium.type AS mediumType, - Medium.isBlocked AS isBlocked, - epoch_ms(Medium.createTime) AS createTime, - epoch_ms(Medium.lastLogin) AS lastLoginTime, - Medium.riskLevel AS riskLevel +SELECT Medium.id AS mediumId, + Medium.type AS mediumType, + Medium.isBlocked AS isBlocked, + strftime(epoch_ms(Medium.createTime), '%Y-%m-%d %H:%M:%S.%g') AS createTime, + strftime(epoch_ms(Medium.lastLogin), '%Y-%m-%d %H:%M:%S.%g') AS lastLoginTime, + Medium.riskLevel AS riskLevel FROM Medium WHERE Medium.createTime <= :start_date_long ORDER BY Medium.createTime ) @@ -85,7 +84,7 @@ COPY Transfer.fromId AS fromId, Transfer.toId AS toId, Transfer.amount AS amount, - epoch_ms(Transfer.createTime) AS createTime, + strftime(epoch_ms(Transfer.createTime), '%Y-%m-%d %H:%M:%S.%g') AS createTime, Transfer.orderNum::VARCHAR AS orderNum, Transfer.comment AS comment, Transfer.payType AS payType, @@ -99,7 +98,7 @@ COPY LoanTransfer.fromId AS fromId, LoanTransfer.toId AS toId, LoanTransfer.amount AS amount, - epoch_ms(LoanTransfer.createTime) AS createTime, + strftime(epoch_ms(LoanTransfer.createTime), '%Y-%m-%d %H:%M:%S.%g') AS createTime, LoanTransfer.orderNum::VARCHAR AS orderNum, LoanTransfer.comment AS comment, LoanTransfer.payType AS payType, @@ -115,69 +114,69 @@ TO ':output_dir/snapshot/AccountTransferAccount.:output_format'; -- Withdraw. It has deletes. COPY ( -SELECT Withdraw.fromId AS fromId, - Withdraw.toId AS toId, - Withdraw.fromType AS fromType, - Withdraw.toType AS toType, - Withdraw.amount AS amount, - epoch_ms(Withdraw.createTime) AS createTime, - Withdraw.comment AS comment +SELECT Withdraw.fromId AS fromId, + Withdraw.toId AS toId, + Withdraw.fromType AS fromType, + Withdraw.toType AS toType, + Withdraw.amount AS amount, + strftime(epoch_ms(Withdraw.createTime), '%Y-%m-%d %H:%M:%S.%g') AS createTime, + Withdraw.comment AS comment FROM Withdraw WHERE Withdraw.createTime <= :start_date_long - -- AND Withdraw.deleteTime > :start_date_long +-- AND Withdraw.deleteTime > :start_date_long ORDER BY Withdraw.createTime ) TO ':output_dir/snapshot/AccountWithdrawAccount.:output_format'; -- Repay. It has deletes. COPY ( -SELECT Repay.accountId AS accountId, - Repay.loanId AS loanId, - Repay.amount AS amount, - epoch_ms(Repay.createTime) AS createTime, - Repay.comment AS comment +SELECT Repay.accountId AS accountId, + Repay.loanId AS loanId, + Repay.amount AS amount, + strftime(epoch_ms(Repay.createTime), '%Y-%m-%d %H:%M:%S.%g') AS createTime, + Repay.comment AS comment FROM Repay WHERE Repay.createTime <= :start_date_long - -- AND Repay.deleteTime > :start_date_long +-- AND Repay.deleteTime > :start_date_long ORDER BY Repay.createTime ) TO ':output_dir/snapshot/AccountRepayLoan.:output_format'; -- Deposit. It has deletes. COPY ( -SELECT Deposit.loanId AS loanId, - Deposit.accountId AS accountId, - Deposit.amount AS amount, - epoch_ms(Deposit.createTime) AS createTime, - Deposit.comment AS comment +SELECT Deposit.loanId AS loanId, + Deposit.accountId AS accountId, + Deposit.amount AS amount, + strftime(epoch_ms(Deposit.createTime), '%Y-%m-%d %H:%M:%S.%g') AS createTime, + Deposit.comment AS comment FROM Deposit WHERE Deposit.createTime <= :start_date_long - -- AND Deposit.deleteTime > :start_date_long +-- AND Deposit.deleteTime > :start_date_long ORDER BY Deposit.createTime ) TO ':output_dir/snapshot/LoanDepositAccount.:output_format'; -- SignIn. It has deletes. COPY ( -SELECT SignIn.mediumId AS mediumId, - SignIn.accountId AS accountId, - epoch_ms(SignIn.createTime) AS createTime, - SignIn.location AS location, - SignIn.comment AS comment +SELECT SignIn.mediumId AS mediumId, + SignIn.accountId AS accountId, + strftime(epoch_ms(SignIn.createTime), '%Y-%m-%d %H:%M:%S.%g') AS createTime, + SignIn.location AS location, + SignIn.comment AS comment FROM SignIn WHERE SignIn.createTime <= :start_date_long - -- AND SignIn.deleteTime > :start_date_long +-- AND SignIn.deleteTime > :start_date_long ORDER BY SignIn.createTime ) TO ':output_dir/snapshot/MediumSignInAccount.:output_format'; -- PersonInvest. COPY ( -SELECT PersonInvest.investorId AS investorId, - PersonInvest.companyId AS companyId, - PersonInvest.ratio AS ratio, - epoch_ms(PersonInvest.createTime) AS createTime, - PersonInvest.comment AS comment +SELECT PersonInvest.investorId AS investorId, + PersonInvest.companyId AS companyId, + PersonInvest.ratio AS ratio, + strftime(epoch_ms(PersonInvest.createTime), '%Y-%m-%d %H:%M:%S.%g') AS createTime, + PersonInvest.comment AS comment FROM PersonInvest WHERE PersonInvest.createTime <= :start_date_long ORDER BY PersonInvest.createTime ) @@ -186,11 +185,11 @@ TO ':output_dir/snapshot/PersonInvestCompany.:output_format'; -- CompanyInvest. COPY ( -SELECT CompanyInvest.investorId AS investorId, - CompanyInvest.companyId AS companyId, - CompanyInvest.ratio AS ratio, - epoch_ms(CompanyInvest.createTime) AS createTime, - CompanyInvest.comment AS comment +SELECT CompanyInvest.investorId AS investorId, + CompanyInvest.companyId AS companyId, + CompanyInvest.ratio AS ratio, + strftime(epoch_ms(CompanyInvest.createTime), '%Y-%m-%d %H:%M:%S.%g') AS createTime, + CompanyInvest.comment AS comment FROM CompanyInvest WHERE CompanyInvest.createTime <= :start_date_long ORDER BY CompanyInvest.createTime ) @@ -199,12 +198,12 @@ TO ':output_dir/snapshot/CompanyInvestCompany.:output_format'; -- PersonApplyLoan. COPY ( -SELECT PersonApplyLoan.personId AS personId, - PersonApplyLoan.loanId AS loanId, - PersonApplyLoan.loanAmount AS loanAmount, - epoch_ms(PersonApplyLoan.createTime) AS createTime, - PersonApplyLoan.org AS org, - PersonApplyLoan.comment AS comment +SELECT PersonApplyLoan.personId AS personId, + PersonApplyLoan.loanId AS loanId, + PersonApplyLoan.loanAmount AS loanAmount, + strftime(epoch_ms(PersonApplyLoan.createTime), '%Y-%m-%d %H:%M:%S.%g') AS createTime, + PersonApplyLoan.org AS org, + PersonApplyLoan.comment AS comment FROM PersonApplyLoan WHERE PersonApplyLoan.createTime <= :start_date_long ORDER BY PersonApplyLoan.createTime ) @@ -213,12 +212,12 @@ TO ':output_dir/snapshot/PersonApplyLoan.:output_format'; -- CompanyApplyLoan. COPY ( -SELECT CompanyApplyLoan.companyId AS companyId, - CompanyApplyLoan.loanId AS loanId, - CompanyApplyLoan.loanAmount AS loanAmount, - epoch_ms(CompanyApplyLoan.createTime) AS createTime, - CompanyApplyLoan.org AS org, - CompanyApplyLoan.comment AS comment +SELECT CompanyApplyLoan.companyId AS companyId, + CompanyApplyLoan.loanId AS loanId, + CompanyApplyLoan.loanAmount AS loanAmount, + strftime(epoch_ms(CompanyApplyLoan.createTime), '%Y-%m-%d %H:%M:%S.%g') AS createTime, + CompanyApplyLoan.org AS org, + CompanyApplyLoan.comment AS comment FROM CompanyApplyLoan WHERE CompanyApplyLoan.createTime <= :start_date_long ORDER BY CompanyApplyLoan.createTime ) @@ -227,11 +226,11 @@ TO ':output_dir/snapshot/CompanyApplyLoan.:output_format'; -- PersonGuaranteePerson. COPY ( -SELECT PersonGuarantee.fromId AS fromId, - PersonGuarantee.toId AS toId, - epoch_ms(PersonGuarantee.createTime) AS createTime, - PersonGuarantee.relation AS relation, - PersonGuarantee.comment AS comment +SELECT PersonGuarantee.fromId AS fromId, + PersonGuarantee.toId AS toId, + strftime(epoch_ms(PersonGuarantee.createTime), '%Y-%m-%d %H:%M:%S.%g') AS createTime, + PersonGuarantee.relation AS relation, + PersonGuarantee.comment AS comment FROM PersonGuarantee WHERE PersonGuarantee.createTime <= :start_date_long ORDER BY PersonGuarantee.createTime ) @@ -240,11 +239,11 @@ TO ':output_dir/snapshot/PersonGuaranteePerson.:output_format'; -- CompanyGuaranteeCompany. COPY ( -SELECT CompanyGuarantee.fromId AS fromId, - CompanyGuarantee.toId AS toId, - epoch_ms(CompanyGuarantee.createTime) AS createTime, - CompanyGuarantee.relation AS relation, - CompanyGuarantee.comment AS comment +SELECT CompanyGuarantee.fromId AS fromId, + CompanyGuarantee.toId AS toId, + strftime(epoch_ms(CompanyGuarantee.createTime), '%Y-%m-%d %H:%M:%S.%g') AS createTime, + CompanyGuarantee.relation AS relation, + CompanyGuarantee.comment AS comment FROM CompanyGuarantee WHERE CompanyGuarantee.createTime <= :start_date_long ORDER BY CompanyGuarantee.createTime ) @@ -253,25 +252,25 @@ TO ':output_dir/snapshot/CompanyGuaranteeCompany.:output_format'; -- PersonOwnAccount. It has deletes. COPY ( -SELECT PersonOwnAccount.personId AS personId, - PersonOwnAccount.accountId AS accountId, - epoch_ms(PersonOwnAccount.createTime) AS createTime, - PersonOwnAccount.comment AS comment +SELECT PersonOwnAccount.personId AS personId, + PersonOwnAccount.accountId AS accountId, + strftime(epoch_ms(PersonOwnAccount.createTime), '%Y-%m-%d %H:%M:%S.%g') AS createTime, + PersonOwnAccount.comment AS comment FROM PersonOwnAccount WHERE PersonOwnAccount.createTime <= :start_date_long - -- AND PersonOwnAccount.deleteTime > :start_date_long +-- AND PersonOwnAccount.deleteTime > :start_date_long ORDER BY PersonOwnAccount.createTime ) TO ':output_dir/snapshot/PersonOwnAccount.:output_format'; -- CompanyOwnAccount. It has deletes. COPY ( -SELECT CompanyOwnAccount.companyId AS companyId, - CompanyOwnAccount.accountId AS accountId, - epoch_ms(CompanyOwnAccount.createTime) AS createTime, - CompanyOwnAccount.comment AS comment +SELECT CompanyOwnAccount.companyId AS companyId, + CompanyOwnAccount.accountId AS accountId, + strftime(epoch_ms(CompanyOwnAccount.createTime), '%Y-%m-%d %H:%M:%S.%g') AS createTime, + CompanyOwnAccount.comment AS comment FROM CompanyOwnAccount WHERE CompanyOwnAccount.createTime <= :start_date_long - -- AND CompanyOwnAccount.deleteTime > :start_date_long +-- AND CompanyOwnAccount.deleteTime > :start_date_long ORDER BY CompanyOwnAccount.createTime ) TO ':output_dir/snapshot/CompanyOwnAccount.:output_format';