Skip to content

Commit

Permalink
Flatten the graph in the transformation SQL scripts (#104)
Browse files: browse the repository at this point in the history
  • Loading branch information
qishipengqsp authored Sep 26, 2024
1 parent a218645 commit 7599200
Show file tree
Hide file tree
Showing 5 changed files with 82 additions and 49 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ sf*/
sf*.tar
sf*.tar.gz

scripts/paramgen/__pycache__/
tools/paramgen/__pycache__/
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,10 @@ public String getUniformDistRandomText(Random random) {
}

public String getUniformDistRandomTextForComments(Random random) {
// Randomly select two resources and concatenate them to flatten the dataset
long index1 = random.nextInt(resources.size());
long index2 = random.nextInt(resources.size());
return resources.get(index1) + " " + resources.get(index2);
StringBuilder text = new StringBuilder();
for (int i = 0; i < 5; i++) {
text.append(resources.get((long) random.nextInt(resources.size()))).append(" ");
}
return text.toString();
}
}
24 changes: 12 additions & 12 deletions src/main/resources/scale_factors.xml
Original file line number Diff line number Diff line change
Expand Up @@ -72,15 +72,15 @@
<scale_factor name="1">
<property>
<name>generator.numPersons</name>
<value>20000</value>
<value>10000</value>
</property>
<property>
<name>generator.numCompanies</name>
<value>20000</value>
<value>10000</value>
</property>
<property>
<name>generator.numMediums</name>
<value>40000</value>
<value>20000</value>
</property>
<property>
<name>transfer.minNumDegree</name>
Expand Down Expand Up @@ -118,15 +118,15 @@
<scale_factor name="10">
<property>
<name>generator.numPersons</name>
<value>200000</value>
<value>100000</value>
</property>
<property>
<name>generator.numCompanies</name>
<value>200000</value>
<value>100000</value>
</property>
<property>
<name>generator.numMediums</name>
<value>350000</value>
<value>200000</value>
</property>
<property>
<name>transfer.minNumDegree</name>
Expand All @@ -141,15 +141,15 @@
<scale_factor name="30">
<property>
<name>generator.numPersons</name>
<value>600000</value>
<value>300000</value>
</property>
<property>
<name>generator.numCompanies</name>
<value>600000</value>
<value>300000</value>
</property>
<property>
<name>generator.numMediums</name>
<value>1200000</value>
<value>600000</value>
</property>
<property>
<name>transfer.minNumDegree</name>
Expand All @@ -164,15 +164,15 @@
<scale_factor name="100">
<property>
<name>generator.numPersons</name>
<value>2000000</value>
<value>1000000</value>
</property>
<property>
<name>generator.numCompanies</name>
<value>2000000</value>
<value>1000000</value>
</property>
<property>
<name>generator.numMediums</name>
<value>3500000</value>
<value>2000000</value>
</property>
<property>
<name>transfer.minNumDegree</name>
Expand Down
60 changes: 39 additions & 21 deletions transformation/snapshot.sql
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ SELECT Account.id AS accountId,
Account.accountLevel AS accountLevel,
FROM Account
WHERE Account.createTime <= :start_date_long
AND Account.deleteTime > :start_date_long
-- AND Account.deleteTime > :start_date_long
ORDER BY Account.createTime )
TO ':output_dir/snapshot/Account.:output_format';

Expand Down Expand Up @@ -91,7 +91,8 @@ COPY
Transfer.payType AS payType,
Transfer.goodsType AS goodsType
FROM Transfer
WHERE Transfer.createTime <= :start_date_long AND Transfer.deleteTime > :start_date_long
WHERE Transfer.createTime <= :start_date_long
-- AND Transfer.deleteTime > :start_date_long
)
UNION ALL
(SELECT
Expand All @@ -104,7 +105,8 @@ COPY
LoanTransfer.payType AS payType,
LoanTransfer.goodsType AS goodsType
FROM LoanTransfer
WHERE LoanTransfer.createTime <= :start_date_long AND LoanTransfer.deleteTime > :start_date_long
WHERE LoanTransfer.createTime <= :start_date_long
-- AND LoanTransfer.deleteTime > :start_date_long
)
ORDER BY createTime
)
Expand All @@ -115,11 +117,14 @@ COPY
(
SELECT Withdraw.fromId AS fromId,
Withdraw.toId AS toId,
Withdraw.fromType AS fromType,
Withdraw.toType AS toType,
Withdraw.amount AS amount,
epoch_ms(Withdraw.createTime) AS createTime
epoch_ms(Withdraw.createTime) AS createTime,
Withdraw.comment AS comment
FROM Withdraw
WHERE Withdraw.createTime <= :start_date_long
AND Withdraw.deleteTime > :start_date_long
-- AND Withdraw.deleteTime > :start_date_long
ORDER BY Withdraw.createTime )
TO ':output_dir/snapshot/AccountWithdrawAccount.:output_format';

Expand All @@ -129,10 +134,11 @@ COPY
SELECT Repay.accountId AS accountId,
Repay.loanId AS loanId,
Repay.amount AS amount,
epoch_ms(Repay.createTime) AS createTime
epoch_ms(Repay.createTime) AS createTime,
Repay.comment AS comment
FROM Repay
WHERE Repay.createTime <= :start_date_long
AND Repay.deleteTime > :start_date_long
-- AND Repay.deleteTime > :start_date_long
ORDER BY Repay.createTime )
TO ':output_dir/snapshot/AccountRepayLoan.:output_format';

Expand All @@ -142,10 +148,11 @@ COPY
SELECT Deposit.loanId AS loanId,
Deposit.accountId AS accountId,
Deposit.amount AS amount,
epoch_ms(Deposit.createTime) AS createTime
epoch_ms(Deposit.createTime) AS createTime,
Deposit.comment AS comment
FROM Deposit
WHERE Deposit.createTime <= :start_date_long
AND Deposit.deleteTime > :start_date_long
-- AND Deposit.deleteTime > :start_date_long
ORDER BY Deposit.createTime )
TO ':output_dir/snapshot/LoanDepositAccount.:output_format';

Expand All @@ -155,10 +162,11 @@ COPY
SELECT SignIn.mediumId AS mediumId,
SignIn.accountId AS accountId,
epoch_ms(SignIn.createTime) AS createTime,
SignIn.location AS location
SignIn.location AS location,
SignIn.comment AS comment
FROM SignIn
WHERE SignIn.createTime <= :start_date_long
AND SignIn.deleteTime > :start_date_long
-- AND SignIn.deleteTime > :start_date_long
ORDER BY SignIn.createTime )
TO ':output_dir/snapshot/MediumSignInAccount.:output_format';

Expand All @@ -168,7 +176,8 @@ COPY
SELECT PersonInvest.investorId AS investorId,
PersonInvest.companyId AS companyId,
PersonInvest.ratio AS ratio,
epoch_ms(PersonInvest.createTime) AS createTime
epoch_ms(PersonInvest.createTime) AS createTime,
PersonInvest.comment AS comment
FROM PersonInvest
WHERE PersonInvest.createTime <= :start_date_long
ORDER BY PersonInvest.createTime )
Expand All @@ -180,7 +189,8 @@ COPY
SELECT CompanyInvest.investorId AS investorId,
CompanyInvest.companyId AS companyId,
CompanyInvest.ratio AS ratio,
epoch_ms(CompanyInvest.createTime) AS createTime
epoch_ms(CompanyInvest.createTime) AS createTime,
CompanyInvest.comment AS comment
FROM CompanyInvest
WHERE CompanyInvest.createTime <= :start_date_long
ORDER BY CompanyInvest.createTime )
Expand All @@ -191,8 +201,10 @@ COPY
(
SELECT PersonApplyLoan.personId AS personId,
PersonApplyLoan.loanId AS loanId,
PersonApplyLoan.loanAmount AS loanAmount,
epoch_ms(PersonApplyLoan.createTime) AS createTime,
PersonApplyLoan.org AS org
PersonApplyLoan.org AS org,
PersonApplyLoan.comment AS comment
FROM PersonApplyLoan
WHERE PersonApplyLoan.createTime <= :start_date_long
ORDER BY PersonApplyLoan.createTime )
Expand All @@ -203,8 +215,10 @@ COPY
(
SELECT CompanyApplyLoan.companyId AS companyId,
CompanyApplyLoan.loanId AS loanId,
CompanyApplyLoan.loanAmount AS loanAmount,
epoch_ms(CompanyApplyLoan.createTime) AS createTime,
CompanyApplyLoan.org AS org
CompanyApplyLoan.org AS org,
CompanyApplyLoan.comment AS comment
FROM CompanyApplyLoan
WHERE CompanyApplyLoan.createTime <= :start_date_long
ORDER BY CompanyApplyLoan.createTime )
Expand All @@ -216,7 +230,8 @@ COPY
SELECT PersonGuarantee.fromId AS fromId,
PersonGuarantee.toId AS toId,
epoch_ms(PersonGuarantee.createTime) AS createTime,
PersonGuarantee.relation AS relation
PersonGuarantee.relation AS relation,
PersonGuarantee.comment AS comment
FROM PersonGuarantee
WHERE PersonGuarantee.createTime <= :start_date_long
ORDER BY PersonGuarantee.createTime )
Expand All @@ -228,7 +243,8 @@ COPY
SELECT CompanyGuarantee.fromId AS fromId,
CompanyGuarantee.toId AS toId,
epoch_ms(CompanyGuarantee.createTime) AS createTime,
CompanyGuarantee.relation AS relation
CompanyGuarantee.relation AS relation,
CompanyGuarantee.comment AS comment
FROM CompanyGuarantee
WHERE CompanyGuarantee.createTime <= :start_date_long
ORDER BY CompanyGuarantee.createTime )
Expand All @@ -239,10 +255,11 @@ COPY
(
SELECT PersonOwnAccount.personId AS personId,
PersonOwnAccount.accountId AS accountId,
epoch_ms(PersonOwnAccount.createTime) AS createTime
epoch_ms(PersonOwnAccount.createTime) AS createTime,
PersonOwnAccount.comment AS comment
FROM PersonOwnAccount
WHERE PersonOwnAccount.createTime <= :start_date_long
AND PersonOwnAccount.deleteTime > :start_date_long
-- AND PersonOwnAccount.deleteTime > :start_date_long
ORDER BY PersonOwnAccount.createTime )
TO ':output_dir/snapshot/PersonOwnAccount.:output_format';

Expand All @@ -251,9 +268,10 @@ COPY
(
SELECT CompanyOwnAccount.companyId AS companyId,
CompanyOwnAccount.accountId AS accountId,
epoch_ms(CompanyOwnAccount.createTime) AS createTime
epoch_ms(CompanyOwnAccount.createTime) AS createTime,
CompanyOwnAccount.comment AS comment
FROM CompanyOwnAccount
WHERE CompanyOwnAccount.createTime <= :start_date_long
AND CompanyOwnAccount.deleteTime > :start_date_long
-- AND CompanyOwnAccount.deleteTime > :start_date_long
ORDER BY CompanyOwnAccount.createTime )
TO ':output_dir/snapshot/CompanyOwnAccount.:output_format';
Loading

0 comments on commit 7599200

Please sign in to comment.