-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add run scripts * Remove wandb flag in run_scripts * Add missing python command * Remove SLURM * Fix array name * Fix wrong embeddings in clustering
- Loading branch information
Showing
16 changed files
with
858 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
#!/bin/bash
# Runs one `deepblocker` experiment with `--encoder autoencoder` on an
# open-ea-dataset configuration.
#
# Usage: <script> INDEX
#   INDEX is a zero-based position in the `args` array built below:
#     0-15   --inner-encoder sentencetransformertokenized
#     16-31  --inner-encoder sifembeddingtokenized
#   Inside each group the order is size (15K, 100K), then graph pair
#   (D_W, D_Y, EN_DE, EN_FR), then version (V1, V2).

small_nneighbors=500
large_nneighbors=1000
iebsize="512"
hidden_dim="196"
learning_rate="0.004542"

# Dataset selection shared by both encoder groups.
myargs=()
for size in 15K 100K; do
  for pair in D_W D_Y EN_DE EN_FR; do
    for version in V1 V2; do
      myargs+=("--random-seed 42 open-ea-dataset --graph-pair $pair --size $size --version $version deepblocker")
    done
  done
done

# 15K datasets get the small neighbour count; everything else gets the large
# one plus the faisshnsw block builder.
nnargs=()
for base in "${myargs[@]}"; do
  if [[ $base == *15K* ]]; then
    nnargs+=("$base --n-neighbors $small_nneighbors")
  else
    nnargs+=("$base --n-neighbors $large_nneighbors --block-builder-kwargs faisshnsw")
  fi
done

sifembeddings="fasttext"
embeddings="gtr-t5-base"
multi_embeddings="LaBSE"
st_other_args="--encoder autoencoder --inner-encoder sentencetransformertokenized --inner-encoder-batch-size $iebsize --batch-size 512 --hidden-dimension $hidden_dim --learning-rate $learning_rate --force True"
sif_other_args="--encoder autoencoder --inner-encoder sifembeddingtokenized --hidden-dimension $hidden_dim --learning-rate $learning_rate --force True"

args=()
# D_Y / D_W pairs use $embeddings, all other pairs use $multi_embeddings.
for base in "${nnargs[@]}"; do
  if [[ $base == *D_Y* || $base == *D_W* ]]; then
    args+=("$base $st_other_args --embeddings $embeddings")
  else
    args+=("$base $st_other_args --embeddings $multi_embeddings")
  fi
done
# Fix: this loop used the undefined $other_args, which silently dropped every
# encoder option from these entries; sif_other_args is the variable defined
# for them.
for base in "${nnargs[@]}"; do
  args+=("$base $sif_other_args --embeddings $sifembeddings")
done

# Runs experiment.py with the configuration stored at index $1 of `args`.
# Arguments: $1 - zero-based index into `args`
# Outputs:   the selected argument string on stdout, usage text on stderr
# Returns:   2 without running anything when the index is missing, not a
#            non-negative integer, or out of range; otherwise the status of
#            the micromamba command.
run_experiment() {
  local idx=${1-}
  # 10# forces base 10 so an index such as 08 is not parsed as octal.
  if [[ ! $idx =~ ^[0-9]+$ ]] || (( 10#$idx >= ${#args[@]} )); then
    printf 'usage: %s INDEX (0..%d)\n' "${0##*/}" "$(( ${#args[@]} - 1 ))" >&2
    return 2
  fi
  local -a curr_param
  # Split the stored string into words explicitly instead of relying on an
  # unquoted expansion, which would also glob-expand the words.
  read -r -a curr_param <<< "${args[10#$idx]}"
  printf '%s\n' "${curr_param[*]}"
  # NOTE(review): `-r y` is carried over unchanged from the original command
  # line; confirm it is the intended micromamba option.
  micromamba run -n klinker-conda -r y python experiment.py "${curr_param[@]}"
}

run_experiment "$@"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
#!/bin/bash
# Runs one `deepblocker` experiment with `--encoder crosstupletraining` on an
# open-ea-dataset configuration.
#
# Usage: <script> INDEX
#   INDEX is a zero-based position in the `args` array built below:
#     0-15   --inner-encoder sentencetransformertokenized
#     16-31  --inner-encoder sifembeddingtokenized
#   Inside each group the order is size (15K, 100K), then graph pair
#   (D_W, D_Y, EN_DE, EN_FR), then version (V1, V2).

small_nneighbors=500
large_nneighbors=1000
iebsize="512"
hidden_dim="384"
learning_rate="0.0030405"
max_perturbation="0.408395"
pos_to_neg_ratio="1.55515"
synth_tuples_per_tuple="5"

# Dataset selection shared by both encoder groups.
myargs=()
for size in 15K 100K; do
  for pair in D_W D_Y EN_DE EN_FR; do
    for version in V1 V2; do
      myargs+=("--random-seed 42 open-ea-dataset --graph-pair $pair --size $size --version $version deepblocker")
    done
  done
done

# 15K datasets get the small neighbour count; everything else gets the large
# one plus the faisshnsw block builder.
nnargs=()
for base in "${myargs[@]}"; do
  if [[ $base == *15K* ]]; then
    nnargs+=("$base --n-neighbors $small_nneighbors")
  else
    nnargs+=("$base --n-neighbors $large_nneighbors --block-builder-kwargs faisshnsw")
  fi
done

embeddings="gtr-t5-base"
multi_embeddings="LaBSE"
sifembeddings="fasttext"
# NOTE(review): st_other_args itself ends with `--block-builder-kwargs
# faisshnsw`, so the 100K entries carry that option twice and the 15K entries
# of this group carry it as well. Kept exactly as written; confirm it is wanted.
st_other_args="--encoder crosstupletraining --inner-encoder sentencetransformertokenized --inner-encoder-batch-size $iebsize --batch-size 512 --hidden-dimension $hidden_dim --learning-rate $learning_rate --force True --max-perturbation=$max_perturbation --pos-to-neg-ratio=$pos_to_neg_ratio --synth-tuples-per-tuple=$synth_tuples_per_tuple --block-builder-kwargs faisshnsw"
sif_other_args="--encoder crosstupletraining --inner-encoder sifembeddingtokenized --batch-size 512 --hidden-dimension $hidden_dim --learning-rate $learning_rate --force True --max-perturbation=$max_perturbation --pos-to-neg-ratio=$pos_to_neg_ratio --synth-tuples-per-tuple=$synth_tuples_per_tuple"

args=()
# D_Y / D_W pairs use $embeddings, all other pairs use $multi_embeddings.
for base in "${nnargs[@]}"; do
  if [[ $base == *D_Y* || $base == *D_W* ]]; then
    args+=("$base $st_other_args --embeddings $embeddings")
  else
    args+=("$base $st_other_args --embeddings $multi_embeddings")
  fi
done
# Fix: this loop used the undefined $other_args, which silently dropped every
# encoder option from these entries; sif_other_args is the variable defined
# for them.
for base in "${nnargs[@]}"; do
  args+=("$base $sif_other_args --embeddings $sifembeddings")
done

# Runs experiment.py with the configuration stored at index $1 of `args`.
# Arguments: $1 - zero-based index into `args`
# Outputs:   the selected argument string on stdout, usage text on stderr
# Returns:   2 without running anything when the index is missing, not a
#            non-negative integer, or out of range; otherwise the status of
#            the micromamba command.
run_experiment() {
  local idx=${1-}
  # 10# forces base 10 so an index such as 08 is not parsed as octal.
  if [[ ! $idx =~ ^[0-9]+$ ]] || (( 10#$idx >= ${#args[@]} )); then
    printf 'usage: %s INDEX (0..%d)\n' "${0##*/}" "$(( ${#args[@]} - 1 ))" >&2
    return 2
  fi
  local -a curr_param
  # Split the stored string into words explicitly instead of relying on an
  # unquoted expansion, which would also glob-expand the words.
  read -r -a curr_param <<< "${args[10#$idx]}"
  printf '%s\n' "${curr_param[*]}"
  # NOTE(review): `-r y` is carried over unchanged from the original command
  # line; confirm it is the intended micromamba option.
  micromamba run -n klinker-conda -r y python experiment.py "${curr_param[@]}"
}

run_experiment "$@"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
#!/bin/bash
# Runs one `deepblocker` experiment with `--encoder hybrid` on an
# open-ea-dataset configuration.
#
# Usage: <script> INDEX
#   INDEX is a zero-based position in the `args` array built below:
#     0-15   --inner-encoder sentencetransformertokenized
#     16-31  --inner-encoder sifembeddingtokenized
#   Inside each group the order is size (15K, 100K), then graph pair
#   (D_W, D_Y, EN_DE, EN_FR), then version (V1, V2).

small_nneighbors=500
large_nneighbors=1000
iebsize="512"
# hidden_dim="384"
# NOTE(review): reduce_dim_to and reduce_sample_perc are assigned here but not
# referenced anywhere else in this script; kept so no configuration is lost.
reduce_dim_to="192"
reduce_sample_perc=0.3
hidden_dim="96"
learning_rate="0.0030405"
max_perturbation="0.408395"
pos_to_neg_ratio="1.55515"
synth_tuples_per_tuple="5"

# Dataset selection shared by both encoder groups.
myargs=()
for size in 15K 100K; do
  for pair in D_W D_Y EN_DE EN_FR; do
    for version in V1 V2; do
      myargs+=("--random-seed 42 open-ea-dataset --graph-pair $pair --size $size --version $version deepblocker")
    done
  done
done

# 15K datasets get the small neighbour count; everything else gets the large
# one plus the faisshnsw block builder.
nnargs=()
for base in "${myargs[@]}"; do
  if [[ $base == *15K* ]]; then
    nnargs+=("$base --n-neighbors $small_nneighbors")
  else
    nnargs+=("$base --n-neighbors $large_nneighbors --block-builder-kwargs faisshnsw")
  fi
done

sifembeddings="fasttext"
embeddings="gtr-t5-base"
multi_embeddings="LaBSE"
other_args="--encoder hybrid --batch-size 512 --hidden-dimension $hidden_dim --learning-rate $learning_rate --force True --max-perturbation=$max_perturbation --pos-to-neg-ratio=$pos_to_neg_ratio --synth-tuples-per-tuple=$synth_tuples_per_tuple"
st_args="--inner-encoder sentencetransformertokenized --inner-encoder-batch-size $iebsize"

args=()
# D_Y / D_W pairs use $embeddings, all other pairs use $multi_embeddings.
for base in "${nnargs[@]}"; do
  if [[ $base == *D_Y* || $base == *D_W* ]]; then
    args+=("$base $other_args --embeddings $embeddings $st_args")
  else
    args+=("$base $other_args --embeddings $multi_embeddings $st_args")
  fi
done
for base in "${nnargs[@]}"; do
  args+=("$base $other_args --inner-encoder sifembeddingtokenized --embeddings $sifembeddings")
done

# Runs experiment.py with the configuration stored at index $1 of `args`.
# Arguments: $1 - zero-based index into `args`
# Outputs:   the selected argument string on stdout, usage text on stderr
# Returns:   2 without running anything when the index is missing, not a
#            non-negative integer, or out of range; otherwise the status of
#            the micromamba command.
run_experiment() {
  local idx=${1-}
  # 10# forces base 10 so an index such as 08 is not parsed as octal.
  if [[ ! $idx =~ ^[0-9]+$ ]] || (( 10#$idx >= ${#args[@]} )); then
    printf 'usage: %s INDEX (0..%d)\n' "${0##*/}" "$(( ${#args[@]} - 1 ))" >&2
    return 2
  fi
  local -a curr_param
  # Split the stored string into words explicitly instead of relying on an
  # unquoted expansion, which would also glob-expand the words.
  read -r -a curr_param <<< "${args[10#$idx]}"
  printf '%s\n' "${curr_param[*]}"
  # NOTE(review): `-r y` is carried over unchanged from the original command
  # line; confirm it is the intended micromamba option.
  micromamba run -n klinker-conda -r y python experiment.py "${curr_param[@]}"
}

run_experiment "$@"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
#!/bin/bash
# Runs one `only-embeddings-blocker` experiment on an open-ea-dataset
# configuration.
#
# Usage: <script> INDEX
#   INDEX is a zero-based position in the `args` array built below:
#     0-15   --encoder sentencetransformertokenized
#     16-31  --encoder sifembeddingtokenized
#   Inside each group the order is size (15K, 100K), then graph pair
#   (D_W, D_Y, EN_DE, EN_FR), then version (V1, V2).

small_nneighbors=500
large_nneighbors=1000
# Fix: iebsize was referenced below but never assigned, so
# --inner-encoder-batch-size was emitted without a value.
# NOTE(review): 512 is the value the other run scripts assign to iebsize;
# confirm it is also right for this blocker.
iebsize="512"

# Dataset selection shared by both encoder groups.
myargs=()
for size in 15K 100K; do
  for pair in D_W D_Y EN_DE EN_FR; do
    for version in V1 V2; do
      myargs+=("--random-seed 42 open-ea-dataset --graph-pair $pair --size $size --version $version only-embeddings-blocker")
    done
  done
done

# 15K datasets get the small neighbour count; everything else gets the large
# one plus the faisshnsw block builder.
nnargs=()
for base in "${myargs[@]}"; do
  if [[ $base == *15K* ]]; then
    nnargs+=("$base --n-neighbors $small_nneighbors")
  else
    nnargs+=("$base --n-neighbors $large_nneighbors --block-builder-kwargs faisshnsw")
  fi
done

sifembeddings="fasttext"
embeddings="gtr-t5-base"
multi_embeddings="LaBSE"
st_args="--encoder sentencetransformertokenized --inner-encoder-batch-size $iebsize"

args=()
# D_Y / D_W pairs use $embeddings, all other pairs use $multi_embeddings.
for base in "${nnargs[@]}"; do
  if [[ $base == *D_Y* || $base == *D_W* ]]; then
    args+=("$base --embeddings $embeddings $st_args")
  else
    args+=("$base --embeddings $multi_embeddings $st_args")
  fi
done
for base in "${nnargs[@]}"; do
  args+=("$base --encoder sifembeddingtokenized --embeddings $sifembeddings")
done

# Runs experiment.py with the configuration stored at index $1 of `args`.
# Arguments: $1 - zero-based index into `args`
# Outputs:   the selected argument string on stdout, usage text on stderr
# Returns:   2 without running anything when the index is missing, not a
#            non-negative integer, or out of range; otherwise the status of
#            the micromamba command.
run_experiment() {
  local idx=${1-}
  # 10# forces base 10 so an index such as 08 is not parsed as octal.
  if [[ ! $idx =~ ^[0-9]+$ ]] || (( 10#$idx >= ${#args[@]} )); then
    printf 'usage: %s INDEX (0..%d)\n' "${0##*/}" "$(( ${#args[@]} - 1 ))" >&2
    return 2
  fi
  local -a curr_param
  # Split the stored string into words explicitly instead of relying on an
  # unquoted expansion, which would also glob-expand the words.
  read -r -a curr_param <<< "${args[10#$idx]}"
  printf '%s\n' "${curr_param[*]}"
  # NOTE(review): `-r y` is carried over unchanged from the original command
  # line; confirm it is the intended micromamba option.
  micromamba run -n klinker-conda -r y python experiment.py "${curr_param[@]}"
}

run_experiment "$@"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
#!/bin/bash
# Runs one `token-blocker` experiment on an open-ea-dataset configuration.
#
# Usage: <script> INDEX
#   INDEX is a zero-based position (0-15) in the `args` array built below.
#   The order is size (15K, 100K), then graph pair (D_W, D_Y, EN_DE, EN_FR),
#   then version (V1, V2).

args=()
for size in 15K 100K; do
  for pair in D_W D_Y EN_DE EN_FR; do
    for version in V1 V2; do
      args+=("--random-seed 42 open-ea-dataset --graph-pair $pair --size $size --version $version token-blocker")
    done
  done
done

# Runs experiment.py with the configuration stored at index $1 of `args`.
# Arguments: $1 - zero-based index into `args`
# Outputs:   the selected argument string on stdout, usage text on stderr
# Returns:   2 without running anything when the index is missing, not a
#            non-negative integer, or out of range; otherwise the status of
#            the micromamba command.
run_experiment() {
  local idx=${1-}
  # 10# forces base 10 so an index such as 08 is not parsed as octal.
  if [[ ! $idx =~ ^[0-9]+$ ]] || (( 10#$idx >= ${#args[@]} )); then
    printf 'usage: %s INDEX (0..%d)\n' "${0##*/}" "$(( ${#args[@]} - 1 ))" >&2
    return 2
  fi
  local -a curr_param
  # Split the stored string into words explicitly instead of relying on an
  # unquoted expansion, which would also glob-expand the words.
  read -r -a curr_param <<< "${args[10#$idx]}"
  printf '%s\n' "${curr_param[*]}"
  # NOTE(review): `-r y` is carried over unchanged from the original command
  # line; confirm it is the intended micromamba option.
  micromamba run -n klinker-conda -r y python experiment.py "${curr_param[@]}"
}

run_experiment "$@"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
#!/bin/bash
# Runs one `relational-deepblocker` experiment with `--encoder autoencoder`
# on an open-ea-dataset configuration.
#
# Usage: <script> INDEX
#   INDEX is a zero-based position in the `args` array built below:
#     0-15   --inner-encoder sentencetransformertokenized
#     16-31  --inner-encoder sifembeddingtokenized
#   Inside each group the order is size (15K, 100K), then graph pair
#   (D_W, D_Y, EN_DE, EN_FR), then version (V1, V2).
#
# Optional variables: reduce_dim_to, reduce_sample_perc. When set (for example
# through the environment) they add --reduce-dim-to / --reduce-sample-perc to
# the 100K sentencetransformertokenized entries.
#
# NOTE(review): the previous version of this script computed the neighbour
# options into an array that was never read and appended the
# sifembeddingtokenized entries to that same array instead of `args`, so only
# 16 entries without any neighbour option were reachable. The layout below is
# a reconstruction of the apparent intent; confirm it before launching jobs.

small_nneighbors=250
large_nneighbors=500
iebsize="512"
hidden_dim="196"
learning_rate="0.004542"

# Dataset selection shared by both encoder groups.
myargs=()
for size in 15K 100K; do
  for pair in D_W D_Y EN_DE EN_FR; do
    for version in V1 V2; do
      myargs+=("--random-seed 42 open-ea-dataset --graph-pair $pair --size $size --version $version relational-deepblocker")
    done
  done
done

other_args="--encoder autoencoder --batch-size 512 --hidden-dimension $hidden_dim --learning-rate $learning_rate --force True"
st_args="--inner-encoder sentencetransformertokenized --inner-encoder-batch-size $iebsize"
sif_other_args="--inner-encoder sifembeddingtokenized"
embeddings="gtr-t5-base"
multi_embeddings="LaBSE"

# The script never assigns reduce_dim_to / reduce_sample_perc, so the options
# are only emitted when a value exists; an option without a value would
# swallow the token that follows it.
reduce_args=""
if [[ -n ${reduce_dim_to-} ]]; then
  reduce_args+=" --reduce-dim-to $reduce_dim_to"
fi
if [[ -n ${reduce_sample_perc-} ]]; then
  reduce_args+=" --reduce-sample-perc $reduce_sample_perc"
fi

args=()
for base in "${myargs[@]}"; do
  # D_Y / D_W pairs use $embeddings, all other pairs use $multi_embeddings.
  if [[ $base == *D_Y* || $base == *D_W* ]]; then
    emb=$embeddings
  else
    emb=$multi_embeddings
  fi
  if [[ $base == *15K* ]]; then
    nn="--n-neighbors $small_nneighbors --rel-n-neighbors $small_nneighbors"
  else
    nn="--n-neighbors $large_nneighbors --rel-n-neighbors $large_nneighbors --block-builder-kwargs faisshnsw$reduce_args"
  fi
  # Fix: the undefined $st_other_args was used here, which dropped the
  # inner-encoder options; st_args is the variable defined for them.
  args+=("$base $other_args $st_args --embeddings $emb $nn")
done

for base in "${myargs[@]}"; do
  if [[ $base == *15K* ]]; then
    args+=("$base $other_args $sif_other_args --embeddings fasttext --n-neighbors $small_nneighbors --rel-n-neighbors $small_nneighbors")
  else
    # NOTE(review): the 100K entries use small_nneighbors here, exactly as the
    # original line did; confirm large_nneighbors was not intended.
    args+=("$base $other_args $sif_other_args --embeddings 100wiki.en.bin --n-neighbors $small_nneighbors --rel-n-neighbors $small_nneighbors --block-builder-kwargs faisshnsw")
  fi
done

# Runs experiment.py with the configuration stored at index $1 of `args`.
# Arguments: $1 - zero-based index into `args`
# Outputs:   the selected argument string on stdout, usage text on stderr
# Returns:   2 without running anything when the index is missing, not a
#            non-negative integer, or out of range; otherwise the status of
#            the micromamba command.
run_experiment() {
  local idx=${1-}
  # 10# forces base 10 so an index such as 08 is not parsed as octal.
  if [[ ! $idx =~ ^[0-9]+$ ]] || (( 10#$idx >= ${#args[@]} )); then
    printf 'usage: %s INDEX (0..%d)\n' "${0##*/}" "$(( ${#args[@]} - 1 ))" >&2
    return 2
  fi
  local -a curr_param
  # Split the stored string into words explicitly instead of relying on an
  # unquoted expansion, which would also glob-expand the words.
  read -r -a curr_param <<< "${args[10#$idx]}"
  printf '%s\n' "${curr_param[*]}"
  # NOTE(review): `-r y` is carried over unchanged from the original command
  # line; confirm it is the intended micromamba option.
  micromamba run -n klinker-conda -r y python experiment.py "${curr_param[@]}"
}

run_experiment "$@"
Oops, something went wrong.