From e6b24a94db5ca83da6cc991b1acb63ce681a6f57 Mon Sep 17 00:00:00 2001
From: Thomas M Kehrenberg
Date: Fri, 23 Jun 2023 00:24:11 +0200
Subject: [PATCH 1/4] Add configs for celeba

---
 external_confs/ds/celeba/gender_blond.yaml       | 7 +++++++
 external_confs/ds/celeba/gender_smiling.yaml     | 2 --
 .../split/celeba/random/no_nonblond_females.yaml | 8 ++++++++
 3 files changed, 15 insertions(+), 2 deletions(-)
 create mode 100644 external_confs/ds/celeba/gender_blond.yaml
 create mode 100644 external_confs/split/celeba/random/no_nonblond_females.yaml

diff --git a/external_confs/ds/celeba/gender_blond.yaml b/external_confs/ds/celeba/gender_blond.yaml
new file mode 100644
index 00000000..6cc7d305
--- /dev/null
+++ b/external_confs/ds/celeba/gender_blond.yaml
@@ -0,0 +1,7 @@
+---
+defaults:
+  - celeba
+  - _self_
+download: false
+superclass: BLOND_HAIR
+subclass: MALE
\ No newline at end of file
diff --git a/external_confs/ds/celeba/gender_smiling.yaml b/external_confs/ds/celeba/gender_smiling.yaml
index 7f79cda4..a6e9e0f6 100644
--- a/external_confs/ds/celeba/gender_smiling.yaml
+++ b/external_confs/ds/celeba/gender_smiling.yaml
@@ -5,5 +5,3 @@ defaults:
 download: false
 superclass: SMILING
 subclass: MALE
-transform: null
-split: null
diff --git a/external_confs/split/celeba/random/no_nonblond_females.yaml b/external_confs/split/celeba/random/no_nonblond_females.yaml
new file mode 100644
index 00000000..0873b616
--- /dev/null
+++ b/external_confs/split/celeba/random/no_nonblond_females.yaml
@@ -0,0 +1,8 @@
+---
+defaults:
+  - random
+  - /split/celeba/random/base@_here_
+  - _self_
+
+artifact_name: split_celeba_no_nonblond_females_${oc.env:SLURM_NODELIST}_${.seed}
+train_subsampling_props: {0: {0: 0}} # Drop all nonblond females
\ No newline at end of file

From e3cb97a01598a45ac11034fa98507e0f761c72c4 Mon Sep 17 00:00:00 2001
From: Thomas MK
Date: Fri, 23 Jun 2023 11:35:42 +0000
Subject: [PATCH 2/4] Add a bunch of configs

---
 external_confs/alg/only_pred_y_loss.yaml     | 15 +++++
 external_confs/alg/supmatch_no_disc.yaml     |  8 +--
 external_confs/dm/nicopp.yaml                |  2 +-
 .../celeba/rn50/pretrained_enc.yaml          | 58 +++++++++++++++++++
 .../nicopp/rn18/only_pred_y_loss.yaml        | 11 +---
 .../nicopp/rn50/only_pred_y_loss.yaml        |  7 +--
 .../nicopp/rn50/pretrained_enc.yaml          |  4 +-
 external_confs/hydra/launcher/slurm/ada.yaml |  2 +-
 .../split/nicopp/change_is_hard_seeded.yaml  |  7 +++
 scripts/save_nicopp_splits.py                | 38 ++++++++++++
 10 files changed, 130 insertions(+), 22 deletions(-)
 create mode 100644 external_confs/alg/only_pred_y_loss.yaml
 create mode 100644 external_confs/experiment/celeba/rn50/pretrained_enc.yaml
 create mode 100644 external_confs/split/nicopp/change_is_hard_seeded.yaml
 create mode 100644 scripts/save_nicopp_splits.py

diff --git a/external_confs/alg/only_pred_y_loss.yaml b/external_confs/alg/only_pred_y_loss.yaml
new file mode 100644
index 00000000..d80984b1
--- /dev/null
+++ b/external_confs/alg/only_pred_y_loss.yaml
@@ -0,0 +1,15 @@
+---
+disc_loss_w: 0
+enc_loss_w: 0.0
+num_disc_updates: 0
+pred_s_loss_w: 0
+pred_y_loss_w: 1.0
+prior_loss_w: null
+twoway_disc_loss: false
+warmup_steps: 0
+pred:
+  scheduler_cls: ranzen.torch.schedulers.CosineLRWithLinearWarmup
+  scheduler_kwargs:
+    total_iters: ${ alg.steps }
+    lr_min: 5.e-7
+    warmup_iters: 0.05
diff --git a/external_confs/alg/supmatch_no_disc.yaml b/external_confs/alg/supmatch_no_disc.yaml
index 64ed99c2..ce6813bc 100644
--- a/external_confs/alg/supmatch_no_disc.yaml
+++ b/external_confs/alg/supmatch_no_disc.yaml
@@ -1,8 +1,8 @@
 ---
+disc_loss_w: 0
 num_disc_updates: 0
-twoway_disc_loss: false
-prior_loss_w: 0
-pred_y_loss_w: 0
 pred_s_loss_w: 0
+pred_y_loss_w: 0
+prior_loss_w: 0
+twoway_disc_loss: false
 warmup_steps: 0
-disc_loss_w: 0
diff --git a/external_confs/dm/nicopp.yaml b/external_confs/dm/nicopp.yaml
index 8b18b5e2..72390bd1 100644
--- a/external_confs/dm/nicopp.yaml
+++ b/external_confs/dm/nicopp.yaml
@@ -1,4 +1,4 @@
 stratified_sampler: approx_class
 num_workers: 4
 batch_size_tr: 1
-batch_size_te: 10
+batch_size_te: 20
diff --git a/external_confs/experiment/celeba/rn50/pretrained_enc.yaml b/external_confs/experiment/celeba/rn50/pretrained_enc.yaml
new file mode 100644
index 00000000..f22a72b4
--- /dev/null
+++ b/external_confs/experiment/celeba/rn50/pretrained_enc.yaml
@@ -0,0 +1,58 @@
+# @package _global_
+
+defaults:
+  - override /ds: celeba
+  - override /split: celeba/random/no_nonblond_females
+  - override /labeller: gt
+  - override /ae_arch: resnet/rn50_256_pre
+  - _self_
+
+ae:
+  lr: 1.e-5
+  zs_dim: 6
+  zs_transform: none
+
+alg:
+  use_amp: true
+  pred:
+    lr: ${ ae.lr }
+  log_freq: ${ alg.steps }
+  val_freq: 200
+  num_disc_updates: 5
+  # enc_loss_w: 0.0001
+  enc_loss_w: 1
+  disc_loss_w: 0.03
+  # prior_loss_w: 0.01
+  prior_loss_w: null
+  pred_y_loss_w: 1
+  pred_s_loss_w: 0
+  pred_y:
+    num_hidden: 1 # for decoding the pre-trained RN50 output
+    dropout_prob: 0.1
+  s_pred_with_bias: false
+  s_as_zs: false
+
+disc:
+  lr: 1.e-4
+
+# disc_arch:
+#   dropout_prob: 0.1
+
+dm:
+  stratified_sampler: exact
+  num_workers: 4
+  batch_size_tr: 10
+  batch_size_te: 20
+
+eval:
+  batch_size: 10
+  balanced_sampling: true
+  hidden_dim: null
+  num_hidden: 1
+  steps: 10000
+  opt:
+    lr: 1.e-4
+    scheduler_cls: torch.optim.lr_scheduler.CosineAnnealingLR
+    scheduler_kwargs:
+      T_max: ${ eval.steps }
+      eta_min: 5e-7
diff --git a/external_confs/experiment/nicopp/rn18/only_pred_y_loss.yaml b/external_confs/experiment/nicopp/rn18/only_pred_y_loss.yaml
index 2e7d3f84..c7721ead 100644
--- a/external_confs/experiment/nicopp/rn18/only_pred_y_loss.yaml
+++ b/external_confs/experiment/nicopp/rn18/only_pred_y_loss.yaml
@@ -2,7 +2,7 @@
 
 defaults:
   - /ae: cosine_annealing
-  - /alg: supmatch_no_disc
+  - /alg: only_pred_y_loss
   - /eval: nicopp
   - override /ae_arch: resnet
   - override /dm: nicopp
@@ -19,17 +19,10 @@ ae_arch:
   pretrained_enc: true
 
 alg:
-  pred_y_loss_w: 1.0
-  enc_loss_w: 0.0
   steps: 30000
   use_amp: true
   pred:
-    lr: 5.e-5
-    scheduler_cls: ranzen.torch.schedulers.CosineLRWithLinearWarmup
-    scheduler_kwargs:
-      total_iters: ${ alg.steps }
-      lr_min: 5.e-7
-      warmup_iters: 0.05
+    lr: ${ ae.lr }
   log_freq: 100000000000 # never
   val_freq: 1000
 
diff --git a/external_confs/experiment/nicopp/rn50/only_pred_y_loss.yaml b/external_confs/experiment/nicopp/rn50/only_pred_y_loss.yaml
index c9fde646..c05672c0 100644
--- a/external_confs/experiment/nicopp/rn50/only_pred_y_loss.yaml
+++ b/external_confs/experiment/nicopp/rn50/only_pred_y_loss.yaml
@@ -2,8 +2,7 @@
 
 defaults:
   - /ae: cosine_annealing
-  - /alg: supmatch_no_disc
-  - /alg/pred: cosine_annealing
+  - /alg: only_pred_y_loss
   - /eval: nicopp
   - override /ae_arch: resnet/rn50_256_pre
   - override /dm: nicopp
@@ -12,12 +11,10 @@ defaults:
   - _self_
 
 alg:
-  pred_y_loss_w: 1.0
-  enc_loss_w: 0.0
   steps: 30000
   use_amp: true
   pred:
-    lr: 5.e-5
+    lr: ${ ae.lr }
   log_freq: 100000000000 # never
   val_freq: 1000
 
diff --git a/external_confs/experiment/nicopp/rn50/pretrained_enc.yaml b/external_confs/experiment/nicopp/rn50/pretrained_enc.yaml
index ce4d746b..48054896 100644
--- a/external_confs/experiment/nicopp/rn50/pretrained_enc.yaml
+++ b/external_confs/experiment/nicopp/rn50/pretrained_enc.yaml
@@ -10,7 +10,7 @@ defaults:
   - _self_
 
 ae:
-  lr: 5.e-5
+  lr: 1.e-5
   zs_dim: 6
   zs_transform: none
 
@@ -18,7 +18,7 @@ alg:
   use_amp: true
   pred:
     lr: ${ ae.lr }
-  log_freq: 100000000000 # never
+  log_freq: ${ alg.steps }
   val_freq: 200
   num_disc_updates: 5
   # enc_loss_w: 0.0001
diff --git a/external_confs/hydra/launcher/slurm/ada.yaml b/external_confs/hydra/launcher/slurm/ada.yaml
index 9d28041b..162edd01 100644
--- a/external_confs/hydra/launcher/slurm/ada.yaml
+++ b/external_confs/hydra/launcher/slurm/ada.yaml
@@ -5,7 +5,7 @@ defaults:
   - submitit_slurm
 
 partition: ada
-cpus_per_task: 8 # on ada, we have 3 CPUs per GPU
+cpus_per_task: 10 # on ada, we have 8 CPUs per GPU, but we use 10
 timeout_min: 99999 # 99999 minutes = a few months
 
 additional_parameters:
diff --git a/external_confs/split/nicopp/change_is_hard_seeded.yaml b/external_confs/split/nicopp/change_is_hard_seeded.yaml
new file mode 100644
index 00000000..ab3e2600
--- /dev/null
+++ b/external_confs/split/nicopp/change_is_hard_seeded.yaml
@@ -0,0 +1,7 @@
+---
+defaults:
+  - artifact
+  - /split/nicopp/base@_here_
+  - _self_
+
+artifact_name: split_nicopp_change_is_hard_kyiv_${ seed }
diff --git a/scripts/save_nicopp_splits.py b/scripts/save_nicopp_splits.py
new file mode 100644
index 00000000..07059d65
--- /dev/null
+++ b/scripts/save_nicopp_splits.py
@@ -0,0 +1,38 @@
+import platform
+from sys import argv
+
+from conduit.data.datasets.vision import NICOPP
+import numpy as np
+import torch
+import wandb
+
+from src.data.common import find_data_dir
+from src.data.splitter import save_split_inds_as_artifact
+
+
+def main(seed: int) -> None:
+    assert seed >= 0
+    run = wandb.init(
+        project="support-matching", entity="predictive-analytics-lab", dir="local_logging"
+    )
+    NICOPP.data_split_seed = seed
+    ds = NICOPP(root=find_data_dir())
+    split_ids = ds.metadata["split"]
+    train_inds = torch.as_tensor(np.nonzero(split_ids == NICOPP.Split.TRAIN.value)[0])
+    test_inds = torch.as_tensor(np.nonzero(split_ids == NICOPP.Split.TEST.value)[0])
+    dep_inds = torch.as_tensor(np.nonzero(split_ids == NICOPP.Split.VAL.value)[0])
+    name_of_machine = platform.node()
+    save_split_inds_as_artifact(
+        run=run,
+        train_inds=train_inds,
+        test_inds=test_inds,
+        dep_inds=dep_inds,
+        ds=ds,
+        seed=seed,
+        artifact_name=f"split_nicopp_change_is_hard_{name_of_machine}_{seed}",
+    )
+    run.finish()
+
+
+if __name__ == "__main__":
+    main(int(argv[1]))

From 68394b9ddd276150be5e47e73017ac5484df55c1 Mon Sep 17 00:00:00 2001
From: Thomas MK
Date: Tue, 27 Jun 2023 22:28:11 +0000
Subject: [PATCH 3/4] New split

---
 external_confs/split/celeba/random/no_nonblond_females.yaml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/external_confs/split/celeba/random/no_nonblond_females.yaml b/external_confs/split/celeba/random/no_nonblond_females.yaml
index 0873b616..24e0523f 100644
--- a/external_confs/split/celeba/random/no_nonblond_females.yaml
+++ b/external_confs/split/celeba/random/no_nonblond_females.yaml
@@ -4,5 +4,7 @@ defaults:
   - /split/celeba/random/base@_here_
   - _self_
 
+dep_prop: 0.1
+test_prop: 0.1
 artifact_name: split_celeba_no_nonblond_females_${oc.env:SLURM_NODELIST}_${.seed}
-train_subsampling_props: {0: {0: 0}} # Drop all nonblond females
\ No newline at end of file
+train_subsampling_props: {0: {0: 0}} # Drop all nonblond females

From c725c4ba33c816d3f35531819abdbe4ea60ea08b Mon Sep 17 00:00:00 2001
From: Thomas MK
Date: Tue, 27 Jun 2023 22:28:25 +0000
Subject: [PATCH 4/4] Prepare for label noise experiments

---
 src/algs/adv/supmatch.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/algs/adv/supmatch.py b/src/algs/adv/supmatch.py
index 01acb426..e01c623a 100644
--- a/src/algs/adv/supmatch.py
+++ b/src/algs/adv/supmatch.py
@@ -33,7 +33,11 @@ def _get_data_iterators(self, dm: DataModule) -> tuple[IterTr, IterDep]:
         dl_tr = dm.train_dataloader(balance=True)
         # The batch size needs to be consistent for the aggregation layer in the setwise neural
         # discriminator
-        dl_dep = dm.deployment_dataloader(batch_size=dm.batch_size_tr)
+        dl_dep = dm.deployment_dataloader(
+            batch_size=dl_tr.batch_sampler.batch_size
+            if dm.deployment_ids is None
+            else dm.batch_size_tr
+        )
         return iter(dl_tr), iter(dl_dep)
 
     @override
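The rationale for the final hunk, restated: the setwise neural discriminator aggregates over a whole batch, so the deployment loader must yield sets of the same size as the ones the training loader produces. Below is a minimal standalone sketch of that selection rule; the DataModule class, its fields (batch_size_tr, deployment_ids), and the train sampler's batch size are hypothetical stand-ins for the project's real classes, not its actual API.

from dataclasses import dataclass
from typing import Optional, Sequence


@dataclass
class DataModule:  # hypothetical stand-in, not the project's actual class
    batch_size_tr: int
    deployment_ids: Optional[Sequence[int]] = None


def deployment_batch_size(dm: DataModule, train_sampler_batch_size: int) -> int:
    # With no explicit deployment IDs, reuse the training sampler's batch size so the
    # setwise discriminator's aggregation layer always sees equally sized sets;
    # otherwise fall back to the configured training batch size.
    if dm.deployment_ids is None:
        return train_sampler_batch_size
    return dm.batch_size_tr


# Usage, mirroring dl_dep = dm.deployment_dataloader(batch_size=...) in the hunk above:
assert deployment_batch_size(DataModule(batch_size_tr=10), 32) == 32
assert deployment_batch_size(DataModule(batch_size_tr=10, deployment_ids=[0, 1]), 32) == 10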