diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..952e624 --- /dev/null +++ b/.gitignore @@ -0,0 +1,11 @@ +__pycache__/ + + +*.egg-info/ + +build/ +dist/ + +logs/ + +configs/ \ No newline at end of file diff --git a/examples/GDA/configs/FLAG.yml b/examples/GDA/configs/FLAG.yml new file mode 100644 index 0000000..ac50617 --- /dev/null +++ b/examples/GDA/configs/FLAG.yml @@ -0,0 +1,57 @@ +dataset: + classname: "Ogbn" + name: "arxiv" + root: "/home/ssq/test_data/" +model: + gnn_type: 'gcn' + hidden_dim: 256 + dropout: 0.5 + n_layers: 3 + step_size: 0.001 + augM: 3 + batch_norm: True +task: + lr: 0.01 + seed: 12345 + epochs: 500 + patience: 80 + weight_decay: 0 +# dataset: +# classname: "Ogbn" +# name: "arxiv" +# root: "/home/ssq/test_data/" +# model: +# gnn_type: 'sage' +# hidden_dim: 256 +# dropout: 0.5 +# n_layers: 3 +# step_size: 0.001 +# augM: 3 +# batch_norm: True +# task: +# lr: 0.01 +# seed: 12345 +# epochs: 500 +# patience: 80 +# weight_decay: 0 +# dataset: +# classname: "Ogbn" +# name: "arxiv" +# root: "/home/ssq/test_data/" +# model: +# gnn_type: 'gat' +# hidden_dim: 256 +# n_heads: '8,8,1' +# dropout: 0.6 +# attn_dropout: 0.6 +# n_layers: 3 +# step_size: 0.001 +# augM: 3 +# amp: 2 +# batch_norm: True +# task: +# lr: 0.002 +# seed: 12345 +# epochs: 500 +# patience: 100 +# weight_decay: 0 \ No newline at end of file diff --git a/examples/GDA/configs/GAugM.yml b/examples/GDA/configs/GAugM.yml new file mode 100644 index 0000000..6c55082 --- /dev/null +++ b/examples/GDA/configs/GAugM.yml @@ -0,0 +1,59 @@ +dataset: + classname: "Planetoid" + name: "cora" + root: "/home/ssq/test_data/" +model: + model_name: 'GAugM' + gnn_type: 'gcn' + feat_norm: 'row' + hidden_dim: 256 + dropout: 0.5 + n_layers: 2 + choose_idx: 5 + rm_pct: 2 + add_pct: 57 +task: + lr: 0.01 + seed: 42 + epochs: 200 + weight_decay: 0.0005 +# dataset: +# classname: "Planetoid" +# name: "cora" +# root: "/home/ssq/test_data/" +# model: +# model_name: 'GAugM' +# gnn_type: 'gsage' 
+# feat_norm: 'row' +# normalize: True +# hidden_dim: 128 +# dropout: 0.5 +# n_layers: 2 +# choose_idx: 2 +# rm_pct: 1 +# add_pct: 80 +# task: +# lr: 0.01 +# seed: 42 +# epochs: 200 +# weight_decay: 0.0005 +# dataset: +# classname: "Planetoid" +# name: "cora" +# root: "/home/ssq/test_data/" +# model: +# model_name: 'GAugM' +# gnn_type: 'gat' +# feat_norm: 'row' +# activation: 'elu' +# hidden_dim: 128 +# dropout: 0.5 +# n_layers: 2 +# choose_idx: 2 +# rm_pct: 1 +# add_pct: 68 +# task: +# lr: 0.01 +# seed: 42 +# epochs: 200 +# weight_decay: 0.0005 \ No newline at end of file diff --git a/examples/GDA/configs/GAugO.yml b/examples/GDA/configs/GAugO.yml new file mode 100644 index 0000000..d1e52e3 --- /dev/null +++ b/examples/GDA/configs/GAugO.yml @@ -0,0 +1,80 @@ +# dataset: +# classname: "Planetoid" +# name: "cora" +# root: "/home/ssq/test_data/" +# model: +# model_name: 'GAugO' +# gnn_type: 'gcn' +# alpha: 1.0 +# temperature: 1.2 +# hidden_dim: 256 +# emb_size: 32 +# dropout: 0.5 +# n_layers: 2 +# gae: true +# feat_norm: 'row' +# sample_type: 'add_sample' +# task: +# lr: 0.01 +# seed: 42 +# warmup: 0 +# beta: 0.8 +# epochs: 200 +# weight_decay: 0.0005 +# pretrain_ep: 160 +# pretrain_nc: 30 +# max_patience: 50 +# dataset: +# classname: "Planetoid" +# name: "cora" +# root: "/home/ssq/test_data/" +# model: +# model_name: 'GAugO' +# gnn_type: 'gsage' +# alpha: 0.13 +# temperature: 1.0 +# hidden_dim: 256 +# emb_size: 32 +# dropout: 0.5 +# n_layers: 2 +# gae: true +# feat_norm: 'row' +# normalize: True +# sample_type: 'add_sample' +# task: +# lr: 0.01 +# seed: 42 +# warmup: 2 +# beta: 3.2 +# epochs: 200 +# weight_decay: 0.0005 +# pretrain_ep: 195 +# pretrain_nc: 35 +# max_patience: 50 +dataset: + classname: "Planetoid" + name: "cora" + root: "/home/ssq/test_data/" +model: + model_name: 'GAugO' + gnn_type: 'gat' + alpha: 0.02 + temperature: 1.7 + hidden_dim: 128 + emb_size: 32 + dropout: 0.6 + n_layers: 2 + activation: "elu" + gae: true + feat_norm: 'row' + sample_type: 
'add_sample' +task: + lr: 0.01 + seed: 42 + warmup: 1 + beta: 3.2 + epochs: 200 + weight_decay: 0.0005 + pretrain_ep: 175 + pretrain_nc: 45 + max_patience: 50 diff --git a/examples/GDA/configs/GAugOMini.yml b/examples/GDA/configs/GAugOMini.yml new file mode 100644 index 0000000..a70ba88 --- /dev/null +++ b/examples/GDA/configs/GAugOMini.yml @@ -0,0 +1,30 @@ +dataset: + classname: "Planetoid" + name: "pubmed" + root: "/home/ssq/test_data/" +model: + model_name: 'GAugO' + gnn_type: 'gcn' + alpha: 0.79 + temperature: 1.6 + hidden_dim: 256 + emb_size: 64 + dropout: 0.5 + n_layers: 2 + gae: true + feat_norm: 'row' + sample_type: 'add_sample' + minibatch: True +task: + lr: 0.01 + ep_lr: 0.006 + seed: 42 + warmup: 7 + beta: 3.3 + epochs: 200 + weight_decay: 0.0005 + pretrain_ep: 10 + pretrain_nc: 40 + max_patience: 50 + train_batch_size: 8192 + pretrain_batch_size: 8192 \ No newline at end of file diff --git a/examples/GDA/configs/Mixup.yml b/examples/GDA/configs/Mixup.yml new file mode 100644 index 0000000..ec38742 --- /dev/null +++ b/examples/GDA/configs/Mixup.yml @@ -0,0 +1,20 @@ +dataset: + classname: "Planetoid" + name: "pubmed" + root: "/home/ssq/test_data/" + split: "full" +model: + gnn_type: 'sage' + hidden_dim: 256 + dropout: 0.5 + alpha: 4 + beta: 4 + n_layers: 3 + feat_norm: "none" + batch_norm: True +task: + lr: 0.01 + seed: 12345 + epochs: 300 + patience: 30 + weight_decay: 0 \ No newline at end of file diff --git a/examples/GDA/configs/SampleFLAG.yml b/examples/GDA/configs/SampleFLAG.yml new file mode 100644 index 0000000..fe7db14 --- /dev/null +++ b/examples/GDA/configs/SampleFLAG.yml @@ -0,0 +1,78 @@ +# dataset: +# classname: "Ogbn" +# name: "products" +# root: "/home/ssq/test_data/" +# sampler: +# training: +# name: "NeighborSampler" +# layer_sizes: "15,10,5" +# prob_type: "normalize" +# replace: False +# eval: +# name: "NeighborSampler" +# layer_sizes: "-1" +# replace: False +# model: +# gnn_type: 'sage' +# hidden_dim: 256 +# dropout: 0.5 +# n_layers: 3 
+# step_size: 0.008 +# augM: 3 +# amp: 2 +# batch_norm: False +# task: +# name: "NodeClassification_Sampling" +# lr: 0.003 +# seed: 12345 +# epochs: 20 +# patience: 10 +# weight_decay: 0 +# train_batch_size: 1024 +# eval_batch_size: 4096 +# train_num_workers: 12 +# eval_num_workers: 12 +# eval_together: True +# eval_freq: 2 +# eval_start: 10 +# loss_fn: "nll_loss" +dataset: + classname: "Ogbn" + name: "arxiv" + root: "/home/ssq/test_data/" +sampler: + training: + name: "NeighborSampler" + layer_sizes: "15,10,5" + prob_type: "normalize" + replace: False + eval: + name: "NeighborSampler" + layer_sizes: "-1" + replace: False +model: + gnn_type: 'gat' + hidden_dim: 64 + n_heads: "8,8,1" + dropout: 0.5 + n_layers: 3 + step_size: 0.008 + augM: 3 + amp: 2 + batch_norm: False +task: + name: "NodeClassification_Sampling" + lr: 0.002 + seed: 12345 + epochs: 500 + patience: 50 + weight_decay: 0 + train_batch_size: 1024 + eval_batch_size: 4096 + train_num_workers: 12 + eval_num_workers: 12 + eval_together: True + pin_memory: True + eval_freq: 2 + eval_start: 1 + loss_fn: "nll_loss" \ No newline at end of file diff --git a/examples/GDA/configs/SampleMixup.yml b/examples/GDA/configs/SampleMixup.yml new file mode 100644 index 0000000..07ec815 --- /dev/null +++ b/examples/GDA/configs/SampleMixup.yml @@ -0,0 +1,36 @@ +dataset: + classname: "Ogbn" + name: "arxiv" + root: "/home/ssq/test_data/" +sampler: + training: + name: "NeighborSampler" + layer_sizes: "15, 10, 5" + prob_type: "normalize" + replace: False + eval: + name: "NeighborSampler" + layer_sizes: "-1" + replace: False +model: + gnn_type: 'sage' + hidden_dim: 256 + dropout: 0.5 + alpha: 4 + beta: 4 + n_layers: 3 + feat_norm: "none" + batch_norm: True +task: + name: "NodeClassification_Sampling" + lr: 0.003 + seed: 12345 + epochs: 300 + patience: 30 + weight_decay: 0 + train_batch_size: 1024 + eval_batch_size: 4096 + train_num_workers: 12 + eval_num_workers: 12 + eval_together: True + eval_freq: 2 \ No newline at end of file 
diff --git a/examples/GDA/test_FLAG.py b/examples/GDA/test_FLAG.py new file mode 100644 index 0000000..f317f84 --- /dev/null +++ b/examples/GDA/test_FLAG.py @@ -0,0 +1,30 @@ +import yaml +import argparse + +import sgl.dataset as Dataset +from sgl.models.homo.gda import FLAG +from sgl.tasks import NodeClassification + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description = "FLAG-Model.") + parser.add_argument( + "--device", type=int, default=0, help="gpu device id or cpu (-1)" + ) + parser.add_argument( + "--config_path", type=str, default="./configs/FLAG.yml", help="save path of the configuration file" + ) + args = parser.parse_args() + config = yaml.safe_load(open(args.config_path, "rb")) + device = f"cuda:{args.device}" if args.device >= 0 else "cpu" + dataset_kwargs = config["dataset"] + model_kwargs = config["model"] + task_kwargs = config["task"] + + dataset_classname = dataset_kwargs.pop("classname") + dataset = getattr(Dataset, dataset_classname)(**dataset_kwargs) + for seed in range(10): + model = FLAG(in_dim=dataset.num_features, n_classes=dataset.num_classes, **model_kwargs) + task_kwargs.update({"device": device}) + task_kwargs.update({"seed": seed}) + test_acc = NodeClassification(dataset, model, **task_kwargs).test_acc + print(f"test acc: {test_acc:.4f}") \ No newline at end of file diff --git a/examples/GDA/test_GAug.py b/examples/GDA/test_GAug.py new file mode 100644 index 0000000..3aa60c0 --- /dev/null +++ b/examples/GDA/test_GAug.py @@ -0,0 +1,33 @@ +import yaml +import argparse + +import sgl.dataset as Dataset +from sgl.models.homo.gda import GAugO, GAugM +from sgl.tasks import NodeClassificationGAugO, NodeClassificationGAugM + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="GAug-Model.") + parser.add_argument( + "--device", type=int, default=0, help="gpu device id or cpu (-1)" + ) + parser.add_argument( + "--config_path", type=str, default="./configs/GAugO.yml", help="save path of the configuration 
file" + ) + args = parser.parse_args() + config = yaml.safe_load(open(args.config_path, "rb")) + device = f"cuda:{args.device}" if args.device >= 0 else "cpu" + dataset_kwargs = config["dataset"] + model_kwargs = config["model"] + task_kwargs = config["task"] + + dataset_classname = dataset_kwargs.pop("classname") + dataset = getattr(Dataset, dataset_classname)(**dataset_kwargs) + Model = {"GAugO": GAugO, "GAugM": GAugM} + Task = {"GAugO": NodeClassificationGAugO, "GAugM": NodeClassificationGAugM} + model_name = model_kwargs.pop("model_name") + for seed in range(10): + model = Model.get(model_name)(in_dim=dataset.num_features, n_classes=dataset.num_classes, **model_kwargs) + task_kwargs.update({"device": device}) + task_kwargs.update({"seed": seed}) + test_acc = Task.get(model_name)(dataset, model, **task_kwargs).test_acc + print(f"test acc: {test_acc:.4f}") \ No newline at end of file diff --git a/examples/GDA/test_Mixup.py b/examples/GDA/test_Mixup.py new file mode 100644 index 0000000..6e70d46 --- /dev/null +++ b/examples/GDA/test_Mixup.py @@ -0,0 +1,31 @@ +import yaml +import argparse + +import sgl.dataset as Dataset +from sgl.models.homo.gda import Mixup +from sgl.tasks import NodeClassification + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description = "Mixup-Model.") + parser.add_argument( + "--device", type=int, default=0, help="gpu device id or cpu (-1)" + ) + parser.add_argument( + "--config_path", type=str, default="./configs/Mixup.yml", help="save path of the configuration file" + ) + args = parser.parse_args() + config = yaml.safe_load(open(args.config_path, "rb")) + device = f"cuda:{args.device}" if args.device >= 0 else "cpu" + dataset_kwargs = config["dataset"] + model_kwargs = config["model"] + task_kwargs = config["task"] + + dataset_classname = dataset_kwargs.pop("classname") + dataset = getattr(Dataset, dataset_classname)(**dataset_kwargs) + for seed in range(10): + model = Mixup(in_dim=dataset.num_features, 
n_classes=dataset.num_classes, **model_kwargs) + task_kwargs.update({"loss_fn": model.loss_fn}) + task_kwargs.update({"device": device}) + task_kwargs.update({"seed": seed}) + test_acc = NodeClassification(dataset, model, **task_kwargs).test_acc + print(f"test acc: {test_acc:.4f}") \ No newline at end of file diff --git a/examples/GDA/test_SampleFLAG.py b/examples/GDA/test_SampleFLAG.py new file mode 100644 index 0000000..7803ee3 --- /dev/null +++ b/examples/GDA/test_SampleFLAG.py @@ -0,0 +1,46 @@ +import yaml +import argparse + +import sgl.dataset as Dataset +import sgl.sampler as Sampler +import sgl.tasks as Tasks +from sgl.models.homo.gda import SampleFLAG + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("Sampler-Models") + parser.add_argument( + "--device", type=int, default=0, help="gpu device id or cpu (-1)" + ) + parser.add_argument( + "--config_path", type=str, default="./configs/SampleFLAG.yml", help="save path of the configuration file" + ) + args = parser.parse_args() + config = yaml.safe_load(open(args.config_path, "rb")) + device = f"cuda:{args.device}" if args.device >= 0 else "cpu" + dataset_kwargs = config["dataset"] + task_kwargs = config["task"] + classname = dataset_kwargs.pop("classname") + dataset = getattr(Dataset, classname)(**dataset_kwargs) + training_sampler_kwargs = config["sampler"]["training"] + if "inductive" in training_sampler_kwargs.keys(): + inductive = training_sampler_kwargs.pop("inductive") + else: + inductive = False + task_kwargs.update({"inductive": inductive}) + training_sampler_name = training_sampler_kwargs.pop("name") + training_sampler_kwargs.update({"save_dir": dataset.processed_dir}) + training_sampler = getattr(Sampler, training_sampler_name)(dataset.adj[dataset.train_idx, :][:, dataset.train_idx] if inductive else dataset.adj, **training_sampler_kwargs) + if "eval" in config["sampler"].keys(): + eval_sampler_kwargs = config["sampler"]["eval"] + eval_sampler_name = eval_sampler_kwargs.pop("name") + 
eval_sampler_kwargs.update({"save_dir": dataset.processed_dir}) + eval_sampler = getattr(Sampler, eval_sampler_name)(dataset.adj, **eval_sampler_kwargs) + else: + eval_sampler = None + model_kwargs = config["model"] + model = SampleFLAG(training_sampler, eval_sampler, in_dim=dataset.num_features, n_classes=dataset.num_classes, **model_kwargs) + task_kwargs.update({"device": device}) + task_name = task_kwargs.pop("name") + test_acc = getattr(Tasks, task_name)(dataset, model, **task_kwargs).test_acc + print(f"final test acc: {test_acc}") \ No newline at end of file diff --git a/examples/GDA/test_SampleMixup.py b/examples/GDA/test_SampleMixup.py new file mode 100644 index 0000000..3c17562 --- /dev/null +++ b/examples/GDA/test_SampleMixup.py @@ -0,0 +1,53 @@ +import yaml +import argparse +import scipy.sparse as sp + +import sgl.tasks as Tasks +import sgl.dataset as Dataset +import sgl.sampler as Sampler +from sgl.models.homo.gda import SampleMixup + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("Sampler-Models") + parser.add_argument( + "--device", type=int, default=0, help="gpu device id or cpu (-1)" + ) + parser.add_argument( + "--config_path", type=str, default="./configs/SampleMixup.yml", help="save path of the configuration file" + ) + args = parser.parse_args() + config = yaml.safe_load(open(args.config_path, "rb")) + device = f"cuda:{args.device}" if args.device >= 0 else "cpu" + dataset_kwargs = config["dataset"] + task_kwargs = config["task"] + classname = dataset_kwargs.pop("classname") + dataset = getattr(Dataset, classname)(**dataset_kwargs) + adj_matrix = dataset.adj + if isinstance(adj_matrix, sp.coo_matrix) is False: + adj_matrix = sp.coo_matrix(adj_matrix) + adj_matrix.setdiag(0) + adj_matrix = adj_matrix.tocsr() + training_sampler_kwargs = config["sampler"]["training"] + if "inductive" in training_sampler_kwargs.keys(): + inductive = training_sampler_kwargs.pop("inductive") + else: + inductive = False + 
task_kwargs.update({"inductive": inductive}) + training_sampler_name = training_sampler_kwargs.pop("name") + training_sampler_kwargs.update({"save_dir": dataset.processed_dir}) + training_sampler = getattr(Sampler, training_sampler_name)(adj_matrix[dataset.train_idx, :][:, dataset.train_idx] if inductive else adj_matrix, **training_sampler_kwargs) + if "eval" in config["sampler"].keys(): + eval_sampler_kwargs = config["sampler"]["eval"] + eval_sampler_name = eval_sampler_kwargs.pop("name") + eval_sampler_kwargs.update({"save_dir": dataset.processed_dir}) + eval_sampler = getattr(Sampler, eval_sampler_name)(adj_matrix, **eval_sampler_kwargs) + else: + eval_sampler = None + model_kwargs = config["model"] + model = SampleMixup(training_sampler, eval_sampler, in_dim=dataset.num_features, n_classes=dataset.num_classes, **model_kwargs) + task_kwargs.update({"device": device}) + task_kwargs.update({"loss_fn": model.loss_fn}) + task_name = task_kwargs.pop("name") + test_acc = getattr(Tasks, task_name)(dataset, model, **task_kwargs).test_acc + print(f"final test acc: {test_acc}") \ No newline at end of file diff --git a/examples/GDA/test_search_GAug.py b/examples/GDA/test_search_GAug.py new file mode 100644 index 0000000..170185c --- /dev/null +++ b/examples/GDA/test_search_GAug.py @@ -0,0 +1,66 @@ +import torch +import argparse +import torch.nn.functional as F +from openbox import Optimizer + +import sgl.dataset as Dataset +from sgl.search.gda_hpo.search_config import BaseGDAConfigManager + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="HPO-GAug-Model.") + parser.add_argument("--device", type=int, default=0, help="gpu device id or cpu(-1)") + parser.add_argument("--dataset_classname", type=str, default="Planetoid", help="class name of the dataset") + parser.add_argument("--name", type=str, default="cora", help="dataset name") + parser.add_argument("--root", type=str, default="data/", help="root dir for dataset") + 
parser.add_argument("--gnn_type", type=str, default="gcn", choices=["gcn", "gsage", "gat"], help="gnn backbone") + parser.add_argument("--not_gae", action="store_true", default=False, help="whether not to use gae") + parser.add_argument("--minibatch", action="store_true", default=False, help="whether to use minibatch") + parser.add_argument("--pretrain_batch_size", type=int, default=-1, help="batch size when pretraining ep net") + parser.add_argument("--train_batch_size", type=int, default=-1, help="batch size when training") + parser.add_argument("--model", type=str, default="GAugO", choices=["GAugO", "GAugM"], help="choose the target mnodel") + parser.add_argument("--num_logits", type=int, default=10, help="number of candidate edge logits") + parser.add_argument("--runs_per_config", type=int, default=5, help="repeat runs for each configuration") + parser.add_argument("--max_patience", type=int, default=50, help="patience for early stop") + args = parser.parse_args() + device = f"cuda:{args.device}" if args.device >= 0 and torch.cuda.is_available() else "cpu" + dataset = getattr(Dataset, args.dataset_classname)(name=args.name, root=args.root) + pretrain_batch_size = args.pretrain_batch_size if args.pretrain_batch_size > 0 else None + train_batch_size = args.train_batch_size if args.train_batch_size > 0 else None + if args.model == "GAugO": + model_keys = ["in_dim", "hidden_dim", "emb_size", "n_classes", "n_layers", "dropout", "gnn_type", "activation", "temperature", "gae", "alpha", "feat_norm", "sample_type", "minibatch", "n_heads"] + task_keys = ["dataset", "model", "lr", "weight_decay", "epochs", "device", "beta", "warmup", "max_patience", "pretrain_ep", "pretrain_nc", "runs", "verbose", "seed", "pretrain_batch_size", "train_batch_size", "ep_lr"] + const_model_kwargs = dict(in_dim=dataset.num_features, n_classes=dataset.num_classes, gnn_type=args.gnn_type, activation=F.relu, gae=not args.not_gae, minibatch=args.minibatch, emb_size=32, n_layers=2, dropout=0.5, 
feat_norm="row") + const_task_kwargs = dict(dataset=dataset, epochs=200, device=device, max_patience=args.max_patience, pretrain_batch_size=pretrain_batch_size, train_batch_size=train_batch_size, runs=args.runs_per_config, verbose=False, lr=0.01, weight_decay=0.0005) + Reals = dict(alpha=dict(lower=0, upper=1, default_value=0.4, q=0.01), temperature=dict(lower=0.1, upper=2.1, default_value=1.5, q=0.1), beta=dict(lower=0, upper=4, default_value=2, q=0.1)) + if pretrain_batch_size is not None: + Reals.update(ep_lr=dict(lower=0.001, upper=0.01, default_value=0.002, q=0.001)) + else: + const_task_kwargs.update(ep_lr=0.01) + Categoricals = dict(hidden_dim=dict(choices=[32, 64, 128, 256], default_value=128)) + Ints = dict(warmup=dict(lower=0, upper=10, default_value=2, q=1), pretrain_ep=dict(lower=5, upper=300, default_value=100, q=5), pretrain_nc=dict(lower=5, upper=300, default_value=100, q=5)) + hier_params = dict(Real=Reals, Categorical=Categoricals, Int=Ints) + configer = BaseGDAConfigManager(args.model, f"NodeClassification{args.model}", model_keys, task_keys, const_model_kwargs, const_task_kwargs, hier_params) + else: + model_keys = ["in_dim", "hidden_dim", "n_classes", "n_layers", "gnn_type", "rm_pct", "add_pct", "choose_idx", "gae", "dropout", "activation", "feat_norm", "n_heads"] + task_keys = ["dataset", "model", "lr", "weight_decay", "epochs", "device", "max_patience", "runs", "verbose", "seed"] + const_model_kwargs = dict(in_dim=dataset.num_features, n_classes=dataset.num_classes, gnn_type=args.gnn_type, activation=F.relu, gae=not args.not_gae, n_layers=2, dropout=0.5, feat_norm="row") + const_task_kwargs = dict(dataset=dataset, epochs=200, device=device, max_patience=args.max_patience, runs=args.runs_per_config, verbose=False, lr=0.01, weight_decay=0.0005) + Categoricals = dict(hidden_dim=dict(choices=[32, 64, 128, 256], default_value=128)) + Ints = dict(choose_idx=dict(lower=1, upper=args.num_logits, default_value=1, q=1), rm_pct=dict(lower=0, upper=80, 
default_value=20, q=1), add_pct=dict(lower=0, upper=80, default_value=20, q=1)) + hier_params = dict(Categorical=Categoricals, Int=Ints) + configer = BaseGDAConfigManager(args.model, f"NodeClassification{args.model}", model_keys, task_keys, const_model_kwargs, const_task_kwargs, hier_params) + + opt = Optimizer(configer._configFunction, + configer._configSpace(), + num_objectives=1, + num_constraints=0, + max_runs=400, + surrogate_type="prf", + acq_type='ei', + acq_optimizer_type='local_random', + initial_runs=20, + task_id='quick_start', + random_state=1) + + history = opt.run() + print(history) \ No newline at end of file diff --git a/examples/clustergcn_nodeclass.py b/examples/clustergcn_nodeclass.py new file mode 100644 index 0000000..41e3025 --- /dev/null +++ b/examples/clustergcn_nodeclass.py @@ -0,0 +1,53 @@ +import yaml +import argparse +import sgl.dataset as Dataset +from sgl.models.homo import ClusterGCN +import sgl.sampler as Sampler +from sgl.sampler import ClusterGCNSampler +from sgl.tasks import NodeClassification_Sampling + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description = "ClusterGCNSampler-Models.") + parser.add_argument( + "--device", type=int, default=0, help="gpu device id or cpu (-1)" + ) + parser.add_argument( + "--config_path", type=str, default="./configs/clustergcn.yml", help="save path of the configuration file" + ) + args = parser.parse_args() + config = yaml.safe_load(open(args.config_path, "rb")) + device = f"cuda:{args.device}" if args.device >= 0 else "cpu" + dataset_kwargs = config["dataset"] + sampler_kwargs = config["sampler"] + model_kwargs = config["model"] + task_kwargs = config["task"] + + classname = dataset_kwargs.pop("classname") + dataset = getattr(Dataset, classname)(**dataset_kwargs) + train_sampler_kwargs = sampler_kwargs["train"] + train_sampler_kwargs.update({"save_dir": dataset.processed_dir}) + train_cluster_number = train_sampler_kwargs["cluster_number"] + 
task_kwargs.update({"train_graph_number": train_cluster_number}) + if "inductive" in train_sampler_kwargs.keys(): + inductive = train_sampler_kwargs.pop("inductive") + else: + inductive = False + task_kwargs.update({"inductive": inductive}) + train_sampler = ClusterGCNSampler(dataset, inductive=inductive, **train_sampler_kwargs) + if "eval" in sampler_kwargs: + eval_sampler_kwargs = sampler_kwargs["eval"] + eval_sampler_name = eval_sampler_kwargs["name"] + if eval_sampler_name == "ClusterGCNSampler": + eval_sampler_kwargs.update({"save_dir": dataset.processed_dir}) + eval_cluster_number = eval_sampler_kwargs["cluster_number"] + task_kwargs.update({"eval_graph_number": eval_cluster_number}) + eval_sampler = ClusterGCNSampler(dataset, **eval_sampler_kwargs) + else: + eval_sampler = getattr(Sampler, eval_sampler_name)(dataset.adj, **eval_sampler_kwargs) + else: + eval_sampler = None + model_kwargs.update({"device": device}) + model = ClusterGCN(train_sampler, eval_sampler, nfeat=dataset.num_features, nclass=dataset.num_classes, **model_kwargs) + task_kwargs.update({"device": device}) + test_acc = NodeClassification_Sampling(dataset, model, **task_kwargs).test_acc diff --git a/examples/configs/clustergcn.yml b/examples/configs/clustergcn.yml new file mode 100644 index 0000000..d723e7d --- /dev/null +++ b/examples/configs/clustergcn.yml @@ -0,0 +1,45 @@ +# dataset: +# classname: "Planetoid" +# name: "cora" +# root: "/home/ssq/test_data/" +# sampler: +# train: +# cluster_method: "metis" +# cluster_number: 10 +# sparse_type: "pyg" +# model: +# hidden_dim: 128 +# dropout: 0.5 +# num_layers: 2 +# sparse_type: "pyg" +# task: +# train_batch_size: 5 +# epochs: 30 +# lr: 0.01 +# weight_decay: 0.00005 +# loss_fn: "nll_loss" +# seed: 42 +dataset: + classname: "Planetoid" + name: "pubmed" + root: "/home/ssq/test_data/" + split: "full" +sampler: + train: + cluster_method: "metis" + cluster_number: 10 + sparse_type: "pyg" + inductive: True +model: + hidden_dim: 128 + dropout: 0.5 + 
num_layers: 2 + sparse_type: "pyg" +task: + train_batch_size: 5 + epochs: 50 + lr: 0.01 + weight_decay: 0.00005 + loss_fn: "nll_loss" + seed: 42 + diff --git a/examples/configs/fastgcn.yml b/examples/configs/fastgcn.yml new file mode 100644 index 0000000..e4c16f2 --- /dev/null +++ b/examples/configs/fastgcn.yml @@ -0,0 +1,28 @@ +dataset: + classname: "Planetoid" + name: "pubmed" + root: "/home/ssq/test_data/" + split: "full" +sampler: + training: + name: "FastGCNSampler" + inductive: False + pre_sampling_op: "LaplacianGraphOp" + layer_sizes: "2048,2048" + prob_type: "normalize" + replace: False + sparse_type: "torch" +model: + name: "FastGCN" + hidden_dim: 128 + dropout: 0.5 + num_layers: 2 +task: + name: "NodeClassification_Sampling" + train_batch_size: 2048 + train_num_workers: 3 + epochs: 30 + lr: 0.1 + weight_decay: 0.00005 + loss_fn: "nll_loss" + diff --git a/examples/configs/graphsage.yml b/examples/configs/graphsage.yml new file mode 100644 index 0000000..8d96760 --- /dev/null +++ b/examples/configs/graphsage.yml @@ -0,0 +1,38 @@ +dataset: + classname: "Planetoid" + name: "pubmed" + root: "/home/ssq/test_data/" + split: "full" +sampler: + training: + name: "NeighborSampler" + inductive: False + layer_sizes: "10,5" + prob_type: "normalize" + replace: False + post_sampling_op: "RwGraphOp" + sparse_type: "torch" + # eval: + # name: "NeighborSampler" + # inductive: False + # layer_sizes: "-1" + # prob_type: "normalize" + # post_sampling_op: "RwGraphOp" + # sparse_type: "torch" +model: + name: "GraphSAGE" + hidden_dim: 256 + dropout: 0.5 + num_layers: 2 +task: + name: "NodeClassification_Sampling" + train_batch_size: 1024 + train_num_workers: 0 + # eval_batch_size: 1024 + # eval_num_workers: 5 + # eval_together: True + epochs: 20 + lr: 0.03 + weight_decay: 0.00005 + loss_fn: "nll_loss" + diff --git a/examples/configs/graphsaint.yml b/examples/configs/graphsaint.yml new file mode 100644 index 0000000..3f47956 --- /dev/null +++ b/examples/configs/graphsaint.yml @@ 
-0,0 +1,26 @@ +dataset: + classname: "Planetoid" + name: "cora" + root: "/home/ssq/test_data/" +sampler: + train: + pre_sampling_graphs: 20 + sampler_type: "random_walk" + nodebudget: 1000 + edgebudget: 3000 + r: 500 + h: 3 + pre_sampling_op: "RwGraphOp" + sparse_type: "torch" +model: + hidden_dim: 128 + dropout: 0.5 + num_layers: 2 +task: + train_graph_number: 10 + train_batch_size: 5 + epochs: 100 + lr: 0.01 + weight_decay: 0.00005 + seed: 42 + diff --git a/examples/configs/lazygnn.yml b/examples/configs/lazygnn.yml new file mode 100644 index 0000000..ffe25a5 --- /dev/null +++ b/examples/configs/lazygnn.yml @@ -0,0 +1,30 @@ +dataset: + classname: "Planetoid" + name: "pubmed" + root: "/home/ssq/test_data/" + split: "full" +sampler: + training: + name: "NeighborSampler" + inductive: False + layer_sizes: "10,5" + prob_type: "normalize" + replace: False + post_sampling_op: "LaplacianGraphOp" + sparse_type: "torch" +model: + name: "LazyGNN" + basemodel: "GCN" + hidden_dim: 256 + dropout: 0.5 + num_layers: 2 + max_workers: 8 + max_threads: 10 +task: + name: "NodeClassification_RecycleSampling" + num_iters: 100 + lr: 0.01 + weight_decay: 0.00005 + loss_fn: "nll_loss" + train_batch_size: 1024 + diff --git a/examples/configs/vanillagnn.yml b/examples/configs/vanillagnn.yml new file mode 100644 index 0000000..2de9fb0 --- /dev/null +++ b/examples/configs/vanillagnn.yml @@ -0,0 +1,23 @@ +dataset: + classname: "Planetoid" + name: "cora" + root: "/home/ssq/test_data/" + split: "official" +sampler: + training: + name: "FullSampler" + inductive: False + sparse_type: "torch" +model: + name: "VanillaGNN" + basemodel: "SAGE" + hidden_dim: 128 + dropout: 0.5 + num_layers: 2 +task: + name: "NodeClassification_Sampling" + epochs: 50 + lr: 0.1 + weight_decay: 0.00005 + loss_fn: "nll_loss" + diff --git a/examples/gamlp_products.py b/examples/gamlp_products.py index a9ec6aa..89e41b3 100644 --- a/examples/gamlp_products.py +++ b/examples/gamlp_products.py @@ -5,13 +5,15 @@ if __name__ == 
"__main__": parser = argparse.ArgumentParser("GMLP") + parser.add_argument("--device", type=int, default=0, help="GPU ID or CPU (-1)") parser.add_argument("--hidden-dim", type=int, default=512, help="dimension of hidden layer") parser.add_argument("--num-layers", type=int, default=3, help="number of layers") + parser.add_argument("--dataset_root", type=str, default="data/", help="dataset path") args = parser.parse_args() - dataset = Ogbn("products", "./", "official") + dataset = Ogbn("products", args.dataset_root, "official") model = GAMLP(prop_steps=3, feat_dim=dataset.num_features, output_dim=dataset.num_classes, hidden_dim=args.hidden_dim, num_layers=args.num_layers) - device = "cuda:0" - test_acc = NodeClassification(dataset, model, lr=0.1, weight_decay=5e-5, epochs=200, device=device).test_acc + device = f"cuda:{args.device}" + test_acc = NodeClassification(dataset, model, lr=0.1, weight_decay=5e-5, epochs=200, device=device, train_batch_size=100000, eval_batch_size=200000).test_acc diff --git a/examples/graphsaint_nodeclass.py b/examples/graphsaint_nodeclass.py new file mode 100644 index 0000000..3d7520f --- /dev/null +++ b/examples/graphsaint_nodeclass.py @@ -0,0 +1,50 @@ +import yaml +import argparse + +import sgl.dataset as Dataset +from sgl.models.homo import GraphSAINT + +import sgl.sampler as Sampler +from sgl.sampler import GraphSAINTSampler +from sgl.tasks import NodeClassification_Sampling + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="GraphSaint-Models.") + parser.add_argument( + "--device", type=int, default=0, help="gpu device id or cpu (-1)" + ) + parser.add_argument( + "--config_path", type=str, default="./configs/graphsaint.yml", help="save path of the configuration file" + ) + args = parser.parse_args() + config = yaml.safe_load(open(args.config_path, "rb")) + device = f"cuda:{args.device}" if args.device >= 0 else "cpu" + dataset_kwargs = config["dataset"] + sampler_kwargs = config["sampler"] + model_kwargs = 
config["model"] + task_kwargs = config["task"] + + classname = dataset_kwargs.pop("classname") + dataset = getattr(Dataset, classname)(**dataset_kwargs) + train_sampler_kwargs = sampler_kwargs["train"] + train_sampler_kwargs.update({"save_dir": dataset.processed_dir}) + + train_sampler = GraphSAINTSampler(dataset, **train_sampler_kwargs) + if "eval" in sampler_kwargs: + eval_sampler_kwargs = sampler_kwargs["eval"] + eval_sampler_name = eval_sampler_kwargs["name"] + if eval_sampler_name == "ClusterGCNSampler": + eval_sampler_kwargs.update({"save_dir": dataset.processed_dir}) + eval_cluster_number = eval_sampler_kwargs["cluster_number"] + task_kwargs.update({"eval_graph_number": eval_cluster_number}) + eval_sampler = GraphSAINTSampler(dataset, **eval_sampler_kwargs) + else: + eval_sampler = getattr(Sampler, eval_sampler_name)(dataset.adj, **eval_sampler_kwargs) + else: + eval_sampler = None + + model_kwargs.update({"device": device}) + model = GraphSAINT(dataset, train_sampler, eval_sampler, **model_kwargs) + task_kwargs.update({"device": device}) + task_kwargs.update({"loss_fn": model.loss_fn}) + test_acc = NodeClassification_Sampling(dataset, model, **task_kwargs).test_acc diff --git a/examples/sample_based_nodeclass.py b/examples/sample_based_nodeclass.py new file mode 100644 index 0000000..b6ec4ff --- /dev/null +++ b/examples/sample_based_nodeclass.py @@ -0,0 +1,48 @@ +import yaml +import argparse + +import sgl.dataset as Dataset +import sgl.sampler as Sampler +import sgl.models.homo as HomoModels +import sgl.tasks as Tasks + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("Sampler-Models") + parser.add_argument( + "--device", type=int, default=0, help="gpu device id or cpu (-1)" + ) + parser.add_argument( + "--config_path", type=str, default="./configs/fastgcn.yml", help="save path of the configuration file" + ) + args = parser.parse_args() + config = yaml.safe_load(open(args.config_path, "rb")) + device = f"cuda:{args.device}" if args.device >= 
0 else "cpu" + dataset_kwargs = config["dataset"] + task_kwargs = config["task"] + classname = dataset_kwargs.pop("classname") + dataset = getattr(Dataset, classname)(**dataset_kwargs) + training_sampler_kwargs = config["sampler"]["training"] + if "inductive" in training_sampler_kwargs.keys(): + inductive = training_sampler_kwargs.pop("inductive") + else: + inductive = False + task_kwargs.update({"inductive": inductive}) + training_sampler_name = training_sampler_kwargs.pop("name") + training_sampler_kwargs.update({"save_dir": dataset.processed_dir}) + training_sampler = getattr(Sampler, training_sampler_name)(dataset.adj[dataset.train_idx, :][:, dataset.train_idx] if inductive else dataset.adj, **training_sampler_kwargs) + if "eval" in config["sampler"].keys(): + eval_sampler_kwargs = config["sampler"]["eval"] + eval_sampler_name = eval_sampler_kwargs.pop("name") + eval_sampler_kwargs.update({"save_dir": dataset.processed_dir}) + eval_sampler = getattr(Sampler, eval_sampler_name)(dataset.adj, **eval_sampler_kwargs) + else: + eval_sampler = None + model_kwargs = config["model"] + model_name = model_kwargs.pop("name") + model_kwargs.update({"device": device}) + model = getattr(HomoModels, model_name)(dataset, training_sampler, eval_sampler, **model_kwargs) + task_kwargs.update({"device": device}) + task_name = task_kwargs.pop("name") + test_acc = getattr(Tasks, task_name)(dataset, model, **task_kwargs).test_acc + print(f"final test acc: {test_acc}") \ No newline at end of file diff --git a/sgl/data/__init__.py b/sgl/data/__init__.py index fa5d597..0fc4ad7 100644 --- a/sgl/data/__init__.py +++ b/sgl/data/__init__.py @@ -1,8 +1,11 @@ from .transforms import random_drop_edges, random_drop_nodes, biased_drop_edges, get_subgraph, mask_features from .transforms import sort_edges, add_edges, delete_repeated_edges, add_self_loops, remove_self_loops -from .base_data import Node, Edge, Graph +from .base_data import Node, Edge, Graph, Block +from .utils import RandomLoader, 
SplitLoader __all__ = [ + "RandomLoader", + "SplitLoader", "random_drop_edges", "random_drop_nodes", "biased_drop_edges", @@ -15,5 +18,6 @@ "remove_self_loops", "Node", "Edge", + "Block", "Graph", ] \ No newline at end of file diff --git a/sgl/data/base_data.py b/sgl/data/base_data.py index 98d01a0..342513f 100644 --- a/sgl/data/base_data.py +++ b/sgl/data/base_data.py @@ -1,7 +1,56 @@ -import numpy as np import torch -from scipy.sparse import csr_matrix from torch import Tensor +import numpy as np +import scipy.sparse as sp +from scipy.sparse import csr_matrix +from torch_sparse import SparseTensor +from torch_geometric.utils import from_scipy_sparse_matrix +from sgl.utils import sparse_mx_to_torch_sparse_tensor, sparse_mx_to_pyg_sparse_tensor + +# A lighter wrapper class for sampled adjacency matrices, +# as the Edge class seems contains useless information +class Block: + def __init__(self, adjs, sparse_type): + self.__sparse_type = sparse_type + if not isinstance(adjs, list): + self.__adjs = [adjs] + if isinstance(adjs, SparseTensor): + self.__root_sizes = [adjs.sparse_size(0)] + else: + self.__root_sizes = [adjs.shape[0]] + else: + self.__adjs = adjs + if isinstance(adjs[0], SparseTensor): + self.__root_sizes = [adj.sparse_size(0) for adj in adjs] + else: + self.__root_sizes = [adj.shape[0] for adj in adjs] + self.__device = None + + def __len__(self): + return len(self.__adjs) + + def __iter__(self): + for adj in self.__adjs: + yield adj + + def __getitem__(self, id): + return self.__adjs[id] + + def root_size(self, id): + return self.__root_sizes[id] + + def to_device(self, device): + if self.__device == device: + return + if isinstance(self.__adjs[0], sp.spmatrix): + if self.__sparse_type == "pyg": + self.__adjs = [sparse_mx_to_pyg_sparse_tensor(adj) for adj in self.__adjs] + elif self.__sparse_type == "torch": + self.__adjs = [sparse_mx_to_torch_sparse_tensor(adj) for adj in self.__adjs] + else: + self.__adjs = [from_scipy_sparse_matrix(adj)[0] for adj in 
self.__adjs] + self.__adjs = [adj.to(device) for adj in self.__adjs] + self.__device = device # Base class for adjacency matrix @@ -32,7 +81,7 @@ def __init__(self, row, col, edge_weight, edge_type, num_node, edge_attrs=None): @property def sparse_matrix(self): return self.__sparse_matrix - + @property def edge_type(self): return self.__edge_type @@ -86,7 +135,7 @@ def __init__(self, node_type, num_node, x=None, y=None, node_ids=None): if x is not None: if isinstance(x, np.ndarray): - x = torch.FloatTensor(x) + x = torch.FloatTensor(x) #这里是原始实现,但是是有bug的 elif not isinstance(x, Tensor): raise TypeError("x must be a np.ndarray or Tensor!") self.__x = x @@ -160,7 +209,7 @@ def num_edge(self): @property def adj(self): return self.__edge.sparse_matrix - + @property def edge_index(self): return self.__edge.edge_index @@ -181,6 +230,10 @@ def edge_type(self): def node_type(self): return self.__node.node_type + @property + def node_ids(self): + return self.__node.node_ids + @property def x(self): return self.__node.x diff --git a/sgl/data/base_dataset.py b/sgl/data/base_dataset.py index 0a1d1fe..d3fa8fd 100644 --- a/sgl/data/base_dataset.py +++ b/sgl/data/base_dataset.py @@ -1,10 +1,10 @@ -import itertools -import numpy as np import os import os.path as osp +import numpy as np import torch import warnings from scipy.sparse import csr_matrix +from torch_geometric.utils import index_to_mask from sgl.data.base_data import Node, Edge from sgl.data.utils import file_exist, to_undirected @@ -95,6 +95,10 @@ def edge_type_cnt(self): def node_type(self): return self._data.node_type + @property + def node_ids(self): + return self._data.node_ids + @property def train_idx(self): return self._train_idx @@ -106,6 +110,24 @@ def val_idx(self): @property def test_idx(self): return self._test_idx + + @property + def train_mask(self): + mask = torch.zeros((self.num_node, ), dtype=torch.bool) + mask[self._train_idx] = True + return mask + + @property + def val_mask(self): + mask = 
torch.zeros((self.num_node, ), dtype=torch.bool) + mask[self._val_idx] = True + return mask + + @property + def test_mask(self): + mask = torch.zeros((self.num_node, ), dtype=torch.bool) + mask[self._test_idx] = True + return mask @property def num_features(self): @@ -118,6 +140,10 @@ def num_classes(self): @property def num_node(self): return self._data.num_node + + @property + def processed_dir(self): + return self._processed_dir # Base class for graph-level tasks diff --git a/sgl/data/utils.py b/sgl/data/utils.py index 14d4a12..7ed48de 100644 --- a/sgl/data/utils.py +++ b/sgl/data/utils.py @@ -1,6 +1,6 @@ -import os.path as osp import torch - +import numpy as np +import os.path as osp def file_exist(filepaths): if isinstance(filepaths, list): @@ -22,3 +22,48 @@ def to_undirected(edge_index): new_edge_index = torch.stack((new_row, new_col), dim=0) return new_edge_index + +class Loader: + def __init__(self, seed_nodes, batch_size): + self.seed_nodes = seed_nodes + self.batch_size = batch_size + + def __iter__(self): + pass + + def __call__(self): + pass + +class RandomLoader(Loader): + def __init__(self, seed_nodes, batch_size): + super().__init__(seed_nodes, batch_size) + self.num_batches = (len(seed_nodes) + batch_size - 1) // batch_size + + def __iter__(self): + for _ in range(self.num_batches): + batch = np.random.choice( + self.seed_nodes, self.batch_size, replace=False) + yield batch + + def __call__(self): + batch = np.random.choice( + self.seed_nodes, self.batch_size, replace=False) + + return np.sort(batch) + +class SplitLoader(Loader): + def __init__(self, seed_nodes, batch_size): + super().__init__(seed_nodes, batch_size) + if not isinstance(seed_nodes, torch.LongTensor): + seed_nodes = torch.LongTensor(seed_nodes) + self.batches = torch.split(seed_nodes, self.batch_size) + + def __iter__(self, *args, **kwargs): + for batch in self.batches: + yield batch.numpy() + + def __len__(self): + return len(self.batches) + + def __call__(self, bid, *args, 
**kwargs): + return self.batches[bid] \ No newline at end of file diff --git a/sgl/dataset/ogbn.py b/sgl/dataset/ogbn.py index c25684a..42f822f 100644 --- a/sgl/dataset/ogbn.py +++ b/sgl/dataset/ogbn.py @@ -20,7 +20,7 @@ def __init__(self, name="arxiv", root="./", split="official"): @property def raw_file_paths(self): - filepath = "ogbn_" + self._name + "/raw/geometric_data_processed.pt" + filepath = "ogbn_" + self._name + "/processed/geometric_data_processed.pt" return osp.join(self._raw_dir, filepath) @property diff --git a/sgl/dataset/planetoid.py b/sgl/dataset/planetoid.py index 613cbab..a1ebf96 100644 --- a/sgl/dataset/planetoid.py +++ b/sgl/dataset/planetoid.py @@ -104,6 +104,10 @@ def __generate_split(self, split): train_idx = range(self.num_classes * 20) val_idx = range(self.num_classes * 20, self.num_classes * 20 + 500) test_idx = range(self.num_node - 1000, self.num_node) + elif split == "full": + train_idx = range(self.num_node - 1500) + val_idx = range(self.num_node - 1500, self.num_node - 1000) + test_idx = range(self.num_node - 1000, self.num_node) elif split == "random": raise NotImplementedError else: diff --git a/sgl/models/backup.py b/sgl/models/backup.py new file mode 100644 index 0000000..dd5ae08 --- /dev/null +++ b/sgl/models/backup.py @@ -0,0 +1,278 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from sgl.data.base_dataset import HeteroNodeDataset +from sgl.tasks.utils import sparse_mx_to_torch_sparse_tensor + + +class BaseSGAPModel(nn.Module): + def __init__(self, prop_steps, feat_dim, output_dim): + super(BaseSGAPModel, self).__init__() + self._prop_steps = prop_steps + self._feat_dim = feat_dim + self._output_dim = output_dim + + self._pre_graph_op, self._pre_msg_op = None, None + self._post_graph_op, self._post_msg_op = None, None + self._base_model = None + + self._processed_feat_list = None + self._processed_feature = None + self._pre_msg_learnable = False + + def preprocess(self, adj, feature): + if 
self._pre_graph_op is not None: + self._processed_feat_list = self._pre_graph_op.propagate( + adj, feature) + if self._pre_msg_op.aggr_type in [ + "proj_concat", "learnable_weighted", "iterate_learnable_weighted"]: + self._pre_msg_learnable = True + else: + self._pre_msg_learnable = False + self._processed_feature = self._pre_msg_op.aggregate( + self._processed_feat_list) + else: + self._pre_msg_learnable = False + self._processed_feature = feature + + def postprocess(self, adj, output): + if self._post_graph_op is not None: + if self._post_msg_op.aggr_type in [ + "proj_concat", "learnable_weighted", "iterate_learnable_weighted"]: + raise ValueError( + "Learnable weighted message operator is not supported in the post-processing phase!") + output = F.softmax(output, dim=1) + output = output.detach().numpy() + output = self._post_graph_op.propagate(adj, output) + output = self._post_msg_op.aggregate(output) + + return output + + # a wrapper of the forward function + def model_forward(self, idx, device): + return self.forward(idx, device) #直接走下面的代码了 + + def forward(self, idx, device): + processed_feature = None + if self._pre_msg_learnable is False: + processed_feature = self._processed_feature[idx].to(device) + else: + transferred_feat_list = [feat[idx].to( + device) for feat in self._processed_feat_list] + processed_feature = self._pre_msg_op.aggregate( + transferred_feat_list) + + output = self._base_model(processed_feature) #model training + return output + +class BaseSAMPLEModel(nn.Module): + def __init__(self, dataset, prop_steps, feat_dim, output_dim): + super(BaseSAMPLEModel, self).__init__() + # self._prop_steps = prop_steps + # self._feat_dim = feat_dim + # self._output_dim = output_dim + + self._pre_graph_op, self._sampling_op, self._post_graph_op = None, None, None + self._base_model = None + + self._processed_feat_list = None + self._processed_feature = None + self._pre_msg_learnable = False + self._norm_adj = None + print("BaseSAMPLEModel: consider 
different model for general purpose") + def preprocess(self, adj, feature): # + if self._pre_graph_op is not None: + self._norm_adj = self._pre_graph_op._construct_adj(adj) + self._processed_feature = feature + else: + print("do not normalize the adj") + self._pre_msg_learnable = False + self._processed_feature = feature + def postprocess(self, adj, output): + if self._post_graph_op is not None: + print("Not Implemented") + return output + # a wrapper of the forward function + def model_forward(self, idx, device): + return self.forward(idx, device) #直接走下面的代码了 + + def forward(self, idx, device): + # processed_feature = None + # if self._pre_msg_learnable is False: + # processed_feature = self._processed_feature[idx].to(device) + # else: + # transferred_feat_list = [feat[idx].to( + # device) for feat in self._processed_feat_list] + # processed_feature = self._pre_msg_op.aggregate( + # transferred_feat_list) + if self.training: + sampled_feats, sampled_adjs, var_loss = self._sampling_op.sampling( + idx) + transferred_sampled_feats = sampled_feats.to(device) + transferred_sampled_adjs = [adj.to(device) for adj in sampled_adjs] + + output = self._base_model(transferred_sampled_feats, transferred_sampled_adjs) + return output, var_loss + else: + transferred_sampled_feats = self._processed_feature.to(device) + transferred_sampled_adjs = [] + for adj in [self._norm_adj, self._norm_adj[idx, :]]: + transferred_sampled_adjs.append(sparse_mx_to_torch_sparse_tensor(adj).to(device)) + output = self._base_model(transferred_sampled_feats, transferred_sampled_adjs) + return output + +class BaseHeteroSGAPModel(nn.Module): + def __init__(self, prop_steps, feat_dim, output_dim): + super(BaseHeteroSGAPModel, self).__init__() + self._prop_steps = prop_steps + self._feat_dim = feat_dim + self._output_dim = output_dim + + self._pre_graph_op, self._pre_msg_op = None, None + self._aggregator = None + self._base_model = None + + self._propagated_feat_list_list = None + 
self._processed_feature_list = None + self._pre_msg_learnable = False + + # Either subgraph_list or (random_subgraph_num, subgraph_edge_type_num) should be provided. + def preprocess(self, dataset, predict_class, + random_subgraph_num=-1, subgraph_edge_type_num=-1, + subgraph_list=None): + if subgraph_list is None and (random_subgraph_num == -1 or subgraph_edge_type_num == -1): + raise ValueError( + "Either subgraph_list or (random_subgraph_num, subgraph_edge_type_num) should be provided!") + if subgraph_list is not None and (random_subgraph_num != -1 or subgraph_edge_type_num != -1): + raise ValueError( + "subgraph_list is provided, random_subgraph_num and subgraph_edge_type_num will be ignored!") + + if not isinstance(dataset, HeteroNodeDataset): + raise TypeError( + "Dataset must be an instance of HeteroNodeDataset!") + elif predict_class not in dataset.node_types: + raise ValueError("Please input valid node class for prediction!") + predict_idx = dataset.data.node_id_dict[predict_class] + + if subgraph_list is None: + subgraph_dict = dataset.nars_preprocess(dataset.edge_types, predict_class, + random_subgraph_num, + subgraph_edge_type_num) + subgraph_list = [(key, subgraph_dict[key]) + for key in subgraph_dict] + + self._propagated_feat_list_list = [[] + for _ in range(self._prop_steps + 1)] + + for key, value in subgraph_list: + edge_type_list = [] + for edge_type in key: + edge_type_list.append(edge_type.split("__")[0]) + edge_type_list.append(edge_type.split("__")[2]) + if predict_class in edge_type_list: + adj, feature, node_id = value + propagated_feature = self._pre_graph_op.propagate(adj, feature) + + start_pos = list(node_id).index(predict_idx[0]) + for i, feature in enumerate(propagated_feature): + self._propagated_feat_list_list[i].append( + feature[start_pos:start_pos + dataset.data.num_node[predict_class]]) + + # a wrapper of the forward function + def model_forward(self, idx, device): + return self.forward(idx, device) + + def forward(self, idx, 
device): + feat_input = [] + for x_list in self._propagated_feat_list_list: + feat_input.append([]) + for x in x_list: + feat_input[-1].append(x[idx].to(device)) + + aggregated_feat_list = self._aggregator(feat_input) + combined_feat = self._pre_msg_op.aggregate(aggregated_feat_list) + output = self._base_model(combined_feat) + + return output + + +class FastBaseHeteroSGAPModel(nn.Module): + def __init__(self, prop_steps, feat_dim, output_dim): + super(FastBaseHeteroSGAPModel, self).__init__() + self._prop_steps = prop_steps + self._feat_dim = feat_dim + self._output_dim = output_dim + + self._pre_graph_op = None + self._aggregator = None + self._base_model = None + + self._propagated_feat_list_list = None + self._processed_feature_list = None + self._pre_msg_learnable = False + + # Either subgraph_list or (random_subgraph_num, subgraph_edge_type_num) should be provided. + def preprocess(self, dataset, predict_class, + random_subgraph_num=-1, subgraph_edge_type_num=-1, + subgraph_list=None): + if subgraph_list is None and (random_subgraph_num == -1 or subgraph_edge_type_num == -1): + raise ValueError( + "Either subgraph_list or (random_subgraph_num, subgraph_edge_type_num) should be provided!") + if subgraph_list is not None and (random_subgraph_num != -1 or subgraph_edge_type_num != -1): + raise ValueError( + "subgraph_list is provided, random_subgraph_num and subgraph_edge_type_num will be ignored!") + + if not isinstance(dataset, HeteroNodeDataset): + raise TypeError( + "Dataset must be an instance of HeteroNodeDataset!") + elif predict_class not in dataset.node_types: + raise ValueError("Please input valid node class for prediction!") + predict_idx = dataset.data.node_id_dict[predict_class] + + if subgraph_list is None: + subgraph_dict = dataset.nars_preprocess(dataset.edge_types, predict_class, + random_subgraph_num, + subgraph_edge_type_num) + subgraph_list = [(key, subgraph_dict[key]) + for key in subgraph_dict] + + self._propagated_feat_list_list = [[] + 
for _ in range(self._prop_steps + 1)] + + for key, value in subgraph_list: + edge_type_list = [] + for edge_type in key: + edge_type_list.append(edge_type.split("__")[0]) + edge_type_list.append(edge_type.split("__")[2]) + if predict_class in edge_type_list: + adj, feature, node_id = value + propagated_feature = self._pre_graph_op.propagate(adj, feature) + + start_pos = list(node_id).index(predict_idx[0]) + for i, feature in enumerate(propagated_feature): + self._propagated_feat_list_list[i].append( + feature[start_pos:start_pos + dataset.data.num_node[predict_class]]) + + # 2-d list to 4-d tensor (num_node, feat_dim, num_subgraphs, prop_steps) + self._propagated_feat_list_list = [torch.stack( + x, dim=2) for x in self._propagated_feat_list_list] + self._propagated_feat_list_list = torch.stack( + self._propagated_feat_list_list, dim=3) + + # 4-d tensor to 3-d tensor (num_node, feat_dim, num_subgraphs * prop_steps) + shape = self._propagated_feat_list_list.size() + self._propagated_feat_list_list = self._propagated_feat_list_list.view( + shape[0], shape[1], shape[2] * shape[3]) + + # a wrapper of the forward function + def model_forward(self, idx, device): + return self.forward(idx, device) + + def forward(self, idx, device): + feat_input = self._propagated_feat_list_list[idx].to(device) + + aggregated_feat_from_diff_hops = self._aggregator(feat_input) + output = self._base_model(aggregated_feat_from_diff_hops) + + return output diff --git a/sgl/models/base_model.py b/sgl/models/base_model.py index 6c93988..07ec5aa 100644 --- a/sgl/models/base_model.py +++ b/sgl/models/base_model.py @@ -1,7 +1,7 @@ import torch import torch.nn as nn import torch.nn.functional as F - +from sgl.data.base_data import Block from sgl.data.base_dataset import HeteroNodeDataset @@ -20,7 +20,10 @@ def __init__(self, prop_steps, feat_dim, output_dim): self._processed_feature = None self._pre_msg_learnable = False - def preprocess(self, adj, feature): + def reset_parameters(self): + pass # 
TODO + + def preprocess(self, adj, feature, *args): if self._pre_graph_op is not None: self._processed_feat_list = self._pre_graph_op.propagate( adj, feature) @@ -34,7 +37,73 @@ def preprocess(self, adj, feature): else: self._pre_msg_learnable = False self._processed_feature = feature - + + @staticmethod + def model_train(model, train_idx, labels, device, optimizer, loss_fn, accuracy): + model.train() + optimizer.zero_grad() + + train_output = model.model_forward(train_idx, device) + loss_train = loss_fn(train_output, labels[train_idx]) + acc_train = accuracy(train_output, labels[train_idx]) + loss_train.backward() + optimizer.step() + + return loss_train.item(), acc_train + + @staticmethod + @torch.no_grad() + def model_evaluate(model, val_idx, test_idx, labels, device, metric): + model.eval() + val_output = model.model_forward(val_idx, device) + test_output = model.model_forward(test_idx, device) + + acc_val = metric(val_output, labels[val_idx]) + acc_test = metric(test_output, labels[test_idx]) + + return acc_val, acc_test + + @staticmethod + def model_mini_batch_train(model, train_idx, train_loader, labels, device, optimizer, loss_fn): + model.train() + correct_num = 0 + loss_train_sum = 0. 
+ for batch in train_loader: + train_output = model.model_forward(batch, device) + loss_train = loss_fn(train_output, labels[batch]) + + pred = train_output.max(1)[1].type_as(labels) + correct_num += pred.eq(labels[batch]).double().sum() + loss_train_sum += loss_train.item() + + optimizer.zero_grad() + loss_train.backward() + optimizer.step() + + loss_train = loss_train_sum / len(train_loader) + acc_train = correct_num / len(train_idx) + + return loss_train, acc_train.item() + + @staticmethod + @torch.no_grad() + def model_mini_batch_evaluate(model, val_idx, val_loader, test_idx, test_loader, labels, device): + model.eval() + correct_num_val, correct_num_test = 0, 0 + for batch in val_loader: + val_output = model.model_forward(batch, device) + pred = val_output.max(1)[1].type_as(labels) + correct_num_val += pred.eq(labels[batch]).double().sum() + acc_val = correct_num_val / len(val_idx) + + for batch in test_loader: + test_output = model.model_forward(batch, device) + pred = test_output.max(1)[1].type_as(labels) + correct_num_test += pred.eq(labels[batch]).double().sum() + acc_test = correct_num_test / len(test_idx) + + return acc_val.item(), acc_test.item() + def postprocess(self, adj, output): if self._post_graph_op is not None: if self._post_msg_op.aggr_type in [ @@ -62,10 +131,120 @@ def forward(self, idx, device): processed_feature = self._pre_msg_op.aggregate( transferred_feat_list) - output = self._base_model(processed_feature) + output = self._base_model(processed_feature) # model training return output +class BaseSAMPLEModel(nn.Module): + def __init__(self, evaluate_mode="full", sparse_type="pyg"): + super(BaseSAMPLEModel, self).__init__() + self._evaluate_mode = evaluate_mode + if sparse_type not in ["pyg", "torch", "2d-tensor"]: + raise ValueError(f"sparse type {sparse_type} is not supported, please use either pyg or torch.") + self._sparse_type = sparse_type + self._pre_graph_op, self._post_graph_op = None, None + self._training_sampling_op, 
self._eval_sampling_op = None, None + self._base_model = None + + @property + def evaluate_mode(self): + return self._evaluate_mode + + @property + def processed_block(self): + return self._processed_block + + @property + def processed_feature(self): + return self._processed_feature + + @property + def train_collate_fn(self): + return self._training_sampling_op.collate_fn + + @property + def eval_collate_fn(self): + return self._eval_sampling_op.collate_fn + + def reset_parameters(self): + self._base_model.reset_parameters() + + def mini_batch_prepare_forward(self, batch, device, inductive=False, transfer_y_to_device=True): + batch_in, batch_out, block = batch + + if inductive is False: + in_x = self._processed_feature[batch_in].to(device) + y_truth = self._vanilla_y[batch_out] + else: + in_x = self._processed_train_feature[batch_in].to(device) + y_truth = self._vanilla_train_y[batch_out] + + if transfer_y_to_device is True: + y_truth = y_truth.to(device) + + block.to_device(device) + + y_pred = self._base_model(in_x, block) + + return y_pred, y_truth + + def full_batch_prepare_forward(self, node_idx): + y_pred = self._base_model(self._processed_feature, self._processed_block)[node_idx] + y_truth = self._vanilla_y[node_idx] + return y_pred, y_truth + + @torch.no_grad() + def inference(self, dataloader, device): + preds = self._base_model.inference(self.processed_feature, dataloader, device) + return preds + + def preprocess(self, adj, x, y, device, **kwargs): + if self._pre_graph_op is not None: + norm_adj = self._pre_graph_op._construct_adj(adj) + else: + norm_adj = adj + + self._processed_block = Block(norm_adj, self._sparse_type) + + if hasattr(self, "_pre_feature_op"): + self._processed_feature = self._pre_feature_op._transform_x(x) + else: + self._processed_feature = x + + self._vanilla_y = y + mini_batch = kwargs.get("mini_batch", True) + if mini_batch is False: + self._processed_block.to_device(device) + self._processed_feature = 
self._processed_feature.to(device) + self._vanilla_y = self._vanilla_y.to(device) + + inductive = kwargs.get("inductive", False) + if inductive is True: + train_idx = kwargs.get("train_idx", None) + if train_idx is None: + raise ValueError(f"For inductive learning, " + "please pass train idx " + "as the parameters of preprocess function.") + if hasattr(self, "_pre_feature_op"): + self._processed_train_feature = self._pre_feature_op._transform_x(x[train_idx]) + else: + self._processed_train_feature = x[train_idx] + self._vanilla_train_y = y[train_idx] + + + def postprocess(self, adj, output): + if self._post_graph_op is not None: + raise NotImplementedError + return output + def model_forward(self, batch_in, block, device): + x = self._processed_feature[batch_in].to(device) + block.to_device(device) + return self.forward(x, block) + + def forward(self, x, block): + return self._base_model(x, block), self._vanilla_y + + class BaseHeteroSGAPModel(nn.Module): def __init__(self, prop_steps, feat_dim, output_dim): super(BaseHeteroSGAPModel, self).__init__() diff --git a/sgl/models/base_model_dist.py b/sgl/models/base_model_dist.py index 261f773..b6d3fa8 100644 --- a/sgl/models/base_model_dist.py +++ b/sgl/models/base_model_dist.py @@ -1,9 +1,6 @@ -import torch import torch.nn as nn import torch.nn.functional as F -from sgl.data.base_dataset import HeteroNodeDataset - class BaseSGAPModelDist(nn.Module): def __init__(self, prop_steps, feat_dim, output_dim): diff --git a/sgl/models/homo/__init__.py b/sgl/models/homo/__init__.py index cf4643a..06c65a4 100644 --- a/sgl/models/homo/__init__.py +++ b/sgl/models/homo/__init__.py @@ -6,6 +6,12 @@ from .ssgc import SSGC from .nafs import NAFS from .sgc_dist import SGCDist +from .fastgcn import FastGCN +from .clustergcn import ClusterGCN +from .graphsage import GraphSAGE +from .vanillagnn import VanillaGNN +from .lazygnn import LazyGNN +from .graphsaint import GraphSAINT __all__ = [ "SGC", @@ -15,5 +21,11 @@ "GAMLP", 
"GAMLPRecursive", "NAFS", - "SGCDist" + "SGCDist", + "FastGCN", + "ClusterGCN", + "GraphSAGE", + "VanillaGNN", + "LazyGNN", + "GraphSAINT" ] diff --git a/sgl/models/homo/clustergcn.py b/sgl/models/homo/clustergcn.py new file mode 100644 index 0000000..75f8038 --- /dev/null +++ b/sgl/models/homo/clustergcn.py @@ -0,0 +1,36 @@ +from sgl.models.pyg_simple_models import GCN +from sgl.models.base_model import BaseSAMPLEModel + +class ClusterGCN(BaseSAMPLEModel): + def __init__(self, training_sampler, eval_sampler, nfeat, hidden_dim, nclass, sparse_type="torch", dropout=0.5, num_layers=2, device="cpu"): + super(ClusterGCN, self).__init__(evaluate_mode="sampling", sparse_type=sparse_type) + self._training_sampling_op = training_sampler + self._eval_sampling_op = eval_sampler + self._base_model = GCN(n_feat=nfeat, n_hid=hidden_dim, n_class=nclass, n_layers=num_layers, dropout=dropout).to(device) + + def pre_sample(self, mode="train"): + if mode == "train": + self._training_sampling_op.multiple_graphs_sampling() + else: + self._eval_sampling_op.multiple_graphs_sampling() + + def mini_batch_prepare_forward(self, batch, device, inductive=False): + batch_in, batch_out, block = batch + local_inds, global_inds = batch_out + + if inductive is False: + in_x = self._processed_feature[batch_in].to(device) + y_truth = self._vanilla_y[global_inds].to(device) + else: + in_x = self._processed_train_feature[batch_in].to(device) + y_truth = self._vanilla_train_y[global_inds].to(device) + + block.to_device(device) + y_pred = self._base_model(in_x, block)[local_inds] + return y_pred, y_truth + + def collate_fn(self, batch_inds, mode): + if self.training: + return self._training_sampling_op.collate_fn(batch_inds, mode) + else: + return self._eval_sampling_op.collate_fn(batch_inds, mode) diff --git a/sgl/models/homo/fastgcn.py b/sgl/models/homo/fastgcn.py new file mode 100644 index 0000000..16fba2c --- /dev/null +++ b/sgl/models/homo/fastgcn.py @@ -0,0 +1,13 @@ +from sgl.models.simple_models 
import GCN +from sgl.models.base_model import BaseSAMPLEModel +from sgl.operators.graph_op import LaplacianGraphOp + +class FastGCN(BaseSAMPLEModel): + def __init__(self, dataset, training_sampler, eval_sampler, hidden_dim, sparse_type="torch", dropout=0.5, num_layers=2, device="cpu"): + super(FastGCN, self).__init__(sparse_type=sparse_type) + self._pre_graph_op = LaplacianGraphOp(r=0.5) + self._training_sampling_op = training_sampler + self._eval_sampling_op = eval_sampler + self._base_model = GCN( + n_feat=dataset.num_features, n_hid=hidden_dim, n_class=dataset.num_classes, n_layers=num_layers, dropout=dropout + ).to(device) diff --git a/sgl/models/homo/gamlp_dist.py b/sgl/models/homo/gamlp_dist.py index 1e7be08..877c2d1 100644 --- a/sgl/models/homo/gamlp_dist.py +++ b/sgl/models/homo/gamlp_dist.py @@ -1,4 +1,4 @@ -from sgl.models.base_model import BaseSGAPModelDist +from sgl.models.base_model_dist import BaseSGAPModelDist from sgl.models.simple_models import MultiLayerPerceptron from sgl.operators.graph_op import LaplacianGraphOp from sgl.operators.message_op import LearnableWeightedMessageOp diff --git a/sgl/models/homo/gda/FLAG.py b/sgl/models/homo/gda/FLAG.py new file mode 100644 index 0000000..0a6a487 --- /dev/null +++ b/sgl/models/homo/gda/FLAG.py @@ -0,0 +1,232 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from sgl.models.base_model import BaseSAMPLEModel +from sgl.utils import sparse_mx_to_pyg_sparse_tensor +from sgl.models.pyg_simple_models import GCN, SAGE, GAT + +GNN_BACKBONE = {"gcn": GCN, "sage": SAGE, "gat": GAT} + +class FLAG(nn.Module): + def __init__(self, in_dim, hidden_dim, n_classes, n_layers, dropout, gnn_type, step_size, augM, activation=F.relu, **kwargs): + super(FLAG, self).__init__() + self.__step_size = float(step_size) + self.__augM = augM + self.__amp = kwargs.pop("amp", 1) + if isinstance(activation, str): + activation = getattr(F, activation) + if gnn_type == 'gat': + if kwargs.get("n_heads"): + n_heads 
= list(map(lambda x: int(x), kwargs["n_heads"].split(","))) + else: + n_heads = [8] * (n_layers - 1) + [1] + kwargs.update({"n_heads": n_heads}) + self._base_model = GNN_BACKBONE.get(gnn_type)(in_dim, hidden_dim, n_classes, n_layers=n_layers, dropout=dropout, activation=activation, **kwargs) + + @property + def processed_feature(self): + return self.__features + + @property + def processed_adj(self): + return self.__processed_adj + + def preprocess(self, adj, features, device): + self.__features = features.to(device) + self.__processed_adj = sparse_mx_to_pyg_sparse_tensor(adj).to(device) + + def flag(self, ground_truth_y, optimizer, device, train_idx, loss_fn): + x = self.__features + adj = self.__processed_adj + + self._base_model.train() + optimizer.zero_grad() + + perturb = torch.FloatTensor(x.shape).uniform_(-self.__step_size, self.__step_size).to(device) + unlabel_idx = list(set(range(perturb.shape[0])) - set(train_idx)) + perturb.data[unlabel_idx] *= self.__amp + + perturb.requires_grad_() + pred_y = self._base_model(x+perturb, adj)[train_idx] + loss = loss_fn(pred_y, ground_truth_y) + loss /= self.__augM + + for _ in range(self.__augM-1): + loss.backward() + perturb_data = perturb[train_idx].detach() + self.__step_size * torch.sign(perturb.grad[train_idx].detach()) + perturb.data[train_idx] = perturb_data.data + perturb_data = perturb[unlabel_idx].detach() + self.__amp * self.__step_size * torch.sign(perturb.grad[unlabel_idx].detach()) + perturb.data[unlabel_idx] = perturb_data.data + perturb.grad[:] = 0 + + pred_y = self._base_model(x+perturb, adj)[train_idx] + loss = loss_fn(pred_y, ground_truth_y) + loss /= self.__augM + + loss.backward() + optimizer.step() + + return loss.item() + + @staticmethod + def model_train(model, train_idx, labels, device, optimizer, loss_fn, metric): + loss_train = model.flag(labels[train_idx], optimizer, device, train_idx, loss_fn) + + model.eval() + pred_y = model(model.processed_feature, model.processed_adj) + acc_train = 
metric(pred_y[train_idx], labels[train_idx]) + + return loss_train, acc_train + + @staticmethod + @torch.no_grad() + def model_evaluate(model, val_idx, test_idx, labels, device, metric): + model.eval() + pred_y = model(model.processed_feature, model.processed_adj) + + acc_val = metric(pred_y[val_idx], labels[val_idx]) + acc_test = metric(pred_y[test_idx], labels[test_idx]) + return acc_val, acc_test + + def model_forward(self, idx, device): + pred_y = self.forward(self.__features, self.__processed_adj) + return pred_y[idx] + + def forward(self, x, adj): + return self._base_model(x, adj) + + def postprocess(self, adj, outputs): + return outputs + + +class SampleFLAG(BaseSAMPLEModel): + def __init__(self, training_sampler, eval_sampler, in_dim, hidden_dim, n_classes, n_layers, dropout, gnn_type, step_size, augM, activation=F.relu, **kwargs): + super(SampleFLAG, self).__init__() + self.__step_size = float(step_size) + self.__augM = augM + self.__amp = kwargs.pop("amp", 1) + self._training_sampling_op = training_sampler + self._eval_sampling_op = eval_sampler + if isinstance(activation, str): + activation = getattr(F, activation) + if gnn_type == 'gat': + if kwargs.get("n_heads"): + n_heads = list(map(lambda x: int(x), kwargs["n_heads"].split(","))) + else: + n_heads = [8] * (n_layers - 1) + [1] + kwargs.update({"n_heads": n_heads}) + self._base_model = GNN_BACKBONE.get(gnn_type)(in_dim, hidden_dim, n_classes, n_layers=n_layers, dropout=dropout, activation=activation, **kwargs) + + def flag(self, clean, ground_truth_y, adjs, batch_out, optimizer, device, loss_fn): + self._base_model.train() + optimizer.zero_grad() + batch_size = len(batch_out) + perturb_t = torch.FloatTensor(clean[:batch_size].shape).uniform_(-self.__step_size, self.__step_size).to(device) + perturb_un = torch.FloatTensor(clean[batch_size:].shape).uniform_(-self.__amp * self.__step_size, self.__amp * self.__step_size).to(device) + perturb_t.requires_grad_() + perturb_un.requires_grad_() + + perturb = 
torch.cat([perturb_t, perturb_un], dim=0) + pred_y = self._base_model(clean+perturb, adjs) + loss = loss_fn(pred_y, ground_truth_y) + loss /= self.__augM + + for _ in range(self.__augM-1): + loss.backward() + + perturb_data_t = perturb_t.detach() + self.__step_size * torch.sign(perturb_t.grad.detach()) + perturb_t.data = perturb_data_t.data + perturb_t.grad[:] = 0 + + perturb_data_un = perturb_un.detach() + self.__amp * self.__step_size * torch.sign(perturb_un.grad.detach()) + perturb_un.data = perturb_data_un.data + perturb_un.grad[:] = 0 + + perturb = torch.cat((perturb_t, perturb_un), dim=0) + + pred_y = self._base_model(clean+perturb, adjs) + loss = loss_fn(pred_y, ground_truth_y) + loss /= self.__augM + + loss.backward() + optimizer.step() + + return loss.item(), pred_y + + def mini_batch_prepare_forward(self, batch, device, loss_fn, optimizer, inductive=False, transfer_y_to_device=True): + batch_in, batch_out, block = batch + + if inductive is False: + in_x = self._processed_feature[batch_in].to(device) + y_truth = self._vanilla_y[batch_out] + else: + in_x = self._processed_train_feature[batch_in].to(device) + y_truth = self._vanilla_train_y[batch_out] + + if transfer_y_to_device is True: + y_truth = y_truth.to(device) + + block.to_device(device) + loss, pred_y = self.flag(in_x, y_truth, block, batch_out, optimizer, device, loss_fn) + + return loss, pred_y, y_truth + + @staticmethod + def model_train(model, train_loader, inductive, device, optimizer, loss_fn): + correct_num = 0 + loss_train_sum = 0. 
+ train_num = 0 + + for batch in train_loader: + loss_train, y_out, y_truth = model.mini_batch_prepare_forward(batch, device, loss_fn, optimizer, inductive=inductive) + pred = y_out.max(1)[1].type_as(y_truth) + correct_num += pred.eq(y_truth).double().sum() + loss_train_sum += loss_train + train_num += len(y_truth) + + loss_train = loss_train_sum / len(train_loader) + acc_train = correct_num / train_num + + return loss_train, acc_train.item() + + @staticmethod + @torch.no_grad() + def model_evaluate(model, val_loader, test_loader, device): + model.eval() + + correct_num_val, correct_num_test = 0, 0 + val_num = 0 + for batch in val_loader: + val_output, out_y = model.model_forward(batch, device) + pred = val_output.max(1)[1].type_as(out_y) + correct_num_val += pred.eq(out_y).double().sum() + val_num += len(out_y) + + acc_val = correct_num_val / val_num + + test_num = 0 + for batch in test_loader: + test_output, out_y = model.model_forward(batch, device) + pred = test_output.max(1)[1].type_as(out_y) + correct_num_test += pred.eq(out_y).double().sum() + test_num += len(out_y) + + acc_test = correct_num_test / test_num + + return acc_val.item(), acc_test.item() + + def model_forward(self, batch, device): + batch_in, batch_out, block = batch + in_x = self._processed_feature[batch_in].to(device) + y_truth = self._vanilla_y[batch_out].to(device) + block.to_device(device) + + y_pred = self.forward(in_x, block) + return y_pred, y_truth + + def forward(self, x, adj): + return self._base_model(x, adj) + + def postprocess(self, adj, outputs): + return outputs \ No newline at end of file diff --git a/sgl/models/homo/gda/GAug.py b/sgl/models/homo/gda/GAug.py new file mode 100644 index 0000000..41ba487 --- /dev/null +++ b/sgl/models/homo/gda/GAug.py @@ -0,0 +1,294 @@ +import os +import pyro +import copy +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np +import pickle as pkl +import scipy.sparse as sp + +from sgl.utils import 
sparse_mx_to_pyg_sparse_tensor +from sgl.operators.graph_op import LaplacianGraphOp +from sgl.models.pyg_simple_models import GCNConv, GCN, SAGE, GAT +from sgl.models.homo.gda.utils import RoundNoGradient, CeilNoGradient + + +class GAugO(nn.Module): + def __init__(self, in_dim, hidden_dim, emb_size, n_classes, n_layers, dropout, gnn_type, + activation=F.relu, temperature=1, gae=False, alpha=1, feat_norm="row", sample_type="add_sample", **kwargs): + super(GAugO, self).__init__() + self.__temperature = temperature + self.__alpha = alpha + self.__sample_type = sample_type + self.__minibatch = kwargs.pop("minibatch", False) + # edge prediction network + self.__gae = gae + self.__feat_norm = feat_norm + self.ep_net = VGAE(in_dim, hidden_dim, emb_size, F.relu, gae=gae) + # node classification network + gnn_backbone = {"gcn": GCN, "gsage": SAGE, "gat": GAT} + if isinstance(activation, str): + activation = getattr(F, activation) + if gnn_type == "gat": + if kwargs.get("n_heads"): + n_heads = list(map(lambda x: int(x), kwargs["n_heads"].split(","))) + else: + n_heads = [8] * (n_layers - 1) + [1] + kwargs.update({"n_heads": n_heads}) + + self.nc_net = gnn_backbone.get(gnn_type)(in_dim, hidden_dim, n_classes, n_layers=n_layers, dropout=dropout, activation=activation, **kwargs) + + @property + def gae(self): + return self.__gae + + @staticmethod + def col_normalization(features): + """ column normalization for feature matrix """ + features = features.numpy() + m = features.mean(axis=0) + s = features.std(axis=0, ddof=0, keepdims=True) + 1e-12 + features -= m + features /= s + return torch.FloatTensor(features) + + def reset_parameters(self): + self.ep_net.reset_parameters() + self.nc_net.reset_parameters() + + def preprocess(self, features, adj_matrix, device): + if self.__feat_norm == "row": + features = F.normalize(features, p=1, dim=1) + elif self.__feat_norm == "col": + features = self.col_normalization(features) + features = features.to(device) + + assert 
sp.issparse(adj_matrix) + if not isinstance(adj_matrix, sp.coo_matrix): + adj_matrix = sp.coo_matrix(adj_matrix) + adj_matrix.setdiag(0) # remove incomplete self-loops before adding self-loops + adj_matrix_sl = adj_matrix + sp.eye(*adj_matrix.shape) + adj_orig = sparse_mx_to_pyg_sparse_tensor(adj_matrix_sl).to_dense() + adj_norm_matrix = LaplacianGraphOp()._construct_adj(adj_matrix) + adj_norm = sparse_mx_to_pyg_sparse_tensor(adj_norm_matrix).to(device) + adj = sparse_mx_to_pyg_sparse_tensor(adj_matrix).to(device) + + if self.__minibatch is False: + adj_orig = adj_orig.to(device) + + return features, adj_orig, adj, adj_norm + + @staticmethod + def sample_adj(adj_logits, temp): + """ sample an adj from the predicted edge probabilities of ep_net """ + edge_probs = adj_logits / torch.max(adj_logits) + # sampling + adj_sampled = pyro.distributions.RelaxedBernoulliStraightThrough(temperature=temp, probs=edge_probs).rsample() + # making adj_sampled symmetric + adj_sampled = adj_sampled.triu(1) + adj_sampled = adj_sampled + adj_sampled.T + return adj_sampled + + @staticmethod + def sample_adj_add_bernoulli(adj_logits, adj_orig, alpha, temp): + edge_probs = adj_logits / (torch.max(adj_logits) + 1e-5) + edge_probs = alpha * edge_probs + (1-alpha) * adj_orig + adj_sampled = pyro.distributions.RelaxedBernoulliStraightThrough(temperature=temp, probs=edge_probs).rsample() + # making adj_sampled symmetric + adj_sampled = adj_sampled.triu(1) + adj_sampled = adj_sampled + adj_sampled.T + return adj_sampled + + @staticmethod + def sample_adj_add_round(adj_logits, adj_orig, alpha): + edge_probs = adj_logits / torch.max(adj_logits) + edge_probs = alpha * edge_probs + (1-alpha) * adj_orig + # sampling + adj_sampled = RoundNoGradient.apply(edge_probs) + # making adj_sampled symmetric + adj_sampled = adj_sampled.triu(1) + adj_sampled = adj_sampled + adj_sampled.T + return adj_sampled + + @staticmethod + def sample_adj_random(adj_logits): + adj_rand = torch.rand(adj_logits.size()) + 
adj_rand = adj_rand.triu(1) + adj_rand = torch.round(adj_rand) + adj_rand = adj_rand + adj_rand.T + return adj_rand + + @staticmethod + def sample_adj_edge(adj_logits, adj_orig, change_frac): + adj = adj_orig.to_dense() if adj_orig.is_sparse else adj_orig + n_edges = adj.nonzero().size(0) + n_change = int(n_edges * change_frac / 2) + # take only the upper triangle + edge_probs = adj_logits.triu(1) + edge_probs = edge_probs - torch.min(edge_probs) + edge_probs = edge_probs / torch.max(edge_probs) + adj_inverse = 1 - adj + # get edges to be removed + mask_rm = edge_probs * adj + nz_mask_rm = mask_rm[mask_rm>0] + if len(nz_mask_rm) > 0: + n_rm = len(nz_mask_rm) if len(nz_mask_rm) < n_change else n_change + thresh_rm = torch.topk(mask_rm[mask_rm>0], n_rm, largest=False)[0][-1] + mask_rm[mask_rm > thresh_rm] = 0 + mask_rm = CeilNoGradient.apply(mask_rm) + mask_rm = mask_rm + mask_rm.T + # remove edges + adj_new = adj - mask_rm + # get edges to be added + mask_add = edge_probs * adj_inverse + nz_mask_add = mask_add[mask_add>0] + if len(nz_mask_add) > 0: + n_add = len(nz_mask_add) if len(nz_mask_add) < n_change else n_change + thresh_add = torch.topk(mask_add[mask_add>0], n_add, largest=True)[0][-1] + mask_add[mask_add < thresh_add] = 0 + mask_add = CeilNoGradient.apply(mask_add) + mask_add = mask_add + mask_add.T + # add edges + adj_new = adj_new + mask_add + return adj_new + + def forward(self, adj_norm, adj_orig, features, nodes_batch=None): + adj_logits = self.ep_net(adj_norm, features, nodes_batch) + if self.__sample_type == "edge": + adj_new = self.sample_adj_edge(adj_logits, adj_orig, self.__alpha) + elif self.__sample_type == "add_round": + adj_new = self.sample_adj_add_round(adj_logits, adj_orig, self.__alpha) + elif self.__sample_type == "rand": + adj_new = self.sample_adj_random(adj_logits) + elif self.__sample_type == "add_sample": + if self.__alpha == 1: + adj_new = self.sample_adj(adj_logits, self.__temperature) + else: + adj_new = 
self.sample_adj_add_bernoulli(adj_logits, adj_orig, self.__alpha, self.__temperature) + + row, col = adj_new.nonzero(as_tuple=True) + edge_index = torch.vstack([row, col]) + if nodes_batch is not None: + nc_logits = self.nc_net(features[nodes_batch], edge_index) + else: + nc_logits = self.nc_net(features, edge_index) + + return nc_logits, adj_logits + +class VGAE(nn.Module): + """ GAE/VGAE as edge prediction model """ + def __init__(self, in_dim, hidden_dim, emb_size, activation, gae=False): + super(VGAE, self).__init__() + self.gae = gae + self.activation = activation + self.gcn_base = GCNConv(in_dim, hidden_dim, add_self_loops=False, normalize=False, bias=False) + self.gcn_mean = GCNConv(hidden_dim, emb_size, add_self_loops=False, normalize=False, bias=False) + self.gcn_logstd = GCNConv(hidden_dim, emb_size, add_self_loops=False, normalize=False, bias=False) + + def reset_parameters(self): + self.gcn_base.reset_parameters() + self.gcn_mean.reset_parameters() + self.gcn_logstd.reset_parameters() + + def forward(self, adj, features, nodes_batch=None): + # GCN encoder + hidden = self.gcn_base(features, adj) + self.mean = self.activation(self.gcn_mean(hidden, adj)) + if self.gae: + # GAE (no sampling at bottleneck) + Z = self.mean + else: + # VGAE + self.logstd = self.activation(self.gcn_logstd(hidden, adj)) + gaussian_noise = torch.randn_like(self.mean) + sampled_Z = gaussian_noise * torch.exp(self.logstd) + self.mean + Z = sampled_Z + if nodes_batch is not None: + Z = Z[nodes_batch] + # inner product decoder + adj_logits = Z @ Z.T + return adj_logits + + +class GAugM(nn.Module): + def __init__(self, in_dim, hidden_dim, n_classes, n_layers, gnn_type, rm_pct, add_pct, choose_idx, gae=False, dropout=0.5, activation=F.relu, feat_norm='none', **kwargs): + super(GAugM, self).__init__() + + self.__feat_norm = feat_norm + self.__rm_pct = rm_pct + self.__add_pct = add_pct + self.__choose_idx = choose_idx + self.__gae = gae + if isinstance(activation, str): + activation = 
getattr(F, activation) + gnn_backbone = {"gcn": GCN, "gsage": SAGE, "gat": GAT} + if gnn_type == "gat": + if kwargs.get("n_heads"): + n_heads = list(map(lambda x: int(x), kwargs["n_heads"].split(","))) + else: + n_heads = [8] * (n_layers - 1) + [1] + kwargs.update({"n_heads": n_heads}) + + self.nc_net = gnn_backbone.get(gnn_type)(in_dim, hidden_dim, n_classes, n_layers=n_layers, dropout=dropout, activation=activation, **kwargs) + + def reset_parameters(self): + self.nc_net.reset_parameters() + + @staticmethod + def sample_graph_det(adj_orig, adj_pred, remove_pct, add_pct): + if remove_pct == 0 and add_pct == 0: + return copy.deepcopy(adj_orig) + + orig_upper = sp.triu(adj_orig, 1) + n_edges = orig_upper.nnz + edges = np.asarray(orig_upper.nonzero()).T + + if remove_pct: + n_remove = int(n_edges * remove_pct / 100) + pos_probs = adj_pred[edges.T[0], edges.T[1]] + e_index_2b_remove = np.argpartition(pos_probs, n_remove)[:n_remove] + mask = np.ones(len(edges), dtype=bool) + mask[e_index_2b_remove] = False + edges_pred = edges[mask] + else: + edges_pred = edges + + if add_pct: + n_add = int(n_edges * add_pct / 100) + # deep copy to avoid modifying adj_pred + adj_probs = np.array(adj_pred) + # make the probabilities of the lower half to be zero (including diagonal) + adj_probs[np.tril_indices(adj_probs.shape[0])] = 0 + # make the probabilities of existing edges to be zero + adj_probs[edges.T[0], edges.T[1]] = 0 + all_probs = adj_probs.reshape(-1) + e_index_2b_add = np.argpartition(all_probs, -n_add)[-n_add:] + new_edges = [] + for index in e_index_2b_add: + i = int(index / adj_probs.shape[0]) + j = index % adj_probs.shape[0] + new_edges.append([i, j]) + edges_pred = np.concatenate((edges_pred, new_edges), axis=0) + adj_pred = sp.csr_matrix((np.ones(len(edges_pred)), edges_pred.T), shape=adj_orig.shape) + adj_pred = adj_pred + adj_pred.T + + return adj_pred + + def preprocess(self, adj_orig, features, adj_pred_dir, device): + if self.__feat_norm == "row": + features = 
F.normalize(features, p=1, dim=1) + features = features.to(device) + + if self.__gae is True: + adj_pred = pkl.load(open(os.path.join(adj_pred_dir, f"{self.__choose_idx}_logits_gae.pkl"), "rb")) + else: + adj_pred = pkl.load(open(os.path.join(adj_pred_dir, f"{self.__choose_idx}_logits.pkl"), "rb")) + adj_pred = self.sample_graph_det(adj_orig, adj_pred, self.__rm_pct, self.__add_pct) + adj_processed = sparse_mx_to_pyg_sparse_tensor(adj_pred).to(device) + + return adj_processed, features + + def forward(self, adj, features): + return self.nc_net(features, adj) + diff --git a/sgl/models/homo/gda/Mixup.py b/sgl/models/homo/gda/Mixup.py new file mode 100644 index 0000000..a88c6de --- /dev/null +++ b/sgl/models/homo/gda/Mixup.py @@ -0,0 +1,411 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np +import scipy.sparse as sp +from torch_sparse import SparseTensor +from torch_geometric.nn import SAGEConv + +from sgl.data.base_data import Block +from sgl.models.base_model import BaseSAMPLEModel + +class Mixup(nn.Module): + def __init__(self, in_dim, hidden_dim, n_classes, n_layers, dropout, alpha, beta, gnn_type="sage", feat_norm="row", activation=F.relu, **kwargs): + super(Mixup, self).__init__() + self.alpha = alpha + self.beta = beta + self.__feat_norm = feat_norm + self.nc_net = TwoBranchGNN(in_dim, hidden_dim, n_classes, n_layers, dropout, gnn_type, activation, **kwargs) + + def preprocess(self, adj, features, device): + if self.__feat_norm == "row": + features = F.normalize(features, p=1, dim=1) + self.__num_nodes = features.size(0) + self.__features = features.to(device) + if isinstance(adj, sp.coo_matrix) is False: + adj = sp.coo_matrix(adj) + adj.setdiag(0) + self.__row = torch.from_numpy(adj.row).to(torch.long) + self.__col = torch.from_numpy(adj.col).to(torch.long) + self.__adj = torch.vstack([self.__row, self.__col]).to(device) + + @property + def processed_feature(self): + return self.__features + + @property + def 
processed_block(self): + return self.__adj + + @staticmethod + def loss_fn(mix_ratio, output, y_raw, y_b, train_idx): + loss = F.nll_loss(output[train_idx], y_raw[train_idx]) * mix_ratio + \ + F.nll_loss(output[train_idx], y_b[train_idx]) * (1 - mix_ratio) + return loss + + def reset_parameters(self): + self.nc_net.reset_parameters() + + @staticmethod + def model_train(model, train_idx, y_raw, device, optimizer, loss_fn, metric): + model.nc_net.train() + mix_ratio = np.random.beta(model.alpha, model.beta) + id_old_value_new, adj_b, y_b = model.mixup(train_idx, y_raw, device) + output = model.nc_net(model.processed_feature, model.processed_block, adj_b, mix_ratio, id_old_value_new) + + loss = loss_fn(mix_ratio, output, y_raw, y_b, train_idx) + optimizer.zero_grad() + loss.backward() + optimizer.step() + + model.nc_net.eval() + output = model(model.processed_feature, model.processed_block) + acc = metric(output[train_idx], y_raw[train_idx]) + + return loss.item(), acc + + @staticmethod + @torch.no_grad() + def model_evaluate(model, val_idx, test_idx, labels, device, metric): + model.nc_net.eval() + + pred_y = model(model.processed_feature, model.processed_block) + + acc_val = metric(pred_y[val_idx], labels[val_idx]) + acc_test = metric(pred_y[test_idx], labels[test_idx]) + + return acc_val, acc_test + + def mixup(self, train_idx, y_raw, device): + id_old_value_new = torch.arange(self.__num_nodes, dtype=torch.long) + train_idx_shuffle = np.asarray(train_idx) + np.random.shuffle(train_idx_shuffle) + # map raw node id to its pair node id + id_old_value_new[train_idx] = torch.from_numpy(train_idx_shuffle).to(torch.long) + id_new_value_old = torch.zeros_like(id_old_value_new) + # map the pair node id to the raw node id + id_new_value_old[id_old_value_new] = torch.arange(self.__num_nodes, dtype=torch.long) + row_b = id_old_value_new[self.__row] + col_b = id_old_value_new[self.__col] + adj_b = torch.vstack([row_b, col_b]).to(device) + y_b = y_raw[id_old_value_new] + + 
return id_old_value_new, adj_b, y_b + + def model_forward(self, idx, device): + output = self.forward(self.__features, self.__adj) + + return output[idx] + + def forward(self, x, adj): + output = self.nc_net(x, adj, adj, 1, np.arange(self.__num_nodes)) + + return output + + def postprocess(self, adj, output): + return output + + +class SampleMixup(BaseSAMPLEModel): + def __init__(self, training_sampler, eval_sampler, in_dim, hidden_dim, n_classes, n_layers, dropout, alpha, beta, gnn_type="sage", feat_norm="row", activation=F.relu, **kwargs): + super(SampleMixup, self).__init__(sparse_type="pyg") + self.alpha = alpha + self.beta = beta + self.__feat_norm = feat_norm + self._training_sampling_op = training_sampler + self._eval_sampling_op = eval_sampler + self._base_model = MinibatchTwoBranchGNN(in_dim, hidden_dim, n_classes, n_layers, dropout, gnn_type, activation, **kwargs) + + def preprocess(self, adj, x, y, device, **kwargs): + if self.__feat_norm == "row": + x = F.normalize(x, p=1, dim=1) + self.__num_nodes = x.size(0) + self.__features = x.to(device) + if isinstance(adj, sp.coo_matrix) is False: + adj = sp.coo_matrix(adj) + adj.setdiag(0) + self.__adj = Block(adj, sparse_type="pyg") + + self.__vanilla_y = y + + inductive = kwargs.get("inductive", False) + if inductive is True: + train_idx = kwargs.get("train_idx", None) + if train_idx is None: + raise ValueError(f"For inductive learning, " + "please pass train idx " + "as the parameters of preprocess function.") + self.__train_features = x[train_idx] + self.__vanilla_train_y = y[train_idx] + + @property + def processed_feature(self): + return self.__features + + @property + def processed_block(self): + return self.__adj + + @staticmethod + def loss_fn(mix_ratio, output, y_raw, y_b): + loss = F.nll_loss(output, y_raw) * mix_ratio + \ + F.nll_loss(output, y_b) * (1 - mix_ratio) + return loss + + def mini_batch_prepare_forward(self, batch, device, loss_fn, optimizer, inductive=False, transfer_y_to_device=True, 
mix_ratio=1): + batch_in, batch_out, block = batch + + if inductive is False: + in_x = self.__features[batch_in].to(device) + y_raw = self.__vanilla_y[batch_out] + else: + in_x = self.__train_features[batch_in].to(device) + y_raw = self.__vanilla_train_y[batch_out] + + if transfer_y_to_device is True: + y_raw = y_raw.to(device) + + id_old_value_new, block_b, y_b = self._mixup(batch_out.shape[0], batch_in.shape[0], block, y_raw) + block.to_device(device) + block_b.to_device(device) + output = self._base_model(in_x, block, block_b, mix_ratio, id_old_value_new) + + loss = loss_fn(mix_ratio, output, y_raw, y_b) + optimizer.zero_grad() + loss.backward() + optimizer.step() + + return loss.item(), output, y_raw + + @staticmethod + def train_func(model, train_loader, inductive, device, optimizer, loss_fn): + correct_num = 0 + loss_train_sum = 0. + train_num = 0 + + model.train() + mix_ratio = np.random.beta(model.alpha, model.beta) + + for batch in train_loader: + loss_train, y_out, y_truth = model.mini_batch_prepare_forward(batch, device, loss_fn, optimizer, inductive=inductive, mix_ratio=mix_ratio) + pred = y_out.max(1)[1].type_as(y_truth) + correct_num += pred.eq(y_truth).double().sum() + loss_train_sum += loss_train + train_num += len(y_truth) + + loss_train = loss_train_sum / len(train_loader) + acc_train = correct_num / train_num + + return loss_train, acc_train.item() + + @staticmethod + @torch.no_grad() + def model_evaluate(model, val_loader, test_loader, device): + model.eval() + + correct_num_val, correct_num_test = 0, 0 + val_num = 0 + for batch in val_loader: + val_output, out_y = model.model_forward(batch, device) + pred = val_output.max(1)[1].type_as(out_y) + correct_num_val += pred.eq(out_y).double().sum() + val_num += len(out_y) + + acc_val = correct_num_val / val_num + + test_num = 0 + for batch in test_loader: + test_output, out_y = model.model_forward(batch, device) + pred = test_output.max(1)[1].type_as(out_y) + correct_num_test += 
pred.eq(out_y).double().sum() + test_num += len(out_y) + + acc_test = correct_num_test / test_num + + return acc_val.item(), acc_test.item() + + def _mixup(self, num_train_nodes, batch_size, block, y_raw): + id_old_value_new = torch.arange(batch_size, dtype=torch.long) + train_idx_shuffle = np.arange(num_train_nodes) + np.random.shuffle(train_idx_shuffle) + # map raw node id to its pair node id + id_old_value_new[:num_train_nodes] = torch.from_numpy(train_idx_shuffle).to(torch.long) + id_new_value_old = torch.zeros_like(id_old_value_new) + # map the pair node id to the raw node id + id_new_value_old[id_old_value_new] = torch.arange(batch_size, dtype=torch.long) + adjs_b = [] + for i in range(len(block)): + adj = block[i] + if isinstance(adj, sp.coo_matrix) is False: + adj = sp.coo_matrix(adj) + row, col = adj.row, adj.col + row_b = id_old_value_new[row] + col_b = id_old_value_new[col] + adj_b = SparseTensor(row=row_b, col=col_b, value=torch.ones_like(row_b)) + adjs_b.append(adj_b) + + block_b = Block(adjs_b, sparse_type="pyg") + + y_b = y_raw[train_idx_shuffle] + + return id_old_value_new, block_b, y_b + + def postprocess(self, adj, output): + return output + + def model_forward(self, batch_in, block, device): + x = self.__features[batch_in].to(device) + block.to_device(device) + output = self.forward(x, block) + + return output + + def forward(self, x, block): + output = self._base_model(x, block, block, 1, np.arange(self.__num_nodes)) + + return output + +class TwoBranchGNN(nn.Module): + def __init__(self, in_dim, hidden_dim, n_classes, n_layers, dropout, gnn_type, activation=F.relu, **kwargs): + super(TwoBranchGNN, self).__init__() + self.gcs = nn.ModuleList() + if gnn_type != "sage": + raise NotImplementedError + self.gcs.append(SAGEConv(in_dim, hidden_dim)) + self.batch_norm = kwargs.get("batch_norm", False) + if self.batch_norm: + self.bns = nn.ModuleList() + self.bns.append(nn.BatchNorm1d(hidden_dim)) + for _ in range(n_layers-1): + 
self.gcs.append(SAGEConv(hidden_dim, hidden_dim)) + if self.batch_norm: + self.bns.append(nn.BatchNorm1d(hidden_dim)) + self.lin = nn.Linear(hidden_dim, n_classes) + self.n_layers = n_layers + self.dropout = dropout + self.activation = activation + + def reset_parameters(self): + for gc in self.gcs: + gc.reset_parameters() + self.lin.reset_parameters() + + def forward(self, x0, adj, adj_b, mix_ratio, id_old_value_new): + aggr_xs = [x0] + for i in range(self.n_layers-1): + x = self.gcs[i](aggr_xs[-1], adj) + if self.batch_norm: + x = self.bns[i](x) + x = self.activation(x) + x = F.dropout(x, p=self.dropout, training=self.training) + aggr_xs.append(x) + + aggr_xs_b = [] + for x in aggr_xs: + aggr_xs_b.append(x[id_old_value_new]) + + x_mix = aggr_xs[0] * mix_ratio + aggr_xs_b[0] * (1 - mix_ratio) + for i in range(self.n_layers): + x_new = self.gcs[i]((aggr_xs[i], x_mix), adj) + if self.batch_norm: + x_new = self.bns[i](x_new) + x_new = self.activation(x_new) + + x_new_b = self.gcs[i]((aggr_xs_b[i], x_mix), adj_b) + if self.batch_norm: + x_new_b = self.bns[i](x_new_b) + x_new_b = self.activation(x_new_b) + + x_mix = x_new * mix_ratio + x_new_b * (1 - mix_ratio) + x_mix = F.dropout(x_mix, self.dropout, training=self.training) + + x = self.lin(x_mix) + return F.log_softmax(x, dim=-1) + +class MinibatchTwoBranchGNN(nn.Module): + def __init__(self, in_dim, hidden_dim, n_classes, n_layers, dropout, gnn_type, activation=F.relu, **kwargs): + super(MinibatchTwoBranchGNN, self).__init__() + self.gcs = nn.ModuleList() + if gnn_type != "sage": + raise NotImplementedError + self.gcs.append(SAGEConv(in_dim, hidden_dim)) + self.batch_norm = kwargs.get("batch_norm", False) + if self.batch_norm: + self.bns = nn.ModuleList() + self.bns.append(nn.BatchNorm1d(hidden_dim)) + for _ in range(n_layers-1): + self.gcs.append(SAGEConv(hidden_dim, hidden_dim)) + if self.batch_norm: + self.bns.append(nn.BatchNorm1d(hidden_dim)) + self.lin = nn.Linear(hidden_dim, n_classes) + self.n_layers = 
n_layers + self.dropout = dropout + self.activation = activation + + def reset_parameters(self): + for gc in self.gcs: + gc.reset_parameters() + self.lin.reset_parameters() + + def forward(self, x0, block, block_b, mix_ratio, id_old_value_new): + aggr_xs = [x0] + for i in range(self.n_layers): + root_size = block.root_size(i) + root_x = aggr_xs[-1][:root_size] + x = self.gcs[i]((aggr_xs[-1], root_x), block[i]) + if self.batch_norm: + x = self.bns[i](x) + x = self.activation(x) + x = F.dropout(x, p=self.dropout, training=self.training) + aggr_xs.append(x) + + aggr_xs_b = [] + for x in aggr_xs: + num_nodes = x.size(0) + aggr_xs_b.append(x[id_old_value_new[:num_nodes]]) + + x_mix = aggr_xs[0] * mix_ratio + aggr_xs_b[0] * (1 - mix_ratio) + for i in range(self.n_layers): + root_size = block.root_size(i) + root_x = x_mix[:root_size] + x_new = self.gcs[i]((aggr_xs[i], root_x), block[i]) + if self.batch_norm: + x_new = self.bns[i](x_new) + x_new = self.activation(x_new) + + root_size = block_b.root_size(i) + root_x_b = x_mix[:root_size] + x_new_b = self.gcs[i]((aggr_xs_b[i], root_x_b), block_b[i]) + if self.batch_norm: + x_new_b = self.bns[i](x_new_b) + x_new_b = self.activation(x_new_b) + x_mix = x_new * mix_ratio + x_new_b * (1 - mix_ratio) + x_mix = F.dropout(x_mix, self.dropout, training=self.training) + + x = self.lin(x_mix) + return F.log_softmax(x, dim=-1) + + @torch.no_grad() + def inference(self, x_all, subgraph_loader, device): + + for i in range(self.n_layers): + xs = [] + for batch in subgraph_loader: + batch_in, batch_out, block = batch + block.to_device(device) + x = x_all[batch_in].to(device) + root_size = len(batch_out) + root_x = x[:root_size] + x = self.gcs[i]((x, root_x), block[0]) # one-layer sampling + if self.batch_norm: + x = self.bns[i](x) + x = self.activation(x) + if i == self.n_layers-1: + x = self.lin(x) + xs.append(x.cpu()) + + x_all = torch.cat(xs, dim=0) + + return x_all + + \ No newline at end of file diff --git 
class GraphConv(nn.Module):
    """Minimal graph-convolution layer used by the VGAE encoder.

    Computes ``adj @ (inputs @ W)`` with a Glorot-initialized weight matrix,
    optionally followed by an ELU nonlinearity.
    """

    def __init__(self, input_dim, output_dim, activation=True):
        super(GraphConv, self).__init__()
        # `activation` is a flag: True applies ELU to the propagated features.
        self.weight = self.glorot_init(input_dim, output_dim)
        self.activation = activation

    def glorot_init(self, input_dim, output_dim):
        """Uniform Glorot/Xavier init in [-r, r] with r = sqrt(6/(fan_in+fan_out))."""
        bound = np.sqrt(6.0 / (input_dim + output_dim))
        weight = torch.rand(input_dim, output_dim) * 2 * bound - bound
        return nn.Parameter(weight)

    def forward(self, adj, inputs):
        """Linear transform then neighborhood aggregation via `adj`."""
        support = inputs @ self.weight
        propagated = adj @ support
        return F.elu(propagated) if self.activation else propagated
def prepare_data(dataset, val_frac, test_frac, no_mask, norm_feat=True):
    """Build the edge-split training data for the (V)GAE edge predictor.

    Splits the upper-triangular edges of the dataset's adjacency into
    train/val/test sets, samples negative val/test edges, and returns the
    normalized training adjacency plus labels as torch tensors.

    Returns:
        (features, adj_ori, adj_train, adj_norm, adj_label,
         val_edges, val_edges_false, test_edges, test_edges_false)
    """
    adj_ori, features_orig = dataset.adj, dataset.x
    # Strip self-loops from the original adjacency if any are present.
    if adj_ori.diagonal().sum() > 0:
        adj_ori = sp.coo_matrix(adj_ori)
        adj_ori.setdiag(0)
        adj_ori.eliminate_zeros()
        adj_ori = sp.csr_matrix(adj_ori)
    if isinstance(features_orig, torch.Tensor):
        features_orig = features_orig.numpy()
    features_orig = sp.csr_matrix(features_orig)
    if norm_feat:
        # Row-wise L1 normalization of the feature matrix.
        features_orig = normalize(features_orig, norm="l1", axis=1)
    # Work on the upper triangle so each undirected edge is counted once.
    adj_triu = sp.triu(adj_ori)
    edges = sparse_to_tuple(adj_triu)[0]
    num_val = int(np.floor(edges.shape[0] * val_frac))
    num_test = int(np.floor(edges.shape[0] * test_frac))

    all_edge_idx = list(range(edges.shape[0]))
    np.random.shuffle(all_edge_idx)
    val_edge_idx = all_edge_idx[:num_val]
    test_edge_idx = all_edge_idx[num_val:(num_val+num_test)]
    val_edges = edges[val_edge_idx]
    test_edges = edges[test_edge_idx]
    # no_mask keeps val/test edges inside the training graph as well.
    if no_mask:
        train_edges = edges
    else:
        train_edge_idx = all_edge_idx[num_val+num_test:]
        train_edges = edges[train_edge_idx]

    num_nodes = adj_ori.shape[0]
    # Negative edges are sampled against the full graph + self-loops, so a
    # "false" edge can never coincide with a real edge or a diagonal entry.
    test_edges_false = negative_sampling(from_scipy_sparse_matrix(adj_ori+sp.eye(adj_ori.shape[0]))[0], num_nodes, num_test)
    test_edges_false = test_edges_false.numpy()

    val_edges_false = negative_sampling(from_scipy_sparse_matrix(adj_ori+sp.eye(adj_ori.shape[0]))[0], num_nodes, num_val)
    val_edges_false = val_edges_false.numpy()

    # Rebuild a symmetric training adjacency from the kept (upper-tri) edges.
    adj_train = sp.csr_matrix((np.ones(train_edges.shape[0]), (train_edges[:, 0], train_edges[:, 1])), shape=adj_ori.shape)
    adj_train = adj_train + adj_train.T
    adj_norm = LaplacianGraphOp()._construct_adj(adj_train)
    adj_norm = sparse_mx_to_torch_sparse_tensor(adj_norm)
    # Reconstruction target includes self-loops.
    adj_label = adj_train + sp.eye(adj_train.shape[0])
    adj_label = sparse_mx_to_torch_sparse_tensor(adj_label)
    features = sparse_mx_to_torch_sparse_tensor(features_orig)

    return features, adj_ori, adj_train, adj_norm, adj_label, val_edges, val_edges_false, test_edges, test_edges_false
def graph_generate(dataset, model, lr, epochs, val_frac, test_frac, no_mask, num_gen_graphs, device, criterion, norm_feat=True, gae=True, verbose=False):
    """Train the (V)GAE edge predictor and dump edge probabilities for GAugM.

    Writes, under ``<dataset.processed_dir>/GAugM_edge_probabilities``:
      * ``0_gae.pkl`` / ``0.pkl`` — the original (self-loop-free) adjacency;
      * ``{i}_logits_gae.pkl`` / ``{i}_logits.pkl`` — sigmoid edge
        probabilities of the i-th generated graph, diagonal zeroed.

    Args:
        dataset: sgl dataset providing ``adj``, ``x`` and ``processed_dir``.
        model: a VGAE instance (moved to ``device`` here).
        criterion: validation metric key used for model selection.
        gae: True selects the deterministic (non-variational) encoder and the
            ``_gae`` filename suffix.
    """
    data = prepare_data(dataset, val_frac, test_frac, no_mask, norm_feat)
    model = model.to(device)
    model = train_model(data, model, lr, epochs, gae, device, verbose, criterion)
    adj_ori = data[1]
    save_dir = os.path.join(dataset.processed_dir, "GAugM_edge_probabilities")
    # Fix: the output subdirectory is never created by the dataset pipeline;
    # without this the first dump fails with FileNotFoundError.
    os.makedirs(save_dir, exist_ok=True)
    if gae:
        save_path = os.path.join(save_dir, "0_gae.pkl")
    else:
        save_path = os.path.join(save_dir, "0.pkl")
    # Fix: use context managers — the original left file handles unclosed.
    with open(save_path, "wb") as f:
        pkl.dump(adj_ori, f)
    features = data[0].to(device)
    adj_norm = data[3].to(device)
    for i in range(num_gen_graphs):
        # NOTE(review): with gae=True the encoder is deterministic, so all
        # generated graphs are identical; variation only arises when the
        # variational branch samples a latent — confirm intended.
        with torch.no_grad():
            adj_pred = model(adj_norm, features)
        adj_pred = torch.sigmoid(adj_pred).detach().cpu()
        adj_recon = adj_pred.numpy()
        # Self-loops are not candidate edges; zero the diagonal.
        np.fill_diagonal(adj_recon, 0)
        if gae:
            save_path = os.path.join(save_dir, f"{i+1}_logits_gae.pkl")
        else:
            save_path = os.path.join(save_dir, f"{i+1}_logits.pkl")
        with open(save_path, "wb") as f:
            pkl.dump(adj_recon, f)
def sparse_to_tuple(sparse_mx):
    """Decompose a scipy sparse matrix into (coords, values, shape).

    ``coords`` is an (nnz, 2) array of [row, col] pairs in COO order,
    ``values`` the matching data array, ``shape`` the matrix shape tuple.
    """
    coo = sparse_mx if sp.isspmatrix_coo(sparse_mx) else sparse_mx.tocoo()
    coords = np.stack((coo.row, coo.col), axis=1)
    return coords, coo.data, coo.shape
self._pre_graph_op = RwGraphOp() + self._pre_feature_op = PreNormMessageOp(p=1, dim=1) + self._training_sampling_op = training_sampler + self._eval_sampling_op = eval_sampler + self._base_model = SAGE( + n_feat=dataset.num_features, n_hid=hidden_dim, n_class=dataset.num_classes, n_layers=num_layers, dropout=dropout + ).to(device) diff --git a/sgl/models/homo/graphsaint.py b/sgl/models/homo/graphsaint.py new file mode 100644 index 0000000..7e29b09 --- /dev/null +++ b/sgl/models/homo/graphsaint.py @@ -0,0 +1,53 @@ +import torch.nn.functional as F + +from sgl.models.simple_models import GCN +from sgl.models.base_model import BaseSAMPLEModel +from sgl.operators.graph_op import RwGraphOp + +class GraphSAINT(BaseSAMPLEModel): + def __init__(self, dataset, training_sampler, eval_sampler, hidden_dim, sparse_type="torch", dropout=0.5, num_layers=2, device="cpu"): + super(GraphSAINT, self).__init__(sparse_type=sparse_type) + self._pre_graph_op = RwGraphOp() + self._training_sampling_op = training_sampler + self._eval_sampling_op = eval_sampler + self._device = device + self._base_model = GCN( + n_feat=dataset.num_features, n_hid=hidden_dim, n_class=dataset.num_classes, n_layers=num_layers, dropout=dropout + ).to(device) + + def pre_sample(self, mode="train"): + if mode == "train": + self._training_sampling_op._calc_norm() + self._loss_norm = self._training_sampling_op.loss_norm.to(self._device) + else: + raise ValueError("GraphSAINT sampler now only support training mode.") + + def mini_batch_prepare_forward(self, batch, device, inductive=False): + batch_in, batch_out, block = batch + local_inds, global_inds = batch_out + + if inductive is False: + in_x = self._processed_feature[batch_in].to(device) + y_truth = self._vanilla_y[global_inds].to(device) + else: + in_x = self._processed_train_feature[batch_in].to(device) + y_truth = self._vanilla_train_y[global_inds].to(device) + + block.to_device(device) + y_pred = self._base_model(in_x, block)[local_inds] + return y_pred, 
class LazyGNN(BaseSAMPLEModel):
    """Sampling-based GNN with "recycled" minibatches.

    A sampled subgraph is reused for tau_k consecutive iterations; the reuse
    period grows geometrically (rho) via `generate_taus`.  Subgraph sampling
    runs in background threads (`flash_sampling` for training,
    `sequential_sampling` for evaluation).
    """

    def __init__(self, dataset, training_sampler, eval_sampler=None, hidden_dim=128, basemodel="GCN", sparse_type="torch", dropout=0.5, num_layers=2, max_workers=5, max_threads=-1, rho=1.1, tau=2, device="cpu"):
        super(LazyGNN, self).__init__(sparse_type=sparse_type)
        # Pre-propagation operator depends on the backbone conv type.
        if basemodel == "SAGE":
            self._pre_graph_op = RwGraphOp()
        elif basemodel == "GCN":
            self._pre_graph_op = LaplacianGraphOp(r=0.5)
        self._training_sampling_op = training_sampler
        self._eval_sampling_op = eval_sampler
        self._max_workers = max_workers
        # max_threads=-1 means "use half of the available torch threads".
        self._max_threads = max_threads if max_threads > -1 else torch.get_num_threads() // 2
        self._device = device
        # hyperparameters for recycling
        self._rho = rho
        self._tau = tau
        # define the base model
        self._base_model = getattr(SimpleModels, basemodel)(
            n_feat=dataset.num_features, n_hid=hidden_dim, n_class=dataset.num_classes, n_layers=num_layers, dropout=dropout
        ).to(device)

    def preprocess(self, adj, x, val_dataloader=None, test_dataloader=None):
        """Prepare features/adjacency (full-batch) or pre-submit eval sampling jobs.

        NOTE(review): ThreadPoolExecutor's `with` block calls
        shutdown(wait=True) on exit, so the submitted sampling jobs complete
        before this method returns — confirm whether overlapping this work
        with training was the intent.
        """
        if val_dataloader is None:
            norm_adj = self._pre_graph_op._construct_adj(adj)
            norm_adj = sparse_mx_to_torch_sparse_tensor(norm_adj)
            # if evaluation on full-batch, then we can pre-move the full feature/adjacency matrix to the device to save time
            self._processed_block = Block(norm_adj)
            self._processed_block.to_device(self._device)
            self._processed_feature = x.to(self._device)
        else:
            # If val/test_dataloader is provided, it means that we conduct minibatch evaluation.
            # In such case, we could prepare evaluation minibatches in advance.
            self._val_samples = []
            with concurrent.futures.ThreadPoolExecutor(max_workers=int(torch.get_num_threads()*0.4)) as executor:
                self._val_sampling_jobs = [executor.submit(
                    self._eval_sampling_op.collate_fn, val_dataloader(bid)) for bid in range(len(val_dataloader))]
            self._test_samples = []
            with concurrent.futures.ThreadPoolExecutor(max_workers=int(torch.get_num_threads()*0.4)) as executor:
                self._test_sampling_jobs = [executor.submit(
                    self._eval_sampling_op.collate_fn, test_dataloader(bid)) for bid in range(len(test_dataloader))]
            self._processed_feature = x

    def generate_taus(self, T):
        """Split T total iterations into geometrically growing reuse periods.

        tau_k = int(tau * rho**k), capped so that sum(taus) == T exactly.
        """
        self._taus = []
        k = 0
        total_taus = 0
        while total_taus < T:
            tau_i = int(self._tau * np.power(self._rho, k))
            tau_i = min(tau_i, T - total_taus)
            self._taus.append(tau_i)
            total_taus += tau_i
            k += 1
        return self._taus

    def model_forward(self, x=None, block=None, use_full=False):
        """Run the base model on a sampled (x, block) pair or the full graph."""
        if use_full is False:
            return self._base_model(x, block)
        else:
            return self._base_model(self._processed_feature, self._processed_block)

    def flash_sampling(self, total_iter, dataloader):
        """Yield training subgraphs, batching sampling jobs per recycling period.

        The number of parallel sampling jobs grows with the current reuse
        period (longer periods -> more subgraphs prefetched), bounded by
        [1, max_threads].
        """
        min_iter, max_iter = 1, self._max_threads
        count_iter, max_cycle = 0, max(self._taus)
        # pre_cycle[k] = cumulative iterations covered by taus[0..k].
        pre_cycle = np.asarray(list(itertools.accumulate(self._taus)))
        sampling_func = self._training_sampling_op.collate_fn

        while count_iter < total_iter:
            # adaptively increase the number of sampled subgraphs
            curr_cycle = self._taus[pre_cycle.searchsorted(count_iter, 'right')]
            curr_iter = min_iter + int(curr_cycle / max_cycle * (max_iter - min_iter))
            curr_iter = min(curr_iter, total_iter - count_iter)
            count_iter += curr_iter

            with concurrent.futures.ThreadPoolExecutor(max_workers=self._max_workers) as executor:
                sampling_jobs = [executor.submit(sampling_func, dataloader) for _ in range(curr_iter)]

            for future in concurrent.futures.as_completed(sampling_jobs):
                yield (future.result())

    def sequential_sampling(self, do_val):
        """Return the pre-sampled evaluation minibatches (val or test), collecting
        the futures submitted in `preprocess` on first use."""
        if do_val is True:
            if len(self._val_samples) == 0:
                # When val_sampling is called at the first time,
                # it would take a little more time to receive the subgraphs.
                print('Waiting for validation minibatch...')
                # Order won't be the same, but it doesn't matter
                for future in concurrent.futures.as_completed(self._val_sampling_jobs):
                    self._val_samples.append(future.result())
                print('Validation minibatch is ready...')

            return self._val_samples
        else:
            if len(self._test_samples) == 0:
                print('Waiting for test minibatch...')
                for future in concurrent.futures.as_completed(self._test_sampling_jobs):
                    self._test_samples.append(future.result())
                print('Test minibatch is ready...')

            return self._test_samples
class VanillaGNN(BaseSAMPLEModel):
    """
    It is a naive version of Graph Convolutional Network which works in full-batch training.

    Training uses `training_sampler`; evaluation runs full-batch
    (evaluate_mode="full").  The backbone conv type is chosen by `basemodel`
    ("GCN" or "SAGE") and looked up in sgl.models.simple_models.
    """
    def __init__(self, dataset, training_sampler, eval_sampler, hidden_dim, basemodel="GCN", sparse_type="torch", dropout=0.5, num_layers=2, device="cpu"):
        super(VanillaGNN, self).__init__(evaluate_mode="full", sparse_type=sparse_type)
        # Adjacency pre-processing depends on the backbone:
        # RwGraphOp for SAGE, Laplacian normalization (r=0.5) for GCN.
        if basemodel == "SAGE":
            self._pre_graph_op = RwGraphOp()
        elif basemodel == "GCN":
            self._pre_graph_op = LaplacianGraphOp(r=0.5)
        self._training_sampling_op = training_sampler
        self._eval_sampling_op = eval_sampler
        # Backbone model resolved by name from simple_models.
        self._base_model = getattr(SimpleModels, basemodel)(
            n_feat=dataset.num_features, n_hid=hidden_dim, n_class=dataset.num_classes, n_layers=num_layers, dropout=dropout
        ).to(device)
self.bns: + bn.reset_parameters() + + def forward(self, x, block): + repr = x + if isinstance(block, (SparseTensor, torch.Tensor)): + block = [block] + if len(block) == self.n_layers: + for i in range(self.n_layers-1): + repr = self.gcs[i](repr, block[i]) + if self.batch_norm: + repr = self.bns[i](repr) + repr = self.activation(repr) + repr = F.dropout(repr, self.dropout, training=self.training) + repr = self.gcs[-1](repr, block[-1]) + elif len(block) == 1: + for i in range(self.n_layers-1): + repr = self.gcs[i](repr, block[0]) + if self.batch_norm: + repr = self.bns[i](repr) + repr = self.activation(repr) + repr = F.dropout(repr, self.dropout, training=self.training) + repr = self.gcs[-1](repr, block[0]) + else: + raise ValueError('The sampling layer must be equal to GNN layer.') + + return F.log_softmax(repr, dim=1) + + @torch.no_grad() + def inference(self, x_all, subgraph_loader, device): + # Compute representations of nodes layer by layer, using *all* + # available edges. This leads to faster computation in contrast to + # immediately computing the final representations of each batch. 
class SAGE(nn.Module):
    """GraphSAGE built on torch_geometric's SAGEConv.

    `forward` accepts either a per-layer list of sampled adjacencies (one per
    GNN layer, with bipartite (src, dst) inputs) or a single adjacency reused
    by every layer.
    """
    def __init__(self, n_feat, n_hid, n_class, n_layers=2, dropout=0.5, activation=F.relu, batch_norm=False, normalize=False):
        super(SAGE, self).__init__()
        self.gcs = nn.ModuleList()
        self.gcs.append(SAGEConv(n_feat, n_hid))
        self.batch_norm = batch_norm
        self.normalize = normalize
        # Optional row-wise L1 normalization applied after each hidden layer.
        if normalize:
            self.norm = lambda x: F.normalize(x, p=1, dim=1)
        if self.batch_norm:
            self.bns = nn.ModuleList()
            self.bns.append(nn.BatchNorm1d(n_hid))
        self.n_layers = n_layers
        for _ in range(n_layers-2):
            self.gcs.append(SAGEConv(n_hid, n_hid))
            if self.batch_norm:
                self.bns.append(nn.BatchNorm1d(n_hid))
        self.gcs.append(SAGEConv(n_hid, n_class))
        self.dropout = dropout
        self.activation = activation

    def reset_parameters(self):
        """Re-initialize all conv layers and batch norms."""
        for conv in self.gcs:
            conv.reset_parameters()
        if self.batch_norm:
            for bn in self.bns:
                bn.reset_parameters()

    def forward(self, x, block):
        repr = x
        # A raw tensor/SparseTensor means one shared adjacency for all layers.
        if isinstance(block, (SparseTensor, torch.Tensor)):
            block = [block]
        if len(block) == self.n_layers:
            # Layer-wise sampled case: `block` exposes per-layer adjacencies
            # and the target-node count via root_size(i) (project Block API).
            for i in range(self.n_layers-1):
                root_size = block.root_size(i)
                root_repr = repr[:root_size]
                repr = self.gcs[i]((repr, root_repr), block[i])
                if self.normalize:
                    repr = self.norm(repr)
                if self.batch_norm:
                    repr = self.bns[i](repr)
                repr = self.activation(repr)
                repr = F.dropout(repr, self.dropout, training=self.training)
            root_size = block.root_size(-1)
            root_repr = repr[:root_size]
            repr = self.gcs[-1]((repr, root_repr), block[-1])
        elif len(block) == 1:
            # Shared-adjacency (full-graph) case.
            for i in range(self.n_layers-1):
                repr = self.gcs[i](repr, block[0])
                if self.normalize:
                    repr = self.norm(repr)
                if self.batch_norm:
                    repr = self.bns[i](repr)
                repr = self.activation(repr)
                repr = F.dropout(repr, self.dropout, training=self.training)
            repr = self.gcs[-1](repr, block[0])
        else:
            raise ValueError('The sampling layer must be equal to GNN layer.')

        return F.log_softmax(repr, dim=1)

    @torch.no_grad()
    def inference(self, x_all, subgraph_loader, device):
        # Compute representations of nodes layer by layer, using *all*
        # available edges. This leads to faster computation in contrast to
        # immediately computing the final representations of each batch.
        # Note: no log_softmax here — returns raw last-layer outputs.
        for i in range(self.n_layers):
            xs = []
            for batch in subgraph_loader:
                batch_in, batch_out, block = batch
                block.to_device(device)
                x = x_all[batch_in].to(device)
                root_size = len(batch_out)
                root_x = x[:root_size]
                x = self.gcs[i]((x, root_x), block[0])
                # one-layer sampling
                if i != self.n_layers - 1:
                    if self.batch_norm:
                        x = self.bns[i](x)
                    x = self.activation(x)
                xs.append(x.cpu())

            x_all = torch.cat(xs, dim=0)

        return x_all
forward(self, x, block): + repr = x + if isinstance(block, (SparseTensor, torch.Tensor)): + block = [block] + if len(block) == self.n_layers: + for i in range(self.n_layers-1): + root_size = block.root_size(i) + root_repr = repr[:root_size] + repr = self.gcs[i]((repr, root_repr), block[i]) + if self.batch_norm: + repr = self.bns[i](repr) + repr = self.activation(repr) + repr = F.dropout(repr, self.dropout, training=self.training) + root_size = block.root_size(-1) + root_repr = repr[:root_size] + repr = self.gcs[-1]((repr, root_repr), block[-1]) + elif len(block) == 1: + for i in range(self.n_layers-1): + repr = self.gcs[i](repr, block[0]) + if self.batch_norm: + repr = self.bns[i](repr) + repr = self.activation(repr) + repr = F.dropout(repr, self.dropout, training=self.training) + repr = self.gcs[-1](repr, block[0]) + else: + raise ValueError('The sampling layer must be equal to GNN layer.') + + return F.log_softmax(repr, dim=-1) + + @torch.no_grad() + def inference(self, x_all, subgraph_loader, device): + # Compute representations of nodes layer by layer, using *all* + # available edges. This leads to faster computation in contrast to + # immediately computing the final representations of each batch. 
class GCNConv(nn.Module):
    """
    Simple GCN layer, similar to https://arxiv.org/abs/1609.02907

    Computes adj @ (input @ W) (+ bias). The weight is Xavier-uniform
    initialized; the bias (when enabled) is zero-initialized.
    """

    def __init__(self, in_features, out_features, bias=True):
        super(GCNConv, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = nn.Parameter(torch.FloatTensor(in_features, out_features))
        if bias:
            self.bias = nn.Parameter(torch.FloatTensor(out_features))
        else:
            # Register None so `self.bias` resolves and is skipped by optimizers.
            self.register_parameter("bias", None)
        self.reset_parameters()

    def reset_parameters(self):
        """Xavier-uniform for 2-D params (weight), zeros for 1-D params (bias)."""
        for p in self.parameters():
            if p.dim() == 2:
                nn.init.xavier_uniform_(p)
            else:
                nn.init.constant_(p, 0.0)

    def forward(self, input, adj):
        """Linear transform then sparse neighborhood aggregation."""
        hidden = torch.spmm(adj, torch.mm(input, self.weight))
        if self.bias is None:
            return hidden
        return hidden + self.bias
__init__(self, in_features, out_features, normalize=True): + super(SAGEConv, self).__init__() + if isinstance(in_features, int): + in_features = (in_features, in_features) + self.in_features = in_features + self.out_features = out_features + self.normalize = normalize + + self.lin_l = nn.Linear(in_features[0], out_features) + self.lin_r = nn.Linear(in_features[1], out_features) + if normalize: + self.norm = lambda x: F.normalize(x, p=1, dim=1) + + self.reset_parameters() + + def reset_parameters(self): + self.lin_l.reset_parameters() + self.lin_r.reset_parameters() + + def forward(self, x, adj): + output = torch.spmm(adj, x) + output = self.lin_l(output) + + num_tgt = adj.shape[0] + x_r = x[:num_tgt] + output += self.lin_r(x_r) + + if self.normalize: + output = self.norm(output) + + return output + + +class GATConv(nn.Module): + """ + Simple GAT layer + """ + def __init__(self, in_features, out_features, n_heads, bias=True): + super(GATConv, self).__init__() + self.W = nn.Parameter(torch.FloatTensor(in_features, out_features)) + self.n_heads = n_heads + self.attn_l = nn.Linear(out_features, self.n_heads, bias=False) + self.attn_r = nn.Linear(out_features, self.n_heads, bias=False) + self.attn_drop = nn.Dropout(p=0.6) + if bias: + self.b = nn.Parameter(torch.FloatTensor(out_features)) + else: + self.b = None + self.reset_parameters() + + def reset_parameters(self): + """ Initialize weights with xavier uniform and biases with all zeros """ + for param in self.parameters(): + if len(param.size()) == 2: + nn.init.xavier_uniform_(param) + else: + nn.init.constant_(param, 0.0) + + def forward(self, x, adj): + repr = x @ self.W + el = self.attn_l(repr) + er = self.attn_r(repr) + if isinstance(adj, torch.sparse.FloatTensor): + nz_indices = adj._indices() + else: + nz_indices = adj.nonzero().T + attn = el[nz_indices[0]] + er[nz_indices[1]] + attn = F.leaky_relu(attn, negative_slope=0.2).squeeze() + attn = torch.exp(attn) + if self.n_heads == 1: + adj_attn = 
torch.zeros(size=(adj.size(0), adj.size(1)), device=adj.device) + adj_attn.index_put_((nz_indices[0], nz_indices[1]), attn) + else: + adj_attn = torch.zeros(size=(adj.size(0), adj.size(1), self.n_heads), device=adj.device) + adj_attn.index_put_((nz_indices[0], nz_indices[1]), attn) + adj_attn.transpose_(1, 2) + adj_attn = F.normalize(adj_attn, p=1, dim=-1) + adj_attn = self.attn_drop(adj_attn) + repr = adj_attn @ repr + if self.b is not None: + repr = repr + self.b + if self.n_heads > 1: + repr = repr.flatten(start_dim=1) + return repr + + +class SAGE(nn.Module): + def __init__(self, n_feat, n_hid, n_class, n_layers=2, dropout=0.5, activation=F.relu, batch_norm=False, normalize=True): + super(SAGE, self).__init__() + self.gcs = nn.ModuleList() + self.gcs.append(SAGEConv(n_feat, n_hid, normalize=normalize)) + self.batch_norm = batch_norm + if self.batch_norm: + self.bns = nn.ModuleList() + self.bns.append(nn.BatchNorm1d(n_hid)) + self.n_layers = n_layers + for _ in range(n_layers-2): + self.gcs.append(SAGEConv(n_hid, n_hid, normalize=normalize)) + if self.batch_norm: + self.bns.append(nn.BatchNorm1d(n_hid)) + self.gcs.append(SAGEConv(n_hid, n_class, normalize=False)) + self.dropout = dropout + self.activation = activation + + def reset_parameters(self): + for conv in self.gcs: + conv.reset_parameters() + if self.batch_norm: + for bn in self.bns: + bn.reset_parameters() + + def forward(self, x, block): + repr = x + if isinstance(block, torch.Tensor): + block = [block] + if len(block) == self.n_layers: + for i in range(self.n_layers-1): + repr = self.gcs[i](repr, block[i]) + if self.batch_norm: + repr = self.bns[i](repr) + repr = self.activation(repr) + repr = F.dropout(repr, self.dropout, training=self.training) + repr = self.gcs[-1](repr, block[-1]) + elif len(block) == 1: + for i in range(self.n_layers-1): + repr = self.gcs[i](repr, block[0]) + if self.batch_norm: + repr = self.bns[i](repr) + repr = self.activation(repr) + repr = F.dropout(repr, self.dropout, 
training=self.training) + repr = self.gcs[-1](repr, block[0]) + else: + raise ValueError('The sampling layer must be equal to GNN layer.') + + return F.log_softmax(repr, dim=1) + + def inference(self, x_all, subgraph_loader, device): + # Compute representations of nodes layer by layer, using *all* + # available edges. This leads to faster computation in contrast to + # immediately computing the final representations of each batch. + for i in range(self.n_layers): + xs = [] + for batch in subgraph_loader: + batch_in, _, block = batch + block.to_device(device) + x = x_all[batch_in].to(device) + x = self.gcs[i](x, block[0]) # one-layer sampling + if i != self.n_layers - 1: + if self.batch_norm: + x = self.bns[i](x) + x = F.relu(x) + xs.append(x.cpu()) + + x_all = torch.cat(xs, dim=0) + + return x_all + + +class GCN(nn.Module): + def __init__(self, n_feat, n_hid, n_class, n_layers=2, dropout=0.5, activation=F.relu, batch_norm=False): + super(GCN, self).__init__() + self.gcs = nn.ModuleList() + self.gcs.append(GCNConv(n_feat, n_hid)) + self.batch_norm = batch_norm + if self.batch_norm: + self.bns = nn.ModuleList() + self.bns.append(nn.BatchNorm1d(n_hid)) + self.n_layers = n_layers + for _ in range(n_layers-2): + self.gcs.append(GCNConv(n_hid, n_hid)) + if self.batch_norm: + self.bns.append(nn.BatchNorm1d(n_hid)) + self.gcs.append(GCNConv(n_hid, n_class)) + self.dropout = dropout + self.activation = activation + + def reset_parameters(self): + for conv in self.gcs: + conv.reset_parameters() + if self.batch_norm: + for bn in self.bns: + bn.reset_parameters() + + def forward(self, x, block): + repr = x + if isinstance(block, torch.Tensor): + block = [block] + if len(block) == self.n_layers: + for i in range(self.n_layers-1): + repr = self.gcs[i](repr, block[i]) + if self.batch_norm: + repr = self.bns[i](repr) + repr = self.activation(repr) + repr = F.dropout(repr, self.dropout, training=self.training) + repr = self.gcs[-1](repr, block[-1]) + elif len(block) == 1: + for i 
in range(self.n_layers-1): + repr = self.gcs[i](repr, block[0]) + if self.batch_norm: + repr = self.bns[i](repr) + repr = self.activation(repr) + repr = F.dropout(repr, self.dropout, training=self.training) + repr = self.gcs[-1](repr, block[0]) + else: + raise ValueError('The sampling layer must be equal to GNN layer.') + + return F.log_softmax(repr, dim=1) + + def inference(self, x_all, subgraph_loader, device): + # Compute representations of nodes layer by layer, using *all* + # available edges. This leads to faster computation in contrast to + # immediately computing the final representations of each batch. + for i in range(self.n_layers): + xs = [] + for batch in subgraph_loader: + batch_in, _, block = batch + block.to_device(device) + x = x_all[batch_in].to(device) + x = self.gcs[i](x, block[0]) # one-layer sampling + if i != self.n_layers - 1: + if self.batch_norm: + x = self.bns[i](x) + x = self.activation(x) + xs.append(x.cpu()) + + x_all = torch.cat(xs, dim=0) + + return x_all + +class GAT(nn.Module): + """ + This GAT only accepts dense tensor as input (doesn't support torch.sparse.tensor) + """ + def __init__(self, n_feat, n_hid, n_class, n_heads, n_layers=2, dropout=0.6, activation=F.elu): + super(GAT, self).__init__() + self.gcs = nn.ModuleList() + self.gcs.append(GATConv(n_feat, n_hid // n_heads[0], n_heads[0])) + self.n_layers = n_layers + for i in range(n_layers-2): + self.gcs.append(GATConv(n_hid, n_hid // n_heads[i + 1], n_heads[i + 1])) + self.gcs.append(GATConv(n_hid, n_class, n_heads[-1])) + self.dropout = dropout + self.activation = activation + + def reset_parameters(self): + for gc in self.gcs: + gc.reset_parameters() + + def forward(self, x, block): + repr = x + if isinstance(block, torch.Tensor): + block = [block] + if len(block) == self.n_layers: + for i in range(self.n_layers-1): + repr = self.gcs[i](repr, block[i]) + repr = self.activation(repr) + repr = F.dropout(repr, self.dropout, training=self.training) + repr = self.gcs[-1](repr, 
block[-1]) + elif len(block) == 1: + for gc in self.gcs[:-1]: + repr = gc(repr, block[0]) + repr = self.activation(repr) + repr = F.dropout(repr, self.dropout, training=self.training) + repr = self.gcs[-1](repr, block[0]) + else: + raise ValueError('The sampling layer must be equal to GNN layer.') + + return F.log_softmax(repr, dim=1) \ No newline at end of file diff --git a/sgl/operators/base_op.py b/sgl/operators/base_op.py index 49ae4e2..b74ad49 100644 --- a/sgl/operators/base_op.py +++ b/sgl/operators/base_op.py @@ -18,6 +18,9 @@ def _construct_adj(self, adj): def propagate(self, adj, feature): self._adj = self._construct_adj(adj) + + if isinstance(feature, Tensor): + feature = feature.numpy() if not isinstance(adj, sp.csr_matrix): raise TypeError("The adjacency matrix must be a scipy csr sparse matrix!") @@ -35,6 +38,12 @@ def propagate(self, adj, feature): prop_feat_list.append(feat_temp) return [torch.FloatTensor(feat) for feat in prop_feat_list] +class PreMessageOp: + def __init__(self, dim=1): + self._dim = dim + + def _transform_x(self, x): + raise NotImplementedError # Might include training parameters class MessageOp(nn.Module): diff --git a/sgl/operators/graph_op/__init__.py b/sgl/operators/graph_op/__init__.py index ca92514..05edabf 100644 --- a/sgl/operators/graph_op/__init__.py +++ b/sgl/operators/graph_op/__init__.py @@ -1,7 +1,9 @@ from .laplacian_graph_op import LaplacianGraphOp from .ppr_graph_op import PprGraphOp +from .rw_graph_op import RwGraphOp __all__ = [ "LaplacianGraphOp", "PprGraphOp", + "RwGraphOp" ] diff --git a/sgl/operators/graph_op/laplacian_graph_op.py b/sgl/operators/graph_op/laplacian_graph_op.py index 8973a03..41267bb 100644 --- a/sgl/operators/graph_op/laplacian_graph_op.py +++ b/sgl/operators/graph_op/laplacian_graph_op.py @@ -5,7 +5,7 @@ class LaplacianGraphOp(GraphOp): - def __init__(self, prop_steps, r=0.5): + def __init__(self, prop_steps=-1, r=0.5): super(LaplacianGraphOp, self).__init__(prop_steps) self.__r = r @@ -14,6 
+14,6 @@ def _construct_adj(self, adj): adj = adj.tocoo() elif not isinstance(adj, sp.coo_matrix): raise TypeError("The adjacency matrix must be a scipy.sparse.coo_matrix/csr_matrix!") - + adj_normalized = adj_to_symmetric_norm(adj, self.__r) return adj_normalized.tocsr() diff --git a/sgl/operators/graph_op/rw_graph_op.py b/sgl/operators/graph_op/rw_graph_op.py new file mode 100644 index 0000000..ba56472 --- /dev/null +++ b/sgl/operators/graph_op/rw_graph_op.py @@ -0,0 +1,18 @@ +import scipy.sparse as sp + +from sgl.operators.base_op import GraphOp +from sgl.operators.utils import adj_to_row_norm + + +class RwGraphOp(GraphOp): + def __init__(self, prop_steps=-1): + super(RwGraphOp, self).__init__(prop_steps) + + def _construct_adj(self, adj): + if isinstance(adj, sp.csr_matrix): + adj = adj.tocoo() + elif not isinstance(adj, sp.coo_matrix): + raise TypeError("The adjacency matrix must be a scipy.sparse.coo_matrix/csr_matrix!") + + adj_normalized = adj_to_row_norm(adj) + return adj_normalized.tocsr() diff --git a/sgl/operators/message_op/__init__.py b/sgl/operators/message_op/__init__.py index 95fec89..69ac949 100644 --- a/sgl/operators/message_op/__init__.py +++ b/sgl/operators/message_op/__init__.py @@ -9,6 +9,7 @@ from .simple_weighted_message_op import SimpleWeightedMessageOp from .sum_message_op import SumMessageOp from .over_smooth_distance_op import OverSmoothDistanceWeightedOp +from .pre_normalize_message_op import PreNormMessageOp __all__ = [ "ConcatMessageOp", @@ -21,5 +22,6 @@ "ProjectedConcatMessageOp", "SimpleWeightedMessageOp", "SumMessageOp", - "OverSmoothDistanceWeightedOp" + "OverSmoothDistanceWeightedOp", + "PreNormMessageOp" ] diff --git a/sgl/operators/message_op/pre_normalize_message_op.py b/sgl/operators/message_op/pre_normalize_message_op.py new file mode 100644 index 0000000..1b48b5d --- /dev/null +++ b/sgl/operators/message_op/pre_normalize_message_op.py @@ -0,0 +1,11 @@ +from sgl.operators.base_op import PreMessageOp + +import 
torch.nn.functional as F + +class PreNormMessageOp(PreMessageOp): + def __init__(self, p=1, dim=1): + super(PreNormMessageOp, self).__init__(dim) + self._p = p + + def _transform_x(self, x): + return F.normalize(x, p=self._p, dim=self._dim) \ No newline at end of file diff --git a/sgl/operators/utils.py b/sgl/operators/utils.py index 3b92f1c..e1f878e 100644 --- a/sgl/operators/utils.py +++ b/sgl/operators/utils.py @@ -74,17 +74,26 @@ def cuda_csr_sparse_dense_matmul(adj, feature): def adj_to_symmetric_norm(adj, r): - adj = adj + sp.eye(adj.shape[0]) - degrees = np.array(adj.sum(1)) - r_inv_sqrt_left = np.power(degrees, r - 1).flatten() + adj = adj + sp.eye(*adj.shape) + degrees_left = np.array(adj.sum(1)) + r_inv_sqrt_left = np.power(degrees_left, r - 1).flatten() r_inv_sqrt_left[np.isinf(r_inv_sqrt_left)] = 0. r_mat_inv_sqrt_left = sp.diags(r_inv_sqrt_left) - r_inv_sqrt_right = np.power(degrees, -r).flatten() + degrees_right = np.array(adj.sum(0)) + r_inv_sqrt_right = np.power(degrees_right, -r).flatten() r_inv_sqrt_right[np.isinf(r_inv_sqrt_right)] = 0. r_mat_inv_sqrt_right = sp.diags(r_inv_sqrt_right) + adj_normalized = r_mat_inv_sqrt_left.dot(adj).dot(r_mat_inv_sqrt_right) + return adj_normalized + +def adj_to_row_norm(adj): + degrees = np.array(adj.sum(1)) + r_inv_row = np.power(degrees, -1).flatten() + r_inv_row[np.isinf(r_inv_row)] = 0. 
+ r_mat_inv_row = sp.diags(r_inv_row) - adj_normalized = adj.dot(r_mat_inv_sqrt_left).transpose().dot(r_mat_inv_sqrt_right) + adj_normalized = r_mat_inv_row.dot(adj) return adj_normalized diff --git a/sgl/sampler/__init__.py b/sgl/sampler/__init__.py new file mode 100644 index 0000000..1bfb294 --- /dev/null +++ b/sgl/sampler/__init__.py @@ -0,0 +1,13 @@ +from .sampler import FastGCNSampler, ClusterGCNSampler, GraphSAINTSampler, NeighborSampler +from .base_sampler import FullSampler, NodeWiseSampler, LayerWiseSampler, GraphWiseSampler + +__all__ = [ + "FastGCNSampler", + "ClusterGCNSampler", + "GraphSAINTSampler", + "NeighborSampler", + "FullSampler", + "NodeWiseSampler", + "LayerWiseSampler", + "GraphWiseSampler" +] diff --git a/sgl/sampler/base_sampler.py b/sgl/sampler/base_sampler.py new file mode 100644 index 0000000..29f57da --- /dev/null +++ b/sgl/sampler/base_sampler.py @@ -0,0 +1,185 @@ +import os +import torch +import numpy as np +import pickle as pkl +import scipy.sparse as sp +from scipy.sparse.linalg import norm as sparse_norm + +from sgl.data.base_data import Block +import sgl.operators.graph_op as GraphOps +from sgl.sampler.utils import adj_train_analysis +from sgl.utils import sparse_mx_to_torch_sparse_tensor, sparse_mx_to_pyg_sparse_tensor + +from sampling_ops import NodeWiseOneLayer + +SPARSE_TRANSFORM = {"pyg": sparse_mx_to_pyg_sparse_tensor, "torch": sparse_mx_to_torch_sparse_tensor} + +class BaseSampler: + def __init__(self, adj, **kwargs): + self._adj = adj + self.sampler_name = "None" + self.sample_level = "None" + self._post_sampling_op = None + self.pre_sampling = False + + if "pre_sampling_op" in kwargs.keys(): + graph_op = kwargs.pop("pre_sampling_op") + if graph_op == "LaplacianGraphOp": + graph_op = getattr(GraphOps, "LaplacianGraphOp")(r=0.5) + elif graph_op == "RwGraphOp": + graph_op = getattr(GraphOps, "RwGraphOp")() + self._adj = graph_op._construct_adj(self._adj) + + if "post_sampling_op" in kwargs.keys(): + graph_op = 
kwargs.pop("post_sampling_op") + if graph_op == "LaplacianGraphOp": + self._post_sampling_op = getattr(GraphOps, "LaplacianGraphOp")(r=0.5) + elif graph_op == "RwGraphOp": + self._post_sampling_op = getattr(GraphOps, "RwGraphOp")() + + self._sparse_type = kwargs.get("sparse_type", "pyg") + + self._pre_process(**kwargs) + + def _pre_process(self, **kwargs): + pass + + def _get_sample_sizes(self, **kwargs): + if "layer_sizes" in kwargs.keys(): + layer_sizes = kwargs.pop("layer_sizes").split(",") + layer_sizes = [int(layer_size) for layer_size in layer_sizes] + self.layer_sizes = layer_sizes + else: + raise ValueError("Please provide layer sizes in the form of either a list or an integer!") + self.num_layers = len(self.layer_sizes) + + def _calc_probs(self, **kwargs): + prob_type = kwargs.get("prob_type", "normalize") + save_dir = kwargs.get("save_dir", None) + if save_dir is not None: + pre_calc_path = os.path.join(save_dir, f"{prob_type}_sample_probs.npy") + if os.path.exists(pre_calc_path): + self.probs = np.load(pre_calc_path) + print(f"Load from pre-calculated sampling probability from {str(pre_calc_path)}.") + return + if prob_type == "normalize": + col_norm = sparse_norm(self._adj, axis=0) + self.probs = col_norm / np.sum(col_norm) + elif prob_type == "uniform": + self.probs = np.ones(self._adj.shape[1]) + elif prob_type == "locality": + """ + This sampling strategy refers to GNNSampler [https://github.com/ICT-GIMLab/GNNSampler] + """ + min_neighs = kwargs.get("min_neighs", 2) + sim_threshold = kwargs.get("sim_threshold", 0.1) + step = kwargs.get("step", 1) + low_quality_score = kwargs.get("low_quality_score", 0.1) + locality_score = adj_train_analysis(self._adj, min_neighs, sim_threshold, step, low_quality_score) + self.probs = locality_score / np.sum(locality_score) + else: + raise ValueError(f"Don\'t support {prob_type} probability calculation. 
" + "Consider pre-calculating the probability and transfer it to pre_probs.") + if save_dir is not None: + np.save(open(pre_calc_path, "wb"), self.probs) + print(f"Save the sampling probability into {str(pre_calc_path)}.") + + def _post_process(self, adjs, to_sparse_tensor=True): + if isinstance(adjs, list): + if self._post_sampling_op is not None: + adjs = [self._post_sampling_op._construct_adj(adj) for adj in adjs] + if to_sparse_tensor: + sparse_transform_func = SPARSE_TRANSFORM.get(self._sparse_type) + adjs = [sparse_transform_func(adj) for adj in adjs] + else: + if self._post_sampling_op is not None: + adjs = self._post_sampling_op._construct_adj(adjs) + if to_sparse_tensor: + sparse_transform_func = SPARSE_TRANSFORM.get(self._sparse_type) + adjs = [sparse_transform_func(adj) for adj in adjs] + return adjs + + def collate_fn(self, *args): + raise NotImplementedError + +class FullSampler(BaseSampler): + def __init__(self, adj, **kwargs): + """ + In fact, this sampler simply returns the full graph. 
+ """ + super(FullSampler, self).__init__(adj, **kwargs) + self.sampler_name = "FullSampler" + self.sample_level = "graph" + self.pre_sampling = False + self.full_batch = kwargs.get("node_ids", range(self._adj.shape[0])) + self.full_block = Block(self._adj, self._sparse_type) + + def sampling(self): + return self.full_batch, self.full_batch, self.full_block + +class NodeWiseSampler(BaseSampler): + def __init__(self, adj, **kwargs): + super(NodeWiseSampler, self).__init__(adj, **kwargs) + self.__indptr = self._adj.indptr + self.__indices = self._adj.indices + self.__values = self._adj.data + + def _pre_process(self, **kwargs): + self._get_sample_sizes(**kwargs) + self._calc_probs(**kwargs) + self.replace = kwargs.get("replace", True) + + def one_layer_sampling(self, target_nodes, layer_size, biased): + source_nodes, (s_indptr, s_indices, s_data) = NodeWiseOneLayer(target_nodes, self.__indptr, self.__indices, self.__values, layer_size, self.probs, biased, self.replace) + subgraph_adj = sp.csr_matrix((s_data, s_indices, s_indptr), shape=(len(target_nodes), len(source_nodes))) + + return source_nodes, subgraph_adj + +class LayerWiseSampler(BaseSampler): + def __init__(self, adj, **kwargs): + super(LayerWiseSampler, self).__init__(adj, **kwargs) + + def _pre_process(self, **kwargs): + self._get_sample_sizes(**kwargs) + self._calc_probs(**kwargs) + self.replace = kwargs.get("replace", False) + + def one_layer_sampling(self, target_nodes, layer_size, probability): + subgraph_adj = self._adj[target_nodes, :] + neis = np.nonzero(np.sum(subgraph_adj, axis=0))[1] + p1 = probability[neis] + p1 = p1 / np.sum(p1) + + if self.replace is False: + layer_size = min(len(neis), layer_size) + + local_nids = np.random.choice(np.arange(np.size(neis)), + layer_size, self.replace, p1) + + source_nodes = neis[local_nids] + subgraph_adj = subgraph_adj[:, source_nodes] + sampled_p1 = p1[local_nids] + + subgraph_adj = subgraph_adj.dot(sp.diags(1.0 / (sampled_p1 * layer_size))) + return 
source_nodes, subgraph_adj + +class GraphWiseSampler(BaseSampler): + def __init__(self, adj, **kwargs): + super(GraphWiseSampler, self).__init__(adj, **kwargs) + + @property + def sample_graph_ops(self): + # Each subclass must implement its own sample operations + raise NotImplementedError + + def multiple_graphs_sampling(self): + if self.pre_sampling is False or self.sampling_done is False: + if self._save_dir is not None and os.path.exists(self._save_path_pt) and os.path.exists(self._save_path_pkl): + print("\nLoad from existing subgraphs.\n") + (self.perm_adjs, self.partptr, self.perm_node_idx) = torch.load(self._save_path_pt) + self.splitted_perm_adjs = pkl.load(open(self._save_path_pkl, "rb")) + else: + self.sample_graph_ops() + self.sampling_done = True + else: + print("\nSubgraphs already existed.\n") \ No newline at end of file diff --git a/sgl/sampler/sampler.py b/sgl/sampler/sampler.py new file mode 100644 index 0000000..51ee871 --- /dev/null +++ b/sgl/sampler/sampler.py @@ -0,0 +1,377 @@ +import os +import torch +import numpy as np +import pickle as pkl +import scipy.sparse as sp + +from torch_sparse import SparseTensor +from torch_geometric.utils import mask_to_index + +from sgl.data.base_data import Block +from sgl.utils import sparse_mx_to_pyg_sparse_tensor +from sgl.sampler.base_sampler import NodeWiseSampler, LayerWiseSampler, GraphWiseSampler + + +class NeighborSampler(NodeWiseSampler): + def __init__(self, adj, **kwargs): + """ + Neighborhood sampler + """ + super(NeighborSampler, self).__init__(adj, **kwargs) + self.sampler_name = "NeighborSampler" + self.sample_level = "node" + self.pre_sampling = False + + def collate_fn(self, batch_inds): + """ + Input: + batch_inds: array of batch node inds + Method: + Neighborhood sampling + Outputs: + batch_in: global node index of each source node in the first aggregation layer + batch_out: global node index of each target node in the last aggregation layer + block: sampled adjs in the form of sparse 
tensors wrapped in Block class + """ + if callable(batch_inds): + batch_inds = batch_inds() + if isinstance(batch_inds, torch.Tensor): + batch_inds = batch_inds.numpy() + if not isinstance(batch_inds, np.ndarray): + batch_inds = np.asarray(batch_inds) + + all_adjs = [] + + cur_tgt_nodes = batch_inds + for layer_index in range(self.num_layers): + cur_src_nodes, adj_sampled = self.one_layer_sampling(cur_tgt_nodes, self.layer_sizes[layer_index], True) + all_adjs.insert(0, adj_sampled) + cur_tgt_nodes = cur_src_nodes + + all_adjs = self._post_process(all_adjs, to_sparse_tensor=False) + + return cur_tgt_nodes, batch_inds, Block(all_adjs, self._sparse_type) + +class FastGCNSampler(LayerWiseSampler): + def __init__(self, adj, **kwargs): + super(FastGCNSampler, self).__init__(adj, **kwargs) + self.sampler_name = "FastGCNSampler" + self.sample_level = "layer" + self.pre_sampling = False + + def collate_fn(self, batch_inds): + """ + Input: + batch_inds: array of batch node inds + Method: + Sample fixed size of nodes independently at each layer. + Outputs: + batch_in: global node index of each source node in the first aggregation layer + batch_out: global node index of each target node in the last aggregation layer + block: sampled adjs in the form of sparse tensors wrapper in Block class + """ + if callable(batch_inds): + batch_inds = batch_inds() + if not isinstance(batch_inds, np.ndarray): + batch_inds = np.asarray(batch_inds) + all_adjs = [] + + cur_out_nodes = batch_inds + for layer_index in range(self.num_layers): + cur_in_nodes, cur_adj = self.one_layer_sampling( + cur_out_nodes, self.layer_sizes[layer_index], self.probs) + all_adjs.insert(0, cur_adj) + cur_out_nodes = cur_in_nodes + + all_adjs = self._post_process(all_adjs, to_sparse_tensor=False) + + return cur_out_nodes, batch_inds, Block(all_adjs, self._sparse_type) + +class ClusterGCNSampler(GraphWiseSampler): + """ + Clustering the graph, feature set and target. 
+ """ + def __init__(self, dataset, inductive=False, **kwargs): + """ + Inputs: + adj: Adjacency matrix (Networkx Graph). + """ + super(ClusterGCNSampler, self).__init__(dataset.adj[dataset.train_idx, :][:, dataset.train_idx] if inductive else dataset.adj, **kwargs) + self.sampler_name = "ClusterGCNSampler" + self.sample_level = "graph" + self.pre_sampling = True # conduct sampling only once before training + self.sampling_done = False + self._masks = {"train": dataset.train_mask, "val": dataset.val_mask, "test": dataset.test_mask} + + @property + def sample_graph_ops(self): + if self.cluster_method == "metis": + return self._metis_clustering + else: + raise NotImplementedError + + def _pre_process(self, **kwargs): + + self.cluster_method = kwargs.get("cluster_method", "metis") + self.cluster_number = kwargs.get("cluster_number", 32) + + self._save_dir = kwargs.get("save_dir", None) + if self._save_dir is not None: + self._save_path_pt = os.path.join(self._save_dir, f"cluster_partition_{self.cluster_method}_{self.cluster_number}.pt") + self._save_path_pkl = os.path.join(self._save_dir, f"cluster_partition_{self.cluster_method}_{self.cluster_number}.pkl") + else: + self._save_path_pt = self._save_path_pkl = None + + def collate_fn(self, batch_inds, mode): + if not isinstance(batch_inds, torch.Tensor): + batch_inds = torch.tensor(batch_inds) + + # stack len(batch_inds) subgraphs into one graph + start = self.partptr[batch_inds].tolist() + end = self.partptr[batch_inds + 1].tolist() + node_idx = torch.cat([torch.arange(s, e) for s, e in zip(start, end)]) + stack_row, stack_col, stack_value = [], [], [] + num_node = 0 + for i, batch_ind in enumerate(batch_inds): + batch_ind = batch_ind.item() + perm_adj = self.splitted_perm_adjs[batch_ind] + row, col, value = perm_adj.coo() + row = row + num_node + col = col + num_node + num_node += end[i] - start[i] + stack_row.append(row) + stack_col.append(col) + stack_value.append(value) + stack_row = torch.cat(stack_row) + 
stack_col = torch.cat(stack_col) + stack_value = torch.cat(stack_value) + block = Block(SparseTensor(row=stack_row, col=stack_col, value=stack_value, sparse_sizes=(num_node, num_node)), sparse_type=self._sparse_type) + global_node_idx = self.perm_node_idx[node_idx] + if mode in ["train", "val", "test"]: + mask = self._masks[mode][global_node_idx] + global_inds = global_node_idx[mask] + local_inds = mask_to_index(mask) + batch_out = torch.vstack([local_inds, global_inds]) + else: + mode = mode.split("_") + batch_out = {} + for one_mode in mode: + mask = self._masks[one_mode][global_node_idx] + global_inds = global_node_idx[mask] + local_inds = mask_to_index(mask) + batch_out.update({one_mode: torch.vstack([local_inds, global_inds])}) + return global_node_idx, batch_out, block + + def _metis_clustering(self): + adj = sparse_mx_to_pyg_sparse_tensor(self._adj) + r""" + perm_adjs: SparseTensor + len(self.partptr) == self.cluster_number + 1 + len(self.perm_node_idx) = num_nodes + """ + self.perm_adjs, self.partptr, self.perm_node_idx = adj.partition(self.cluster_number, False) + self.splitted_perm_adjs = [] + for i in range(len(self.partptr)-1): + start, end = self.partptr[i], self.partptr[i+1] + node_idx = torch.arange(start, end) + perm_adj = self.perm_adjs.narrow(0, start, end-start) + perm_adj = perm_adj.index_select(1, node_idx) + self.splitted_perm_adjs.append(perm_adj) + if self._save_dir is not None: + torch.save((self.perm_adjs, self.partptr, self.perm_node_idx), self._save_path_pt) + pkl.dump(self.splitted_perm_adjs, open(self._save_path_pkl, "wb")) + print(f"\nSave Metis graph clustering results under the {self._save_dir} directory.\n") + +class GraphSAINTSampler(GraphWiseSampler): + ''' + sample the wholo graph, feature set and label as GraphSAINT method + ''' + def __init__(self, dataset, **kwargs): + """ + Inputs: + adj: Adjacency matrix: scipy.sparse.csr_matrix + """ + super(GraphSAINTSampler, self).__init__(dataset.adj, **kwargs) + + self.replace = True + 
self.sampler_name = "GraphSAINTSampler" + self.sample_level = "graph" + self.pre_sampling = False + self._masks = {"train": dataset.train_mask, "val": dataset.val_mask, "test": dataset.test_mask} + + def _pre_process(self, **kwargs): + self.num_node = self._adj.shape[0] + self.num_edge = self._adj.nnz + self.pre_sampling_times = kwargs.get("pre_sampling_graphs", 1) + self.used_sample_graphs = 0 + + if kwargs["sampler_type"] == "node": + kwargs.update({"prob_type": "normalize"}) + self._calc_probs(**kwargs) + self.node_probs = self.probs + self.node_budget = kwargs["nodebudget"] + elif kwargs["sampler_type"] == "edge": + self._calc_edge_probs() + self.edge_budget = kwargs["edgebudget"] + elif kwargs["sampler_type"] == "random_walk": + self.r = kwargs["r"] + self.h = kwargs["h"] + else: + raise NotImplementedError + + self.sample_graph_type = kwargs["sampler_type"] + + @property + def sample_graph_ops(self): + return getattr(self, f"{self.sample_graph_type}_sampler") + + def node_sampler(self): + """ + method: sample fixed size of nodes as a subgraph with node_probs + + Outputs: + sampled_node: global node index + block: sampled adjs, csr sparse matrix + """ + + p = self.node_probs + + sampled_node = np.random.choice(a=self.num_node, size=self.node_budget, replace=self.replace, p=p) + sampled_node = np.unique(sampled_node) + + subadj = self._adj[sampled_node, :] + subadj = subadj[:, sampled_node] + + return sampled_node, subadj + + def _calc_edge_probs(self): + """ + method: calculate edge probablity as 1/d(u)+1/d(v) + """ + degrees = self._adj.sum(axis=1).A1 + edges = self._adj.nonzero() + start_degrees = degrees[edges[0]] + end_degrees = degrees[edges[1]] + + self.edge_probs = 1 / start_degrees + 1 / end_degrees + self.edge_probs = self.edge_probs / np.sum(self.edge_probs) + + def edge_sampler(self): + """ + method: sample fixed size of edges as a subgraph with edge_probs + + Outputs: + sampled_node: global node index + block: sampled adjs, csr sparse matrix + """ 
+ + p = self.edge_probs + sampled_edges = np.random.choice(a=self.num_edge, size=self.edge_budget, replace=self.replace, p=p) + sampled_edges = np.unique(sampled_edges) + + edges = self._adj.nonzero() + sampled_start = edges[0][sampled_edges] + sampled_end = edges[1][sampled_edges] + + sampled_node = np.unique(np.concatenate([sampled_start, sampled_end])) + + subadj = self._adj[sampled_node, :] + subadj = subadj[:, sampled_node] + + return sampled_node, subadj + + def random_walk_sampler(self): + """ + method: sample like random walk + + Outputs: + sampled_node: global node index + block: sampled adjs, csr sparse matrix + """ + root_nodes = np.random.choice(a=self.num_node, size=self.r, replace=self.replace) + sampled_node = [] + for v in root_nodes: + sampled_node.append(v) + + neighbors = self._adj.indices[self._adj.indptr[v]:self._adj.indptr[v+1]] + sampled_nei = np.random.choice(a=neighbors, size=self.h, replace=self.replace) + + sampled_node.extend(sampled_nei.tolist()) + + sampled_node = np.unique(np.array(sampled_node)) + + subadj = self._adj[sampled_node, :] + subadj = subadj[:, sampled_node] + + return sampled_node, subadj + + def _calc_norm(self): + """ + methods: calculate the norm to estimate embedding and loss + """ + self.sampled_graphs = [] + + node_value = np.zeros(self.num_node) + edge_value = sp.lil_matrix((self.num_node, self.num_node)) + + for _ in range(self.pre_sampling_times): + sampled_node, adj = self.sample_graph_ops() + adj = self._post_process(adj, to_sparse_tensor=False) + self.sampled_graphs.append((sampled_node, adj)) + adj = adj.tocoo() + for row, col in zip(adj.row, adj.col): + edge_value[sampled_node[row], sampled_node[col]] += 1 + node_value[sampled_node] += 1 + + edge_value = edge_value.tocsr().dot(sp.diags(1.0 / np.maximum(node_value, 1))) + + self.aggr_norm = edge_value + self.loss_norm = torch.FloatTensor(np.maximum(node_value, 1) / self.pre_sampling_times) + + def collate_fn(self, batch_ids, mode): + """ + Inputs: + 
batch_ids: only the len of it is used, means how many subgraphs are sampled to construct computation graph + + method: sample len(batch_ids) subgraphs as mini-batch + + Outputs: batch_in: global node index + batch_out: global node index + block: sampled adjs in the form of sparse tensors wrapped in Block class + """ + + adjs = [] + batch_in = [] + for _ in range(len(batch_ids)): + if self.used_sample_graphs < self.pre_sampling_times: + sampled, adj = self.sampled_graphs[self.used_sample_graphs] + self.used_sample_graphs += 1 + else: + sampled, adj = self.sample_graph_ops() + + sampled_aggr_norm = self.aggr_norm[sampled, :] + sampled_aggr_norm = sampled_aggr_norm[:, sampled] + adj = adj.multiply(sampled_aggr_norm.transpose()) + adjs.append(adj) + batch_in.extend(sampled) + + batched_adj = sp.block_diag(adjs, format='csr') + batch_in = torch.LongTensor(batch_in) + + if mode in ["train", "val", "test"]: + mask = self._masks[mode][batch_in] + global_inds = batch_in[mask] + local_inds = mask_to_index(mask) + batch_out = torch.vstack([local_inds, global_inds]) + else: + mode = mode.split("_") + batch_out = {} + for one_mode in mode: + mask = self._masks[one_mode][batch_in] + global_inds = batch_in[mask] + local_inds = mask_to_index(mask) + batch_out.update({one_mode: torch.vstack([local_inds, global_inds])}) + + self.cur_index = global_inds + + return batch_in, batch_out, Block(batched_adj, self._sparse_type) \ No newline at end of file diff --git a/sgl/sampler/sampling_ops.cpp b/sgl/sampler/sampling_ops.cpp new file mode 100644 index 0000000..ca1ab54 --- /dev/null +++ b/sgl/sampler/sampling_ops.cpp @@ -0,0 +1,307 @@ +#include + +#include "sampling_ops.hpp" + +std::mt19937 gen; + +SingleSample NodeWiseOneLayer(PyArrInt prev_nodes, PyArrInt indptr, PyArrInt indices, PyArrFloat values, int32_t layer_size, PyArrFloat probability, bool biased, bool replace) { + py::buffer_info buf_prev_nodes = prev_nodes.request(); + py::buffer_info buf_indptr = indptr.request(); + 
py::buffer_info buf_indices = indices.request(); + py::buffer_info buf_values = values.request(); + py::buffer_info buf_probability = probability.request(); + + int32_t* ptr_prev_nodes = static_cast (buf_prev_nodes.ptr); + int32_t* ptr_indptr = static_cast (buf_indptr.ptr); + int32_t* ptr_indices = static_cast (buf_indices.ptr); + float* ptr_values = static_cast (buf_values.ptr); + float* ptr_probability = static_cast (buf_probability.ptr); + + std::vector>> cols; // col, v + std::vector n_ids; + std::unordered_map n_id_map; + + auto out_indptr = PyArrInt(prev_nodes.size() + 1); + py::buffer_info buf_out_indptr = out_indptr.request(); + int32_t* ptr_out_indptr = static_cast (buf_out_indptr.ptr); + ptr_out_indptr[0] = 0; + + int32_t n, c, e, start_, end_, num_neighbors; + float v; + + for (int32_t i = 0; i < prev_nodes.size(); i++) { + n = ptr_prev_nodes[i]; + cols.push_back(std::vector>()); + n_id_map[n] = i; + n_ids.push_back(n); + } + + if (layer_size < 0) { + // No sampling + for (int32_t i = 0; i < prev_nodes.size(); i++) { + n = ptr_prev_nodes[i]; + start_ = ptr_indptr[n], end_ = ptr_indptr[n + 1]; + num_neighbors = end_ - start_; + + for (int32_t j = 0; j < num_neighbors; j++) { + e = start_ + j; + c = ptr_indices[e]; + v = ptr_values[e]; + + if (n_id_map.count(c) == 0) { + n_id_map[c] = n_ids.size(); + n_ids.push_back(c); + } + cols[i].push_back(std::make_tuple(n_id_map[c], v)); + } + ptr_out_indptr[i + 1] = ptr_out_indptr[i] + cols[i].size(); + } + } + else if (replace) { + // Sample with replacement + if (biased) { + for (int32_t i = 0; i < prev_nodes.size(); i++) { + n = ptr_prev_nodes[i]; + start_ = ptr_indptr[n], end_ = ptr_indptr[n + 1]; + num_neighbors = end_ - start_; + + if (num_neighbors > 0) { + std::vector temp_probability(ptr_probability + start_, ptr_probability + end_); + for (int32_t j = 0; j < layer_size; j++) { + std::discrete_distribution<> d(temp_probability.begin(), temp_probability.end()); + e = start_ + d(gen); + c = ptr_indices[e]; + 
v = ptr_values[e]; + + if (n_id_map.count(c) == 0) { + n_id_map[c] = n_ids.size(); + n_ids.push_back(c); + } + cols[i].push_back(std::make_tuple(n_id_map[c], v)); + } + } + ptr_out_indptr[i + 1] = ptr_out_indptr[i] + cols[i].size(); + } + } + else { + for (int32_t i = 0; i < prev_nodes.size(); i++) { + n = ptr_prev_nodes[i]; + start_ = ptr_indptr[n], end_ = ptr_indptr[n + 1]; + num_neighbors = end_ - start_; + + if (num_neighbors > 0) { + for (int32_t j = 0; j < layer_size; j++) { + e = start_ + rand() % num_neighbors; + c = ptr_indices[e]; + v = ptr_values[e]; + + if (n_id_map.count(c) == 0) { + n_id_map[c] = n_ids.size(); + n_ids.push_back(c); + } + cols[i].push_back(std::make_tuple(n_id_map[c], v)); + } + } + ptr_out_indptr[i + 1] = ptr_out_indptr[i] + cols[i].size(); + } + } + } + else { + // Sample without replacement + if (biased) { + for (int32_t i = 0; i < prev_nodes.size(); i++) { + n = ptr_prev_nodes[i]; + start_ = ptr_indptr[n], end_ = ptr_indptr[n + 1]; + num_neighbors = end_ - start_; + + if (num_neighbors <= layer_size) { + for(int32_t j = 0; j < num_neighbors; j++) { + e = start_ + j; + c = ptr_indices[e]; + v = ptr_values[e]; + + if (n_id_map.count(c) == 0) { + n_id_map[c] = n_ids.size(); + n_ids.push_back(c); + } + cols[i].push_back(std::make_tuple(n_id_map[c], v)); + } + } + else { + std::vector temp_probability(ptr_probability + start_, ptr_probability + end_); + std::discrete_distribution<> d(temp_probability.begin(), temp_probability.end()); + std::uniform_real_distribution dist(0.0, 1.0); + std::vector vals; + std::generate_n(std::back_inserter(vals), num_neighbors, [&dist]() { return dist(gen); }); + std::transform(vals.begin(), vals.end(), temp_probability.begin(), vals.begin(), [&](auto r, auto prob) { return std::pow(r, 1. 
/ prob); }); + std::vector> valIndices; + int32_t index = 0; + std::transform(vals.begin(), vals.end(), std::back_inserter(valIndices), [&index](auto v) { return std::pair(v, index++); }); + std::sort(valIndices.begin(), valIndices.end(), [](auto x, auto y) { return x.first > y.first; }); + std::vector candidates; + std::transform(valIndices.begin(), valIndices.end(), std::back_inserter(candidates), [](auto v) { return v.second; }); + for(int32_t j = 0; j < layer_size; j++) { + e = start_ + candidates[j]; + c = ptr_indices[e]; + v = ptr_values[e]; + + if (n_id_map.count(c) == 0) { + n_id_map[c] = n_ids.size(); + n_ids.push_back(c); + } + cols[i].push_back(std::make_tuple(n_id_map[c], v)); + } + } + ptr_out_indptr[i + 1] = ptr_out_indptr[i] + cols[i].size(); + } + } + else { + // via Robert Floyd algorithm + for (int32_t i = 0; i < prev_nodes.size(); i++) { + n = ptr_prev_nodes[i]; + start_ = ptr_indptr[n], end_ = ptr_indptr[n + 1]; + num_neighbors = end_ - start_; + + std::unordered_set perm; + if (num_neighbors <= layer_size) { + for (int32_t j = 0; j < num_neighbors; j++) perm.insert(j); + } else { + for (int32_t j = num_neighbors - layer_size; j < num_neighbors; j++) { + if (!perm.insert(rand() % j).second) perm.insert(j); + } + } + + for(const int32_t &p: perm) { + e = start_ + p; + c = ptr_indices[e]; + v = ptr_values[e]; + + if (n_id_map.count(c) == 0) { + n_id_map[c] = n_ids.size(); + n_ids.push_back(c); + } + cols[i].push_back(std::make_tuple(n_id_map[c], v)); + } + ptr_out_indptr[i + 1] = ptr_out_indptr[i] + cols[i].size(); + } + } + } + + int32_t E = ptr_out_indptr[prev_nodes.size()]; + auto out_indices = PyArrInt(E); + py::buffer_info buf_out_indices = out_indices.request(); + int32_t* ptr_out_indices = static_cast (buf_out_indices.ptr); + auto out_values = PyArrFloat(E); + py::buffer_info buf_out_values = out_values.request(); + float* ptr_out_values = static_cast (buf_out_values.ptr); + + int32_t i = 0; + for (std::vector> &col_vec : cols) { + 
std::sort(col_vec.begin(), col_vec.end(), + [](const std::tuple &a, + const std::tuple &b) -> bool { + return std::get<0>(a) < std::get<0>(b); + }); + for (const std::tuple &value : col_vec) { + ptr_out_indices[i] = std::get<0>(value); + ptr_out_values[i] = std::get<1>(value); + i += 1; + } + } + + PyArrInt out_n_ids(n_ids.size()); + py::buffer_info buf_out_n_ids = out_n_ids.request(); + int32_t *ptr_out_n_ids = static_cast(buf_out_n_ids.ptr); + std::copy(n_ids.begin(), n_ids.end(), ptr_out_n_ids); + Adj out_adj = std::make_tuple(out_indptr, out_indices, out_values); + return std::make_pair(out_n_ids, out_adj); +} + +PyArrInt LayerWiseOneLayer(PyArrInt indices, int32_t layer_size, PyArrFloat probability, bool biased, bool replace) { + py::buffer_info buf_indices = indices.request(); + py::buffer_info buf_probability = probability.request(); + + int32_t* ptr_indices = static_cast (buf_indices.ptr); + float* ptr_probability = static_cast (buf_probability.ptr); + + std::vector neighbors(ptr_indices, ptr_indices + indices.size()); + std::sort(neighbors.begin(), neighbors.end()); + neighbors.erase(std::unique(neighbors.begin(), neighbors.end()), neighbors.end()); + std::vector n_ids; + int32_t e, c, num_neighbors = neighbors.size(); + + if (layer_size < 0) { + // No sampling + n_ids.insert(n_ids.end(), neighbors.begin(), neighbors.end()); + } else if (replace) { + // Sample with replacement + n_ids.resize(layer_size); + if (biased) { + std::vector selectedProbability(num_neighbors); + std::transform(neighbors.begin(), neighbors.end(), selectedProbability.begin(), + [&ptr_probability](int index) { return ptr_probability[index]; }); + + #pragma omp parallel for schedule(static) + for (int32_t j = 0; j < layer_size; j++) { + std::discrete_distribution<> d(selectedProbability.begin(), selectedProbability.end()); + e = d(gen); + c = neighbors[e]; + n_ids[j] = c; + } + } else { + #pragma omp parallel for schedule(static) + for (int32_t j = 0; j < layer_size; j++) { + e = 
rand() % num_neighbors; + c = neighbors[e]; + n_ids[j] = c; + } + } + } else { + // Sample without replacement + if (num_neighbors <= layer_size) { + n_ids.insert(n_ids.end(), neighbors.begin(), neighbors.end()); + } else if (biased) { + std::vector selectedProbability(num_neighbors); + std::transform(neighbors.begin(), neighbors.end(), selectedProbability.begin(), + [&ptr_probability](int index) { return ptr_probability[index]; }); + std::discrete_distribution<> d(selectedProbability.begin(), selectedProbability.end()); + std::uniform_real_distribution dist(0.0, 1.0); + std::vector vals; + std::generate_n(std::back_inserter(vals), num_neighbors, [&dist]() { return dist(gen); }); + std::transform(vals.begin(), vals.end(), selectedProbability.begin(), vals.begin(), [&](auto r, auto prob) { return std::pow(r, 1. / prob); }); + std::vector> valIndices; + int32_t index = 0; + std::transform(vals.begin(), vals.end(), std::back_inserter(valIndices), [&index](auto v) { return std::pair(v, index++); }); + std::sort(valIndices.begin(), valIndices.end(), [](auto x, auto y) { return x.first > y.first; }); + std::vector candidates; + std::transform(valIndices.begin(), valIndices.end(), std::back_inserter(candidates), [](auto v) { return v.second; }); + + n_ids.resize(layer_size); + #pragma omp parallel for schedule(static) + for (int32_t j = 0; j < layer_size; j++) { + c = candidates[j]; + n_ids[j] = c; + } + } else { + std::unordered_set perm; + for (int32_t j = num_neighbors - layer_size; j < num_neighbors; j++) { + if (!perm.insert(rand() % j).second) perm.insert(j); + } + for (const int32_t &p: perm) { + c = neighbors[p]; + n_ids.push_back(c); + } + } + } + + PyArrInt out_n_ids(n_ids.size()); + py::buffer_info buf_out_n_ids = out_n_ids.request(); + int32_t *ptr_out_n_ids = static_cast(buf_out_n_ids.ptr); + std::copy(n_ids.begin(), n_ids.end(), ptr_out_n_ids); + return out_n_ids; +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("NodeWiseOneLayer", 
&NodeWiseOneLayer); + m.def("LayerWiseOneLayer", &LayerWiseOneLayer); +} \ No newline at end of file diff --git a/sgl/sampler/sampling_ops.hpp b/sgl/sampler/sampling_ops.hpp new file mode 100644 index 0000000..11a49d6 --- /dev/null +++ b/sgl/sampler/sampling_ops.hpp @@ -0,0 +1,15 @@ +#include +#include +#include +#include + +namespace py = pybind11; +typedef py::array_t PyArrInt; +typedef py::array_t PyArrFloat; + +using Adj = std::tuple; +using SingleSample = std::tuple; + +SingleSample NodeWiseOneLayer(PyArrInt prev_nodes, PyArrInt indptr, PyArrInt indices, + PyArrFloat values, int32_t layer_size, PyArrFloat probability, bool biased, bool replace); +PyArrInt LayerWiseOneLayer(PyArrInt indices, int32_t layer_size, PyArrFloat probability, bool biased, bool replace); \ No newline at end of file diff --git a/sgl/sampler/setup.py b/sgl/sampler/setup.py new file mode 100644 index 0000000..91c77da --- /dev/null +++ b/sgl/sampler/setup.py @@ -0,0 +1,53 @@ +import os +import sys +from pathlib import Path +from setuptools import setup + +from torch.__config__ import parallel_info +from torch.utils.cpp_extension import BuildExtension, CppExtension + + +def flags_to_list(flagstring): + return list(filter(bool, flagstring.split(' '))) + + +WITH_SYMBOLS = True if os.getenv('WITH_SYMBOLS', '0') == '1' else False +CXX_FLAGS = flags_to_list(os.getenv('CXX_FLAGS', '')) +ROOT_PATH = Path(__file__).resolve().parent + + +def get_extensions(): + define_macros = [] + libraries = [] + extra_compile_args = { + 'cxx': ['-O3', '-march=native', '-std=c++17', '-g'] + CXX_FLAGS} + extra_link_args = [] if WITH_SYMBOLS else ['-s'] + + info = parallel_info() + if 'backend: OpenMP' in info and 'OpenMP not found' not in info: + extra_compile_args['cxx'] += ['-DAT_PARALLEL_OPENMP'] + if sys.platform == 'win32': + extra_compile_args['cxx'] += ['/openmp'] + else: + extra_compile_args['cxx'] += ['-fopenmp'] + else: + print('Compiling without OpenMP...') + + return [ + CppExtension( + 'sampling_ops', + 
def dot_product_ratio(ori_neighbors, good_neighbors):
    """Similarity of a node's neighbor-id vector to an idealised one.

    Computed as <ori, good> / <ori, ori>.  Returns 0.0 when the original
    neighbor vector is all zeros — previously this divided by zero and
    produced NaN (e.g. for a node whose only neighbor is node 0).
    """
    s = np.sum(np.dot(ori_neighbors, good_neighbors))
    max_s = np.sum(np.power(ori_neighbors, 2))
    if max_s == 0:
        return 0.0
    return s / max_s

def adj_train_analysis(adj, minimum_neighbors, similarity_threshold, step=1, low_quality_score=0.2):
    """Score each node of a csr adjacency by neighborhood quality.

    For every node, its sorted neighbor ids are compared against an evenly
    spaced "ideal" id sequence centered on their mean; nodes whose similarity
    exceeds ``similarity_threshold`` get score 1, all others (including nodes
    with fewer than ``minimum_neighbors`` neighbors) get ``low_quality_score``.

    Returns a numpy array of per-node scores, indexed by node id.
    """
    nodes_num = adj.get_shape()[0]
    sample_mark = []

    for i in range(nodes_num):
        adj_coo = adj.getrow(i).tocoo()
        neighbors = adj_coo.col.reshape(-1)
        if len(neighbors) < minimum_neighbors:
            sample_mark.append(low_quality_score)
            continue

        avg = int(neighbors.mean())
        neighbors_length = len(neighbors)
        # Build an arithmetic sequence of ids (spacing ``step``) of the same
        # length as ``neighbors``, centered on the mean id; the end offset
        # differs for even/odd lengths so the count comes out equal.
        if neighbors_length % 2 == 0:
            good_neighbors = np.arange((avg-neighbors_length//2*step+step), (avg+neighbors_length//2*step+1*step), step, int)
        else:
            good_neighbors = np.arange((avg-neighbors_length//2*step+step), (avg+neighbors_length//2*step+2*step), step, int)

        similarity = dot_product_ratio(neighbors, good_neighbors)
        if similarity > similarity_threshold:
            sample_mark.append(1)
        else:
            sample_mark.append(low_quality_score)

    return np.asarray(sample_mark)
+ """ + def __init__(self, gen): + self.gen = gen + self.lock = threading.Lock() + + def __iter__(self): + return self + + def __next__(self): + with self.lock: + return self.gen.__next__() \ No newline at end of file diff --git a/sgl/search/gda_hpo/search_config.py b/sgl/search/gda_hpo/search_config.py new file mode 100644 index 0000000..84dcb6a --- /dev/null +++ b/sgl/search/gda_hpo/search_config.py @@ -0,0 +1,51 @@ +from typing import List +from openbox import space as osp + +import sgl.models.homo.gda as GDAModel +import sgl.tasks as Task + +class BaseGDAConfigManager: + def __init__(self, gda_model_name: str, task_name: str, model_keys: List[str], task_keys: List[str], const_model_kwargs: dict, const_task_kwargs: dict, hier_params: dict): + self._gda_model_name = gda_model_name + self._task_name = task_name + self._model_keys = model_keys + self._task_keys = task_keys + self._const_model_kwargs = const_model_kwargs + self._const_task_kwargs = const_task_kwargs + self._config_space = osp.Space() + self._setupSpace(hier_params) + + def _configTarget(self, params: dict): + model_kwargs, task_kwargs = self._const_model_kwargs.copy(), self._const_task_kwargs.copy() + for p_name, p_value in params.items(): + if p_name in self._model_keys: + model_kwargs.update({p_name: p_value}) + elif p_name in self._task_keys: + task_kwargs.update({p_name: p_value}) + else: + raise ValueError(f"Get unexpected parameter {p_name}") + model = getattr(GDAModel, self._gda_model_name)(**model_kwargs) + task = getattr(Task, self._task_name)(model=model, **task_kwargs) + acc_res = task._execute() + + return dict(objectives=[-acc_res]) + + def _configSpace(self): + return self._config_space + + def _configFunction(self, config_space: osp.Configuration): + params = config_space.get_dictionary().copy() + result = self._configTarget(params) + return result + + def _setupSpace(self, hier_params: dict): + for cls, variables in hier_params.items(): + """ + cls: str, variable class, Real, Int, 
Constant + variables: dict, key = variable name (e.g., alpha, temperature), + value = variable property (e.g., lower=0, upper=1, default_value=0.4, q=0.01) + """ + variable_list = [] + for var_name, var_kwargs in variables.items(): + variable_list.append(getattr(osp, cls)(var_name, **var_kwargs)) + self._config_space.add_variables(variable_list) \ No newline at end of file diff --git a/sgl/search/search_config.py b/sgl/search/search_config.py index e22b8fa..591b74f 100644 --- a/sgl/search/search_config.py +++ b/sgl/search/search_config.py @@ -1,9 +1,4 @@ import numpy as np -<<<<<<< Updated upstream -from sgl.search.search_models import SearchModel -from sgl.search.auto_search import SearchManager -======= ->>>>>>> Stashed changes from openbox.utils.config_space import ConfigurationSpace, UniformIntegerHyperparameter from sgl.search.auto_search import SearchManager diff --git a/sgl/search/search_config_dist.py b/sgl/search/search_config_dist.py index 97c2316..dbd1685 100644 --- a/sgl/search/search_config_dist.py +++ b/sgl/search/search_config_dist.py @@ -1,4 +1,3 @@ -import argparse import numpy as np from sgl.search.auto_search_dist import SearchManagerDist from sgl.search.search_models_dist import SearchModelDist diff --git a/sgl/search/search_models.py b/sgl/search/search_models.py index fe6a9e4..c4195df 100644 --- a/sgl/search/search_models.py +++ b/sgl/search/search_models.py @@ -2,7 +2,7 @@ from sgl.models.simple_models import LogisticRegression, ResMultiLayerPerceptron from sgl.operators.graph_op import LaplacianGraphOp, PprGraphOp from sgl.operators.message_op import LastMessageOp, ConcatMessageOp, MeanMessageOp, SimpleWeightedMessageOp, \ - LearnableWeightedMessageOp, IterateLearnableWeightedMessageOp, SumMessageOp, MaxMessageOp, MinMessageOp + LearnableWeightedMessageOp, SumMessageOp, MaxMessageOp, MinMessageOp class SearchModel(BaseSGAPModel): diff --git a/sgl/tasks/__init__.py b/sgl/tasks/__init__.py index 195868c..6a9f9d8 100644 --- 
a/sgl/tasks/__init__.py +++ b/sgl/tasks/__init__.py @@ -1,5 +1,6 @@ from .node_classification import HeteroNodeClassification from .node_classification import NodeClassification +from .node_classification_sampling import NodeClassification_Sampling, NodeClassification_RecycleSampling from .node_clustering import NodeClustering from .node_clustering import NodeClusteringNAFS from .link_prediction import LinkPredictionGAE @@ -7,6 +8,7 @@ from .correct_and_smooth import NodeClassification_With_CorrectAndSmooth from .node_classification_with_label_use import NodeClassificationWithLabelUse from .node_classification_dist import NodeClassificationDist +from .gda_specific_tasks.node_classification_GAug import NodeClassificationGAugO, NodeClassificationGAugM __all__ = [ "NodeClassification", @@ -17,5 +19,9 @@ "LinkPredictionNAFS", "NodeClassification_With_CorrectAndSmooth", "NodeClassificationWithLabelUse", - "NodeClassificationDist" + "NodeClassificationDist", + "NodeClassification_Sampling", + "NodeClassification_RecycleSampling", + "NodeClassificationGAugO", + "NodeClassificationGAugM" ] diff --git a/sgl/tasks/gda_specific_tasks/node_classification_GAug.py b/sgl/tasks/gda_specific_tasks/node_classification_GAug.py new file mode 100644 index 0000000..03d835a --- /dev/null +++ b/sgl/tasks/gda_specific_tasks/node_classification_GAug.py @@ -0,0 +1,374 @@ +import gc +import os +import time +import torch +import torch.nn as nn +from torch.optim import Adam +from torch.utils.data import DataLoader +import torch.nn.functional as F +import numpy as np +import scipy.sparse as sp + +from sgl.tasks.base_task import BaseTask +from sgl.tasks.utils import set_seed, accuracy, MultipleOptimizer + +class NodeClassificationGAugO(BaseTask): + def __init__(self, dataset, model, lr, weight_decay, epochs, device, beta, warmup, max_patience, pretrain_ep, pretrain_nc, runs=1, verbose=True, seed=12345, pretrain_batch_size=None, train_batch_size=None, ep_lr=None): + super(NodeClassificationGAugO, 
self).__init__() + + self.__dataset = dataset + self.__labels = self.__dataset.y + + self.__model = model + self.__optimizer = MultipleOptimizer(Adam(model.ep_net.parameters(), lr=lr), + Adam(model.nc_net.parameters(), lr=lr, weight_decay=weight_decay)) + + self.__lr = lr + self.__ep_lr = ep_lr if ep_lr is not None else lr + self.__weight_decay = weight_decay + + self.__epochs = epochs + self.__device = device + self.__seed = seed + self.__runs = runs + self.__verbose = verbose + + self.__warmup = warmup + self.__beta = beta + self.__max_patience = max_patience + + self.__pretrain_ep = pretrain_ep + self.__pretrain_nc = pretrain_nc + self.__pretrain_batch_size = pretrain_batch_size + self.__train_batch_size = train_batch_size + + self.__test_acc = self._execute() + + @property + def test_acc(self): + return self.__test_acc + + @staticmethod + def get_lr_schedule_by_sigmoid(n_epochs, lr, warmup): + """ schedule the learning rate with the sigmoid function. + The learning rate will start with near zero and end with near lr """ + factors = torch.FloatTensor(np.arange(n_epochs)) + factors = ((factors / factors[-1]) * (warmup * 2)) - warmup + factors = torch.sigmoid(factors) + # range the factors to [0, 1] + factors = (factors - factors[0]) / (factors[-1] - factors[0]) + lr_schedule = factors * lr + return lr_schedule + + @staticmethod + def loss_fn(nc_logits, norm_w, adj_logits, adj_orig, pos_weight, labels, global_idx, beta, local_idx=None): + if labels.dim() == 2: + nc_criterion = nn.BCEWithLogitsLoss() + else: + nc_criterion = nn.CrossEntropyLoss() + if local_idx is None: + local_idx = global_idx + loss = nc_criterion(nc_logits[local_idx], labels[global_idx]) + ep_loss = norm_w * F.binary_cross_entropy_with_logits(adj_logits, adj_orig, pos_weight=pos_weight) + loss += beta * ep_loss + + return loss + + @staticmethod + def extend_batch(seed_batch, hops, adj_matrix): + nodes_batch = seed_batch + for _ in range(hops): + neigh_block = adj_matrix[nodes_batch] + 
    def _pretrain_ep_net(self, adj, features, adj_orig, norm_w, pos_weight):
        """ pretrain the edge prediction network """
        # A dedicated Adam is used so pretraining does not disturb the state
        # of the joint MultipleOptimizer used in the main training loop.
        optimizer = Adam(self.__model.ep_net.parameters(), lr=self.__ep_lr)

        self.__model.train()
        for _ in range(self.__pretrain_ep):
            adj_logits = self.__model.ep_net(adj, features)
            # Class-weighted reconstruction loss against the original adjacency.
            loss = norm_w * F.binary_cross_entropy_with_logits(adj_logits, adj_orig, pos_weight=pos_weight)
            if not self.__model.gae:
                # VGAE variant: subtract the (negative) KL term of the latent
                # Gaussian so the total is reconstruction + KL.
                mu = self.__model.ep_net.mean
                lgstd = self.__model.ep_net.logstd
                kl_divergence = 0.5 / adj_logits.size(0) * (1 + 2*lgstd - mu**2 - torch.exp(2*lgstd)).sum(1).mean()
                loss -= kl_divergence
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    def _minibatch_pretrain_ep_net(self, adj, features, adj_orig, norm_w, pos_weight):
        """ pretrain the edge prediction network in mini-batches"""
        optimizer = Adam(self.__model.ep_net.parameters(), lr=self.__ep_lr)
        num_nodes = features.size(0)
        # Batches are plain node-id ranges; the ep_net restricts its logits
        # to the given node_batch.
        train_loader = DataLoader(range(num_nodes), batch_size=self.__pretrain_batch_size, shuffle=True, drop_last=False)

        self.__model.train()
        for _ in range(self.__pretrain_ep):
            for node_batch in train_loader:
                # Dense sub-block of the original adjacency for this batch.
                sub_adj_orig = adj_orig[node_batch][:, node_batch].to(self.__device)
                sub_adj_logits = self.__model.ep_net(adj, features, node_batch)
                loss = norm_w * F.binary_cross_entropy_with_logits(sub_adj_logits, sub_adj_orig, pos_weight=pos_weight)
                if not self.__model.gae:
                    # Same VGAE KL term as the full-batch variant.
                    mu = self.__model.ep_net.mean
                    lgstd = self.__model.ep_net.logstd
                    kl_divergence = 0.5 / sub_adj_logits.size(0) * (1 + 2*lgstd - mu**2 - torch.exp(2*lgstd)).sum(1).mean()
                    loss -= kl_divergence
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

    def _pretrain_nc_net(self, adj, features):
        """ pretrain the node classification network """
        optimizer = Adam(self.__model.nc_net.parameters(), lr=self.__lr, weight_decay=self.__weight_decay)
        # loss function for node classification
        # 2-D labels indicate a multi-label setup, hence BCE; otherwise CE.
        if self.__labels.dim() == 2:
            nc_criterion = nn.BCEWithLogitsLoss()
        else:
            nc_criterion = nn.CrossEntropyLoss()

        for _ in range(self.__pretrain_nc):
            self.__model.train()
            nc_logits = self.__model.nc_net(features, adj)
            # losses
            loss = nc_criterion(nc_logits[self.__dataset.train_idx], self.__labels[self.__dataset.train_idx])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    def _train(self, adj_norm, adj_orig, features, norm_w, pos_weight, epoch, ep_lr_schedule):
        """One full-batch joint step (ep_net + nc_net); returns (loss, acc) on the train split."""
        # update the learning rate for ep_net if needed
        if self.__warmup:
            self.__optimizer.update_lr(0, ep_lr_schedule[epoch])

        self.__model.train()
        nc_logits, adj_logits = self.__model(adj_norm, adj_orig, features)
        # Joint objective: classification loss + beta * edge-prediction loss.
        loss_train = self.loss_fn(nc_logits, norm_w, adj_logits, adj_orig, pos_weight, self.__labels, self.__dataset.train_idx, self.__beta)
        acc_train = accuracy(nc_logits[self.__dataset.train_idx], self.__labels[self.__dataset.train_idx])
        self.__optimizer.zero_grad()
        loss_train.backward()
        self.__optimizer.step()

        return loss_train, acc_train

    def _minibatch_train(self, adj_matrix, adj_norm, adj_orig, features, norm_w, pos_weight, epoch, ep_lr_schedule):
        """Mini-batch joint step over 2-hop-extended seed batches.

        Returns (mean train loss, train accuracy) weighted by seed-batch size.
        """
        # update the learning rate for ep_net if needed
        if self.__warmup:
            self.__optimizer.update_lr(0, ep_lr_schedule[epoch])

        # Seed nodes are a 1/20 fraction of the batch budget; the rest of the
        # budget is filled by their 2-hop neighborhood in extend_batch.
        seed_size = self.__train_batch_size // 20
        num_batches = int((len(self.__dataset.train_idx) + seed_size - 1) / seed_size)
        node_idx_all = np.array(self.__dataset.train_idx)
        np.random.shuffle(node_idx_all)
        seed_batches = np.array_split(node_idx_all, num_batches)

        train_loss = 0.
        train_num, num_correct = 0, 0
        self.__model.train()
        for seed_batch in seed_batches:
            nodes_batch = self.extend_batch(seed_batch, 2, adj_matrix)
            if len(nodes_batch) >= self.__train_batch_size:
                nodes_batch = nodes_batch[:self.__train_batch_size]

            sub_adj_orig = adj_orig[nodes_batch][:, nodes_batch].to(self.__device)
            nc_logits, sub_adj_logits = self.__model(adj_norm, sub_adj_orig, features, nodes_batch)
            # The seed nodes occupy the first len(seed_batch) output rows, so
            # loss/accuracy are computed on that local prefix only.
            num_correct += nc_logits[range(len(seed_batch))].argmax(dim=1).eq(self.__labels[seed_batch]).sum().long().item()
            loss = self.loss_fn(nc_logits, norm_w, sub_adj_logits, sub_adj_orig, pos_weight, self.__labels, seed_batch, self.__beta, range(len(seed_batch)))
            self.__optimizer.zero_grad()
            loss.backward()
            self.__optimizer.step()
            train_loss += loss.item() * len(seed_batch)
            train_num += len(seed_batch)

        return train_loss / train_num, num_correct / train_num

    def _evaluate(self, features, adj):
        """Compute (val, test) accuracy of nc_net with gradients disabled."""
        self.__model.eval()
        with torch.no_grad():
            nc_logits_eval = self.__model.nc_net(features, adj)
            acc_val = accuracy(nc_logits_eval[self.__dataset.val_idx], self.__labels[self.__dataset.val_idx])
            acc_test = accuracy(nc_logits_eval[self.__dataset.test_idx], self.__labels[self.__dataset.test_idx])

        return acc_val, acc_test
self.__model.reset_parameters() + # pretrain VGAE if needed + if self.__pretrain_ep: + if self.__pretrain_batch_size is None: + self._pretrain_ep_net(adj_norm, features, adj_orig, norm_w, pos_weight) + else: + self._minibatch_pretrain_ep_net(adj_norm, features, adj_orig, norm_w, pos_weight) + # pretrain GCN if needed + if self.__pretrain_nc: + self._pretrain_nc_net(adj, features) + # get the learning rate schedule for the optimizer of ep_net if needed + if self.__warmup: + ep_lr_schedule = self.get_lr_schedule_by_sigmoid(self.__epochs, self.__lr, self.__warmup) + else: + ep_lr_schedule = None + + # keep record of the best validation accuracy for early stopping + best_acc_val, best_acc_test, patience_step = 0., 0., 0 + # train model + for epoch in range(self.__epochs): + t = time.time() + + if self.__train_batch_size is None: + loss_train, acc_train = self._train(adj_norm, adj_orig, features, norm_w, pos_weight, epoch, ep_lr_schedule) + else: + loss_train, acc_train = self._minibatch_train(adj_matrix, adj_norm, adj_orig, features, norm_w, pos_weight, epoch, ep_lr_schedule) + acc_val, acc_test = self._evaluate(features, adj) + + if self.__verbose: + print('Epoch: {:03d}'.format(epoch + 1), + 'loss_train: {:.4f}'.format(loss_train), + 'acc_train: {:.4f}'.format(acc_train), + 'acc_val: {:.4f}'.format(acc_val), + 'acc_test: {:.4f}'.format(acc_test), + 'time: {:.4f}s'.format(time.time() - t)) + + if acc_val > best_acc_val: + best_acc_val = acc_val + best_acc_test = acc_test + patience_step = 0 + else: + patience_step += 1 + if patience_step == self.__max_patience: + break + + acc_test_list.append(best_acc_test) + + # release RAM and GPU memory + del adj, features, adj_orig, adj_norm + torch.cuda.empty_cache() + gc.collect() + + return np.mean(acc_test_list) + + +class NodeClassificationGAugM(BaseTask): + def __init__(self, dataset, model, lr, weight_decay, epochs, device, runs=1, verbose=True, loss_fn=nn.CrossEntropyLoss(), seed=42, max_patience=100): + 
    def _train(self, adj_norm, features):
        """One full-batch optimisation step on the train split.

        Returns (loss_train, acc_train); gradients are applied in place via
        the task's Adam optimizer.
        """
        self.__model.train()
        pred_y = self.__model(adj_norm, features)[self.__dataset.train_idx]
        ground_truth_y = self.__labels[self.__dataset.train_idx]
        loss_train = self.__loss_fn(pred_y, ground_truth_y)
        acc_train = accuracy(pred_y, ground_truth_y)

        self.__optimizer.zero_grad()
        loss_train.backward()
        self.__optimizer.step()

        return loss_train, acc_train

    def _evaluate(self, adj_norm, features):
        """Compute (val, test) accuracy with gradients disabled."""
        self.__model.eval()
        with torch.no_grad():
            pred_y = self.__model(adj_norm, features)
            acc_val = accuracy(pred_y[self.__dataset.val_idx], self.__labels[self.__dataset.val_idx])
            acc_test = accuracy(pred_y[self.__dataset.test_idx], self.__labels[self.__dataset.test_idx])

        return acc_val, acc_test
+ patience = 0 + for epoch in range(self.__epochs): + t = time.time() + loss_train, acc_train = self._train(adj, features) + acc_val, acc_test = self._evaluate(adj, features) + + if self.__verbose: + print('Epoch: {:03d}'.format(epoch + 1), + 'loss_train: {:.4f}'.format(loss_train), + 'acc_train: {:.4f}'.format(acc_train), + 'acc_val: {:.4f}'.format(acc_val), + 'acc_test: {:.4f}'.format(acc_test), + 'time: {:.4f}s'.format(time.time() - t)) + + if acc_val > best_val: + best_val = acc_val + best_test = acc_test + patience = 0 + else: + patience += 1 + if patience == self.__max_patience: + break + + acc_val_list.append(best_val) + acc_test_list.append(best_test) + + mean_acc_test = np.mean(acc_test_list) + if self.__verbose: + print("Optimization Finished!") + print("Total time elapsed: {:.4f}s".format(time.time() - t_total)) + print(f'Best val: {np.mean(acc_val_list):.4f}, best test: {mean_acc_test:.4f}') + + del adj, features + torch.cuda.empty_cache() + gc.collect() + + return mean_acc_test + diff --git a/sgl/tasks/node_classification.py b/sgl/tasks/node_classification.py index e352d4c..34aa3ca 100644 --- a/sgl/tasks/node_classification.py +++ b/sgl/tasks/node_classification.py @@ -3,14 +3,18 @@ import torch.nn as nn from torch.optim import Adam from torch.utils.data import DataLoader +import numpy as np +from typing import Callable from sgl.tasks.base_task import BaseTask -from sgl.tasks.utils import accuracy, set_seed, train, mini_batch_train, evaluate, mini_batch_evaluate +from sgl.tasks.utils import accuracy, set_seed +from sgl.tasks.utils import train as vanilla_train, evaluate as vanilla_evaluate +from sgl.tasks.utils import mini_batch_train as vanilla_mini_batch_train, mini_batch_evaluate as vanilla_mini_batch_evaluate class NodeClassification(BaseTask): def __init__(self, dataset, model, lr, weight_decay, epochs, device, loss_fn=nn.CrossEntropyLoss(), seed=42, - train_batch_size=None, eval_batch_size=None): + patience=100, runs=1, verbose=True, 
train_batch_size=None, eval_batch_size=None): super(NodeClassification, self).__init__() self.__dataset = dataset @@ -23,6 +27,9 @@ def __init__(self, dataset, model, lr, weight_decay, epochs, device, loss_fn=nn. self.__loss_fn = loss_fn self.__device = device self.__seed = seed + self.__patience = patience + self.__runs = runs + self.__verbose = verbose self.__mini_batch = False if train_batch_size is not None: @@ -46,49 +53,74 @@ def _execute(self): set_seed(self.__seed) pre_time_st = time.time() - self.__model.preprocess(self.__dataset.adj, self.__dataset.x) + self.__model.preprocess(self.__dataset.adj, self.__dataset.x, self.__device) pre_time_ed = time.time() - print(f"Preprocessing done in {(pre_time_ed - pre_time_st):.4f}s") + if self.__verbose: + print(f"Preprocessing done in {(pre_time_ed - pre_time_st):.4f}s") self.__model = self.__model.to(self.__device) self.__labels = self.__labels.to(self.__device) - t_total = time.time() - best_val = 0. - best_test = 0. - for epoch in range(self.__epochs): - t = time.time() - if self.__mini_batch is False: - loss_train, acc_train = train(self.__model, self.__dataset.train_idx, self.__labels, self.__device, - self.__optimizer, self.__loss_fn) - acc_val, acc_test = evaluate(self.__model, self.__dataset.val_idx, self.__dataset.test_idx, - self.__labels, self.__device) - else: - loss_train, acc_train = mini_batch_train(self.__model, self.__dataset.train_idx, self.__train_loader, - self.__labels, self.__device, self.__optimizer, self.__loss_fn) - acc_val, acc_test = mini_batch_evaluate(self.__model, self.__dataset.val_idx, self.__val_loader, - self.__dataset.test_idx, self.__test_loader, self.__labels, - self.__device) + best_test_list = [] + for _ in range(self.__runs): + + self.__model.reset_parameters() + t_total = time.time() + best_val = 0. + best_test = 0. 
+ patience = 0 + for epoch in range(self.__epochs): + t = time.time() + if self.__mini_batch is False: + train = self.__model.model_train if hasattr(self.__model, "model_train") and isinstance(self.__model.model_train, Callable) \ + else vanilla_train + loss_train, acc_train = train(self.__model, self.__dataset.train_idx, self.__labels, self.__device, + self.__optimizer, self.__loss_fn, accuracy) + + evaluate = self.__model.model_evaluate if hasattr(self.__model, "model_evaluate") and isinstance(self.__model.model_evaluate, Callable) \ + else vanilla_evaluate + acc_val, acc_test = evaluate(self.__model, self.__dataset.val_idx, self.__dataset.test_idx, + self.__labels, self.__device, accuracy) + else: + mini_batch_train = self.__model.model_mini_batch_train if hasattr(self.__model, "model_mini_batch_train") and isinstance(self.__model.model_mini_batch_train, Callable) \ + else vanilla_mini_batch_train + loss_train, acc_train = mini_batch_train(self.__model, self.__dataset.train_idx, self.__train_loader, + self.__labels, self.__device, self.__optimizer, self.__loss_fn) + mini_batch_evaluate = self.__model.model_mini_batch_evaluate if hasattr(self.__model, "model_mini_batch_evaluate") and isinstance(self.__model.model_mini_batch_evaluate, Callable) \ + else vanilla_mini_batch_evaluate + acc_val, acc_test = mini_batch_evaluate(self.__model, self.__dataset.val_idx, self.__val_loader, + self.__dataset.test_idx, self.__test_loader, self.__labels, + self.__device) + if self.__verbose: + print('Epoch: {:03d}'.format(epoch + 1), + 'loss_train: {:.4f}'.format(loss_train), + 'acc_train: {:.4f}'.format(acc_train), + 'acc_val: {:.4f}'.format(acc_val), + 'acc_test: {:.4f}'.format(acc_test), + 'time: {:.4f}s'.format(time.time() - t)) + + if acc_val > best_val: + patience = 0 + best_val = acc_val + best_test = acc_test + else: + patience += 1 + if patience == self.__patience: + break - print('Epoch: {:03d}'.format(epoch + 1), - 'loss_train: {:.4f}'.format(loss_train), - 'acc_train: 
{:.4f}'.format(acc_train), - 'acc_val: {:.4f}'.format(acc_val), - 'acc_test: {:.4f}'.format(acc_test), - 'time: {:.4f}s'.format(time.time() - t)) + acc_val, acc_test = self._postprocess() if acc_val > best_val: best_val = acc_val best_test = acc_test - acc_val, acc_test = self._postprocess() - if acc_val > best_val: - best_val = acc_val - best_test = acc_test - - print("Optimization Finished!") - print("Total time elapsed: {:.4f}s".format(time.time() - t_total)) - print(f'Best val: {best_val:.4f}, best test: {best_test:.4f}') - return best_test + best_test_list.append(best_test) + if self.__verbose: + print("Optimization Finished!") + print("Total time elapsed: {:.4f}s".format(time.time() - t_total)) + print(f'Best val: {best_val:.4f}, best test: {best_test:.4f}') + + mean_best_test = np.mean(best_test_list) + return mean_best_test def _postprocess(self): self.__model.eval() @@ -184,14 +216,14 @@ def _execute(self, random_subgraph_num=-1, subgraph_edge_type_num=-1, for epoch in range(self.__epochs): t = time.time() if self.__mini_batch is False: - loss_train, acc_train = train(self.__model, self.__dataset.train_idx, self.__labels, self.__device, + loss_train, acc_train = vanilla_train(self.__model, self.__dataset.train_idx, self.__labels, self.__device, self.__optimizer, self.__loss_fn) - acc_val, acc_test = evaluate(self.__model, self.__dataset.val_idx, self.__dataset.test_idx, + acc_val, acc_test = vanilla_evaluate(self.__model, self.__dataset.val_idx, self.__dataset.test_idx, self.__labels, self.__device) else: - loss_train, acc_train = mini_batch_train(self.__model, self.__dataset.train_idx, self.__train_loader, + loss_train, acc_train = vanilla_mini_batch_train(self.__model, self.__dataset.train_idx, self.__train_loader, self.__labels, self.__device, self.__optimizer, self.__loss_fn) - acc_val, acc_test = mini_batch_evaluate(self.__model, self.__dataset.val_idx, self.__val_loader, + acc_val, acc_test = vanilla_mini_batch_evaluate(self.__model, 
self.__dataset.val_idx, self.__val_loader, self.__dataset.test_idx, self.__test_loader, self.__labels, self.__device) diff --git a/sgl/tasks/node_classification_sampling.py b/sgl/tasks/node_classification_sampling.py new file mode 100644 index 0000000..eb90d9a --- /dev/null +++ b/sgl/tasks/node_classification_sampling.py @@ -0,0 +1,353 @@ +import time +import torch +import numpy as np +from typing import Callable +from torch.optim import Adam +import torch.nn.functional as F +from torch.utils.data import DataLoader + +from sgl.data.utils import RandomLoader, SplitLoader +from sgl.tasks.base_task import BaseTask +from sgl.tasks.utils import accuracy, set_seed, train, evaluate +from sgl.tasks.utils import mini_batch_train as vanilla_mini_batch_train, mini_batch_evaluate as vanilla_mini_batch_evaluate + + +class NodeClassification_Sampling(BaseTask): + def __init__(self, dataset, model, lr, weight_decay, epochs, device, loss_fn="nll_loss", seed=42, + inductive=False, train_batch_size=None, eval_batch_size=None, eval_freq=1, eval_start=1, runs=1, verbose=True, max_patience=50, **kwargs): + super(NodeClassification_Sampling, self).__init__() + + self.__dataset = dataset + + self.__model = model + self.__optimizer = Adam(model.parameters(), lr=lr, + weight_decay=weight_decay) + self.__epochs = epochs + self.__eval_freq = eval_freq + self.__eval_start = eval_start + self.__loss_fn = getattr(F, loss_fn) if isinstance(loss_fn, str) else loss_fn + self.__device = device + self.__seed = seed + self.__runs = runs + self.__verbose = verbose + self.__max_patience = max_patience + self.__inductive = inductive + self.__train_batch_size= train_batch_size + self.__eval_batch_size = eval_batch_size + self.__mini_batch_train = True if train_batch_size is not None else False + self.__mini_batch_eval = True if eval_batch_size is not None else False + self.__train_determined_sample = False + self.__eval_determined_sample = False + self.__eval_together = kwargs.get("eval_together", False) 
+ if "train_graph_number" in kwargs.keys(): + self.__train_graph_number = kwargs["train_graph_number"] + self.__train_determined_sample = True + if "eval_graph_number" in kwargs.keys(): + self.__eval_graph_number = kwargs["eval_graph_number"] + self.__eval_determined_sample = True + self.__train_num_workers = kwargs.get("train_num_workers", 0) + self.__eval_num_workers = kwargs.get("eval_num_workers", 0) + self.__pin_memory = kwargs.get("pin_memory", False) + self.__test_acc = self._execute() + + @property + def test_acc(self): + return self.__test_acc + + def _execute(self): + set_seed(self.__seed) + + pre_time_st = time.time() + mini_batch = self.__mini_batch_train and self.__mini_batch_eval + kwargs = {"mini_batch": mini_batch} + if self.__inductive is True: + kwargs.update({"inductive": self.__inductive, "train_idx": self.__dataset.train_idx}) + self.__model.preprocess(adj=self.__dataset.adj, x=self.__dataset.x, y=self.__dataset.y, device=self.__device, **kwargs) + pre_time_ed = time.time() + if self.__verbose: + print(f"Preprocessing done in {(pre_time_ed - pre_time_st):.4f}s") + + if self.__mini_batch_train: + if self.__train_determined_sample: + self.__model.pre_sample("train") + self.__train_loader = DataLoader( + range(self.__train_graph_number), batch_size=self.__train_batch_size, num_workers=self.__train_num_workers, collate_fn=lambda x: self.__model.collate_fn(x, "train"), shuffle=True, drop_last=False, pin_memory=self.__pin_memory) + else: + if self.__inductive is False: + self.__train_loader = DataLoader( + self.__dataset.train_idx, batch_size=self.__train_batch_size, num_workers=self.__train_num_workers, collate_fn=self.__model.train_collate_fn, shuffle=True, drop_last=False, pin_memory=self.__pin_memory) + else: + self.__train_loader = DataLoader( + range(len(self.__dataset.train_idx)), batch_size=self.__train_batch_size, num_workers=self.__train_num_workers, collate_fn=self.__model.train_collate_fn, shuffle=True, drop_last=False, 
pin_memory=self.__pin_memory) + + if self.__mini_batch_eval: + if self.__eval_determined_sample: + self.__model.pre_sample("eval") + self.__val_loader = DataLoader( + range(self.__eval_graph_number), batch_size=self.__eval_batch_size, num_workers=self.__eval_num_workers, collate_fn=lambda x: self.__model.collate_fn(x, "val"), shuffle=False, drop_last=False, pin_memory=self.__pin_memory) + self.__test_loader = DataLoader( + range(self.__eval_graph_number), batch_size=self.__eval_batch_size, num_workers=self.__eval_num_workers, collate_fn=lambda x: self.__model.collate_fn(x, "test"), shuffle=False, drop_last=False, pin_memory=self.__pin_memory) + self.__all_eval_loader = DataLoader( + range(self.__eval_graph_number), batch_size=self.__eval_batch_size, num_workers=self.__eval_num_workers, collate_fn=lambda x: self.__model.collate_fn(x, "val_test"), shuffle=False, drop_last=False, pin_memory=self.__pin_memory) + else: + if self.__eval_together is False: + self.__val_loader = DataLoader( + self.__dataset.val_idx, batch_size=self.__eval_batch_size, num_workers=self.__eval_num_workers, collate_fn=self.__model.eval_collate_fn, shuffle=False, drop_last=False, pin_memory=self.__pin_memory) + self.__test_loader = DataLoader( + self.__dataset.test_idx, batch_size=self.__eval_batch_size, num_workers=self.__eval_num_workers, collate_fn=self.__model.eval_collate_fn, shuffle=False, drop_last=False, pin_memory=self.__pin_memory) + self.__all_eval_loader = DataLoader( + self.__dataset.node_ids, batch_size=self.__eval_batch_size, num_workers=self.__eval_num_workers, collate_fn=self.__model.eval_collate_fn, shuffle=False, drop_last=False, pin_memory=self.__pin_memory) + + self.__model = self.__model.to(self.__device) + + best_test_list = [] + for _ in range(self.__runs): + self.__model.reset_parameters() + + t_total = time.time() + patience = 0 + best_val = 0. + best_test = 0. 
+ + for epoch in range(self.__epochs): + t = time.time() + if self.__mini_batch_train: + mini_batch_train = self.__model.model_train if hasattr(self.__model, "model_train") and isinstance(self.__model.model_train, Callable) \ + else vanilla_mini_batch_train + loss_train, acc_train = mini_batch_train(self.__model, self.__train_loader, self.__inductive, self.__device, self.__optimizer, self.__loss_fn) + else: + loss_train, acc_train = train(self.__model, self.__dataset.train_idx, self.__optimizer, self.__loss_fn) + + if epoch + 1 >= self.__eval_start and (epoch + 1) % self.__eval_freq == 0: + if self.__mini_batch_eval: + if self.__eval_together is False: + mini_batch_evaluate = self.__model.model_evaluate if hasattr(self.__model, "model_evaluate") and isinstance(self.__model.model_evaluate, Callable) \ + else vanilla_mini_batch_evaluate + acc_val, acc_test = mini_batch_evaluate(self.__model, self.__val_loader, self.__test_loader, self.__device) + else: + self.__model.eval() + outputs = self.__model.inference(self.__all_eval_loader, self.__device) + acc_train = accuracy(outputs[self.__dataset.train_idx], self.__dataset.y[self.__dataset.train_idx]) + acc_val = accuracy(outputs[self.__dataset.val_idx], self.__dataset.y[self.__dataset.val_idx]) + acc_test = accuracy(outputs[self.__dataset.test_idx], self.__dataset.y[self.__dataset.test_idx]) + else: + acc_val, acc_test = evaluate(self.__model, self.__dataset.val_idx, self.__dataset.test_idx) + + if acc_val > best_val: + best_val = acc_val + best_test = acc_test + patience = 0 + else: + patience += 1 + if patience == self.__max_patience: + break + + if self.__verbose: + print('Epoch: {:03d}'.format(epoch + 1), + 'loss_train: {:.4f}'.format(loss_train), + 'acc_train: {:.4f}'.format(acc_train), + 'acc_val: {:.4f}'.format(acc_val), + 'acc_test: {:.4f}'.format(acc_test), + 'time: {:.4f}s'.format(time.time() - t)) + else: + if self.__verbose: + print('Epoch: {:03d}'.format(epoch + 1), + 'loss_train: {:.4f}'.format(loss_train), 
+ 'acc_train: {:.4f}'.format(acc_train), + 'time: {:.4f}s'.format(time.time() - t)) + + acc_val, acc_test = self._postprocess() + if acc_val > best_val: + best_val = acc_val + best_test = acc_test + + best_test_list.append(best_test) + + print("Optimization Finished!") + print("Total time elapsed: {:.4f}s".format(time.time() - t_total)) + print(f'Best val: {best_val:.4f}, best test: {best_test:.4f}') + + return np.mean(best_test_list) + + def _postprocess(self): + self.__model.eval() + if self.__eval_determined_sample is False: + if self.__mini_batch_eval is False: + outputs, labels = self.__model.full_batch_prepare_forward( + self.__dataset.node_ids) + else: + outputs = self.__model.inference(self.__all_eval_loader, self.__device) + labels = self.__dataset.y + + # TODO: self.__model.postprocess now directly returns the raw outputs + final_output = self.__model.postprocess(self.__dataset.adj, outputs) + acc_val = accuracy( + final_output[self.__dataset.val_idx], labels[self.__dataset.val_idx]) + acc_test = accuracy( + final_output[self.__dataset.test_idx], labels[self.__dataset.test_idx]) + else: + val_outputs, val_labels = [], [] + test_outputs, test_labels = [], [] + for batch in self.__all_eval_loader: + batch_in, batch_out, block = batch + output = self.__model.model_forward(batch_in, block, self.__device) + output = self.__model.postprocess(block, output) + val_local_inds, val_global_inds = batch_out["val"] + test_local_inds, test_global_inds = batch_out["test"] + val_outputs.append(output[val_local_inds]) + val_labels.append(self.__labels[val_global_inds]) + test_outputs.append(output[test_local_inds]) + test_labels.append(self.__labels[test_global_inds]) + val_outputs = torch.vstack(val_outputs) + val_labels = torch.cat(val_labels) + test_outputs = torch.vstack(test_outputs) + test_labels = torch.cat(test_labels) + + acc_val = accuracy(val_outputs, val_labels) + acc_test = accuracy(test_outputs, test_labels) + + return acc_val, acc_test + +class 
NodeClassification_RecycleSampling(BaseTask): + def __init__(self, dataset, model, lr, weight_decay, num_iters, device, loss_fn="nll_loss", seed=42, + train_batch_size=1024, eval_batch_size=None, **kwargs): + super(NodeClassification_RecycleSampling, self).__init__() + + self.__dataset = dataset + self.__labels = self.__dataset.y + + self.__model = model + self.__optimizer = Adam(model.parameters(), lr=lr, + weight_decay=weight_decay) + self.__num_iters = num_iters + self.__loss_fn = getattr(F, loss_fn) if isinstance(loss_fn, str) else loss_fn + self.__device = device + self.__seed = seed + self.__train_loader = RandomLoader(dataset.train_idx, train_batch_size) + if eval_batch_size is not None: + self.__val_loader = SplitLoader(dataset.val_idx, eval_batch_size) + self.__test_loader = SplitLoader(dataset.test_idx, eval_batch_size) + self.__eval_minibatch = True + else: + self.__val_loader = self.__test_loader = None + self.__eval_minibatch = False + + self.__test_acc = self._execute() + + @property + def test_acc(self): + return self.__test_acc + + def _execute(self): + set_seed(self.__seed) + + pre_time_st = time.time() + self.__model.preprocess(adj=self.__dataset.adj, x=self.__dataset.x, val_dataloader=self.__val_loader, test_dataloader=self.__test_loader) + pre_time_ed = time.time() + print(f"Preprocessing done in {(pre_time_ed - pre_time_st):.4f}s") + + iter_cnt = 0 + val_score = 0 + best_val_score = 0 + + torch.cuda.synchronize() + train_time_st = time.time() + taus = self.__model.generate_taus(self.__num_iters) + + iter_id = 0 + generator = self.__model.flash_sampling(len(taus), self.__train_loader) + + for batch_in, batch_out, block in generator: + + batch_x = self.__model.processed_feature[batch_in].to(self.__device) + batch_y = self.__labels[batch_out].to(self.__device) + block.to_device(self.__device) + + for rec_itr in range(taus[iter_id]): + self.__optimizer.zero_grad() + + recycle_vector = None + new_batch_y = batch_y + + if rec_itr != 0: + 
recycle_vector = torch.cuda.FloatTensor(len(batch_out)).uniform_() > 0.2 + new_batch_y = batch_y[recycle_vector] + + self.__model.train() + pred = self.__model.model_forward(batch_x, block) + + if recycle_vector is not None: + pred = pred[recycle_vector] + + loss = self.__loss_fn(pred, new_batch_y) + loss.backward() + self.__optimizer.step() + + val_score = self._validation(iter_cnt, prev_score=val_score) + test_score = self._inference() + + if val_score > best_val_score: + best_val_score = val_score + best_test_score = test_score + + print('Iteration: {:03d}'.format(iter_cnt + 1), + 'loss_train: {:.4f}'.format(loss), + 'acc_val: {:.4f}'.format(val_score), + 'acc_test: {:.4f}'.format(test_score)) + + iter_cnt += 1 + + iter_id += 1 + + torch.cuda.synchronize() + train_time_ed = time.time() + print(f"Trianing done in {(train_time_ed - train_time_st):.4f}s") + print(f'Best val acc: {best_val_score:.4f}') + print(f'Best test acc: {best_test_score:.4f}') + + return best_test_score + + def _validation(self, iter_cnt, prev_score=None, val_freq=1): + if (iter_cnt + 1) % val_freq == 0: + self.__model.eval() + if self.__eval_minibatch is False: + val_y = self.__labels[self.__dataset.val_idx].to(self.__device) + val_pred = self.__model.model_forward(use_full=True)[self.__dataset.val_idx] + val_score = accuracy(val_pred, val_y) + else: + val_scores = [] + val_samples = self.__model.sequential_sampling(do_val=True) + for val_batch_in, val_batch_out, val_block in val_samples: + val_batch_x = self.__model.processed_feature[val_batch_in].to(self.__device) + val_batch_y = self.__labels[val_batch_out].to(self.__device) + val_block.to_device(self.__device) + + pred = self.__model.model_forward(val_batch_x, val_block) + val_score = accuracy(pred, val_batch_y) + val_batch_size = len(val_batch_out) + val_scores.append(val_score * val_batch_size) + val_score = np.sum(val_scores) / len(self.__dataset.val_idx) + return val_score + else: + return prev_score + + def _inference(self): + 
self.__model.eval() + if self.__eval_minibatch is False: + test_y = self.__labels[self.__dataset.test_idx].to(self.__device, non_blocking=True) + test_pred = self.__model.model_forward(use_full=True)[self.__dataset.test_idx] + test_score = accuracy(test_pred, test_y) + else: + test_scores = [] + test_samples = self.__model.sequential_sampling(do_val=False) + for test_batch_in, test_batch_out, test_block in test_samples: + test_batch_x = self.__model.processed_feature[test_batch_in].to(self.__device) + test_batch_y = self.__labels[test_batch_out].to(self.__device) + test_block.to_device(self.__device) + + pred = self.__model.model_forward(test_batch_x, test_block) + test_score = accuracy(pred, test_batch_y) + test_batch_size = len(test_batch_out) + test_scores.append(test_score * test_batch_size) + test_score = np.sum(test_scores) / len(self.__dataset.test_idx) + return test_score \ No newline at end of file diff --git a/sgl/tasks/utils.py b/sgl/tasks/utils.py index 3a8a631..9f61a90 100644 --- a/sgl/tasks/utils.py +++ b/sgl/tasks/utils.py @@ -1,7 +1,5 @@ -import random -import math import torch -import torch.nn.functional as F +import random import numpy as np import scipy.sparse as sp from sklearn.cluster import KMeans @@ -35,65 +33,74 @@ def add_labels(features, labels, idx, num_classes): onehot[idx, labels[idx]] = 1 return np.concatenate([features, onehot], axis=-1) -def evaluate(model, val_idx, test_idx, labels, device): +@torch.no_grad() +def evaluate(model, val_idx, test_idx): model.eval() - val_output = model.model_forward(val_idx, device) - test_output = model.model_forward(test_idx, device) + output, y = model(model.processed_feature, model.processed_block) - acc_val = accuracy(val_output, labels[val_idx]) - acc_test = accuracy(test_output, labels[test_idx]) + acc_val = accuracy(output[val_idx], y[val_idx]) + acc_test = accuracy(output[test_idx], y[test_idx]) return acc_val, acc_test - -def mini_batch_evaluate(model, val_idx, val_loader, test_idx, 
test_loader, labels, device): +@torch.no_grad() +def mini_batch_evaluate(model, val_loader, test_loader, device): model.eval() correct_num_val, correct_num_test = 0, 0 + + val_num = 0 for batch in val_loader: - val_output = model.model_forward(batch, device) - pred = val_output.max(1)[1].type_as(labels) - correct_num_val += pred.eq(labels[batch]).double().sum() - acc_val = correct_num_val / len(val_idx) + val_output, out_y = model.mini_batch_prepare_forward(batch, device) + pred = val_output.max(1)[1].type_as(out_y) + correct_num_val += pred.eq(out_y).double().sum() + val_num += len(out_y) + + acc_val = correct_num_val / val_num + test_num = 0 for batch in test_loader: - test_output = model.model_forward(batch, device) - pred = test_output.max(1)[1].type_as(labels) - correct_num_test += pred.eq(labels[batch]).double().sum() - acc_test = correct_num_test / len(test_idx) + test_output, out_y = model.mini_batch_prepare_forward(batch, device) + pred = test_output.max(1)[1].type_as(out_y) + correct_num_test += pred.eq(out_y).double().sum() + test_num += len(out_y) + acc_test = correct_num_test / test_num return acc_val.item(), acc_test.item() -def train(model, train_idx, labels, device, optimizer, loss_fn): +def train(model, train_idx, optimizer, loss_fn): model.train() optimizer.zero_grad() - train_output = model.model_forward(train_idx, device) - loss_train = loss_fn(train_output, labels[train_idx]) - acc_train = accuracy(train_output, labels[train_idx]) + train_output, out_y = model.full_batch_prepare_forward(train_idx) + loss_train = loss_fn(train_output, out_y) + acc_train = accuracy(train_output, out_y) loss_train.backward() optimizer.step() return loss_train.item(), acc_train -def mini_batch_train(model, train_idx, train_loader, labels, device, optimizer, loss_fn): +def mini_batch_train(model, train_loader, inductive, device, optimizer, loss_fn): model.train() correct_num = 0 loss_train_sum = 0. 
+ train_num = 0 + for batch in train_loader: - train_output = model.model_forward(batch, device) - loss_train = loss_fn(train_output, labels[batch]) + optimizer.zero_grad() - pred = train_output.max(1)[1].type_as(labels) - correct_num += pred.eq(labels[batch]).double().sum() + train_output, out_y = model.mini_batch_prepare_forward(batch, device, inductive=inductive) + loss_train = loss_fn(train_output, out_y) + pred = train_output.max(1)[1].type_as(out_y) + correct_num += pred.eq(out_y).double().sum() loss_train_sum += loss_train.item() - - optimizer.zero_grad() + train_num += len(out_y) + loss_train.backward() optimizer.step() loss_train = loss_train_sum / len(train_loader) - acc_train = correct_num / len(train_idx) + acc_train = correct_num / train_num return loss_train, acc_train.item() @@ -431,3 +438,23 @@ def sparse_mx_to_torch_sparse_tensor(sparse_mx): values = torch.from_numpy(sparse_mx.data) shape = torch.Size(sparse_mx.shape) return torch.sparse.FloatTensor(indices, values, shape) + +class MultipleOptimizer(): + """ a class that wraps multiple optimizers """ + def __init__(self, *op): + self.optimizers = op + + def zero_grad(self): + for op in self.optimizers: + op.zero_grad() + + def step(self): + for op in self.optimizers: + op.step() + + def update_lr(self, op_index, new_lr): + """ update the learning rate of one optimizer + Parameters: op_index: the index of the optimizer to update + new_lr: new learning rate for that optimizer """ + for param_group in self.optimizers[op_index].param_groups: + param_group['lr'] = new_lr \ No newline at end of file diff --git a/sgl/utils/__init__.py b/sgl/utils/__init__.py index d493058..d6768a4 100644 --- a/sgl/utils/__init__.py +++ b/sgl/utils/__init__.py @@ -1,5 +1,8 @@ from .auto_choose_gpu import GpuWithMaxFreeMem +from .basic_operations import sparse_mx_to_torch_sparse_tensor, sparse_mx_to_pyg_sparse_tensor __all__ = [ "GpuWithMaxFreeMem", + "sparse_mx_to_torch_sparse_tensor", + "sparse_mx_to_pyg_sparse_tensor" ] 
diff --git a/sgl/utils/basic_operations.py b/sgl/utils/basic_operations.py new file mode 100644 index 0000000..c5c397d --- /dev/null +++ b/sgl/utils/basic_operations.py @@ -0,0 +1,21 @@ +import torch +import numpy as np +from torch_sparse import SparseTensor + +def sparse_mx_to_torch_sparse_tensor(sparse_mx): + """Convert a scipy sparse matrix to a torch sparse tensor.""" + sparse_mx = sparse_mx.tocoo().astype(np.float32) + indices = torch.from_numpy( + np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64)) + values = torch.from_numpy(sparse_mx.data) + shape = torch.Size(sparse_mx.shape) + return torch.sparse.FloatTensor(indices, values, shape) + +def sparse_mx_to_pyg_sparse_tensor(sparse_mx): + """Convert a scipy sparse matrix to a PyG SparseTensor""" + sparse_mx = sparse_mx.tocoo() + row = torch.from_numpy(sparse_mx.row).to(torch.long) + col = torch.from_numpy(sparse_mx.col).to(torch.long) + value = torch.from_numpy(sparse_mx.data) + sparse_sizes = torch.Size(sparse_mx.shape) + return SparseTensor(row=row, col=col, value=value, sparse_sizes=sparse_sizes, is_sorted=True, trust_data=True) \ No newline at end of file