-
Notifications
You must be signed in to change notification settings - Fork 4
/
prepare.sh
executable file
·56 lines (47 loc) · 1.66 KB
/
prepare.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#!/bin/bash
# Dataset preparation script: downloads one dataset and runs its preprocessing.
# The dataset to prepare is selected by the first command-line argument.

# Stop at the first command that exits with a non-zero status.
set -o errexit

# Requested dataset name (empty when the script is called without arguments).
DATASET="$1"
# Download the yoochoose archive, unpack it next to the download, and run the
# preprocessing module for it.
#   wget -nc  : do not download again when the archive already exists
#   7zz  -aos : skip files that were already extracted
prepare_yoochoose() {
  local data_dir=datasets/yoochoose
  local archive_name=yoochoose-data.7z

  printf '%s\n' "Downloading yoochoose"
  wget -nc "https://s3-eu-west-1.amazonaws.com/yc-rdata/${archive_name}" -P "${data_dir}/"
  7zz x -aos "${data_dir}/${archive_name}" "-o${data_dir}/"

  printf '%s\n' "Preprocessing yoochoose"
  pipenv run python -m src.preprocessing --dataset yoochoose
}
# Ensure the diginetica archive exists locally, asking the user to download it
# by hand when it is missing.
#
# When the archive is absent, its directory is created, the download URL is
# opened with `open` (macOS) or `xdg-open` (elsewhere), and the function waits
# for the user to press enter.
#
# Arguments: none
# Outputs:   prompts on stdout; diagnostics on stderr
# Returns:   0 when the archive is present, 1 when it is still missing after
#            the prompt
download_digitinica() {
  local archive=datasets/diginetica/dataset-train-diginetica.zip
  # Single-quoted so the '?' in the query string can never act as a glob.
  local url='https://drive.google.com/uc?id=0B7XZSACQf0KdenRmMk8yVUU5LWc'
  local opener=xdg-open

  if [[ -f "$archive" ]]; then
    return 0
  fi

  mkdir -p "${archive%/*}"
  echo "Please download the dataset and save it to ${archive}"

  if [[ "$(uname)" == "Darwin" ]]; then
    opener=open
  fi

  # Opening a browser is only a convenience: a missing or failing opener
  # (e.g. on a headless machine) must not abort the script under `set -e`,
  # because the user can still fetch the file manually.
  if ! "$opener" "$url"; then
    echo "Could not open a browser automatically; please visit ${url}" >&2
  fi

  echo "Press enter to continue"
  read -r

  # Fail here with a clear message instead of letting a later step trip over
  # the missing file.
  if [[ ! -f "$archive" ]]; then
    echo "Archive not found at ${archive}" >&2
    return 1
  fi
}
# Obtain the diginetica archive via download_digitinica, unpack it into the
# dataset directory, and run the preprocessing module for it.
prepare_diginetica() {
  local data_dir=datasets/diginetica

  printf '%s\n' "Downloading diginetica"
  download_digitinica
  # -n: never overwrite files that are already extracted.
  unzip -n "${data_dir}/dataset-train-diginetica.zip" -d "${data_dir}/"

  printf '%s\n' "Preprocessing diginetica"
  pipenv run python -m src.preprocessing --dataset diginetica
}
# Download the otto dataset through the kaggle CLI (run inside the pipenv
# environment), unpack it, and run the preprocessing module for it.
prepare_otto() {
  local data_dir=datasets/otto

  printf '%s\n' "Downloading otto"
  pipenv run kaggle datasets download -d otto/recsys-dataset -p "${data_dir}/"
  # -n: never overwrite files that are already extracted.
  unzip -n "${data_dir}/recsys-dataset.zip" -d "${data_dir}/"

  printf '%s\n' "Preprocessing otto"
  pipenv run python -m src.preprocessing --dataset otto
}
# Run the preparation routine that matches the requested dataset. Any other
# value, including an empty one, is rejected with exit status 1.
case "$DATASET" in
  yoochoose)
    prepare_yoochoose
    ;;
  diginetica)
    prepare_diginetica
    ;;
  otto)
    prepare_otto
    ;;
  *)
    echo "Unknown dataset"
    exit 1
    ;;
esac