check_new_datasets_geocubes.py
import argparse
import re

import pandas as pd
import pystac_client
import requests


def get_datasets():
    """
    Fetch the available datasets from the GeoCubes API endpoint.

    Returns a dictionary containing the GeoCubes datasets and their relevant
    information, including the data paths for each available year.
    """
    data = requests.get("https://vm0160.kaj.pouta.csc.fi/geocubes/info/getDatasets")
    const_url = "https://vm0160.kaj.pouta.csc.fi"

    # The endpoint returns the datasets as a semicolon-separated list of comma-separated fields
    raw_datasets = data.text.split(";")
    split_datasets = [x.split(",") for x in raw_datasets]

    dataset_dict = {}
    for split in split_datasets:
        dataset_dict[split[0]] = dict(zip(["name", "layername", "years", "folder", "file_prefix", "max_resolution", "bit_depth", "producer", "metadata_URL"], split))

    # Build the data paths, one per available year
    for d in dataset_dict:
        year_split = dataset_dict[d]['years'].split(".")
        dataset_dict[d]['paths'] = []
        if len(year_split) == 1:
            dataset_dict[d]['paths'].append(f"{const_url}{dataset_dict[d]['folder']}{year_split[0]}/")
        else:
            for year in year_split:
                dataset_dict[d]['paths'].append(f"{const_url}{dataset_dict[d]['folder']}{year}/")

    return dataset_dict
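
# A minimal usage sketch for get_datasets(). The field names accessed below come
# from the keys assigned in the function above; the actual values depend on what
# the API currently returns.
#
#     datasets = get_datasets()
#     for name, info in datasets.items():
#         print(name, info["max_resolution"], info["paths"])
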
if __name__ == "__main__":

    pw_filename = 'passwords.txt'

    parser = argparse.ArgumentParser()
    parser.add_argument("--host", type=str, help="Hostname of the selected STAC API", required=True)
    args = parser.parse_args()

    app_host = f"{args.host}/geoserver/rest/oseo/"
    csc_catalog_client = pystac_client.Client.open(f"{args.host}/geoserver/ogc/stac/v1/", headers={"User-Agent": "update-script"})

    title_regex_pattern = r" \(GeoCubes\)"

    # Get all GeoCubes collections from the app_host
    csc_collections = [col for col in csc_catalog_client.get_collections() if col.id.endswith("at_geocubes")]
    csc_title_id_map = {c.title: c.id for c in csc_collections}
    collection_csv = pd.read_csv('karttatasot.csv', index_col='Nimi').to_dict('index')

    # Get the titles and IDs from the CSC STAC and map the cleaned titles to the ones in the CSV
    titles_and_ids = {}
    for title in csc_title_id_map:
        fixed_title = re.sub(title_regex_pattern, '', title)
        titles_and_ids[fixed_title] = csc_title_id_map[title]

    # Report GeoCubes datasets that are not yet listed in the collection CSV
    geocubes_datasets = get_datasets()
    for dataset in geocubes_datasets:
        if dataset not in collection_csv.keys():
            print(f"New dataset in GeoCubes: {geocubes_datasets[dataset]['name']}")
            print(f"Folder: {geocubes_datasets[dataset]['folder']}")
            print(f"Metadata: {geocubes_datasets[dataset]['metadata_URL']}")