
Commit

Merge pull request #23 from CIAT-DAPA/develop
Develop
santiago123x authored Oct 15, 2024
2 parents 4ba04a2 + f61bae4 commit 0dd9e26
Showing 5 changed files with 138 additions and 63 deletions.
28 changes: 24 additions & 4 deletions requirements.txt
@@ -1,33 +1,53 @@
affine==2.4.0
asttokens==2.4.1
attrs==23.2.0
certifi==2024.6.2
cftime==1.6.4
click==8.1.7
click-plugins==1.1.1
cligj==0.7.2
colorama==0.4.6
comm==0.2.2
contourpy==1.2.1
cycler==0.12.1
debugpy==1.8.6
decorator==5.1.1
exceptiongroup==1.2.2
executing==2.1.0
fiona==1.9.6
fonttools==4.53.0
geopandas==0.14.4
kiwisolver==1.4.5
matplotlib==3.9.0
matplotlib-inline==0.1.7
nest-asyncio==1.6.0
netCDF4==1.7.1
numpy==2.0.0
packaging==24.1
pandas==2.2.2
parso==0.8.4
pillow==10.3.0
platformdirs==4.3.6
prompt_toolkit==3.0.48
psutil==6.0.0
pure_eval==0.2.3
Pygments==2.18.0
pyparsing==3.1.2
pyproj==3.6.1
python-dateutil==2.9.0.post0
pytz==2024.1
pywin32==307
pyzmq==26.2.0
rasterio==1.3.10
rioxarray==0.17.0
secure-smtplib==0.1.1
shapely==2.0.4
six==1.16.0
snuggs==1.4.7
stack-data==0.6.3
tornado==6.4.1
traitlets==5.14.3
typing_extensions==4.12.2
tzdata==2024.1
matplotlib==3.9.0
matplotlib-inline==0.1.7
asttokens==2.4.1
secure-smtplib==0.1.1
wcwidth==0.2.13
xarray==2024.9.0
23 changes: 16 additions & 7 deletions src/postprocessing/cut_map.py
@@ -13,27 +13,36 @@ def cut_rasters(raster_filename, shp_path):


# Convert the shapefile to a list of geometries
shapes = [feature["geometry"] for feature in shapefile.__geo_interface__["features"]]
#shapes = [feature["geometry"] for feature in shapefile.__geo_interface__["features"]]


# Open the raster
with rasterio.open(raster_filename) as src:


if shapefile.crs != src.crs:
shapefile = shapefile.to_crs(src.crs)

geometries = shapefile.geometry.values

# Convert the shapefile to a list of geometries
shapes = [mapping(geom) for geom in shapefile.geometry]
#shapes = [mapping(geom) for geom in shapefile.geometry]

# Clip the raster with the shapefile
out_image, out_transform = mask(src, shapes, crop=True)
out_image, out_transform = mask(src, geometries, crop=True)

#out_image[out_image == 0] = np.nan

if src.nodata is not None:
nodata = src.nodata
out_image[out_image == nodata] = np.nan
zero_mask = (out_image == 0)
if np.any(zero_mask):
print("Hay valores cero en los datos recortados. Reemplazando con np.nan.")
out_image[zero_mask] = np.nan

# Handle invalid values
invalid_value = -9999
invalid_mask = (out_image == invalid_value)
if np.any(invalid_mask):
print("Hay valores -9999 en los datos recortados. Reemplazando con np.nan.")
out_image[invalid_mask] = np.nan
out_meta = src.meta.copy()

# Update the metadata of the clipped raster
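For context on the clipping flow touched in this file — reproject the shapefile to the raster CRS, mask the raster with the shapefile geometries, then turn nodata and sentinel values into NaN — a minimal standalone sketch follows. It is not the committed function: the paths are placeholders, and the cast to float32 is added here so the NaN assignment is valid even for integer rasters.

import numpy as np
import geopandas as gpd
import rasterio
from rasterio.mask import mask

def clip_raster_to_shapefile(raster_path, shp_path):
    shapefile = gpd.read_file(shp_path)
    with rasterio.open(raster_path) as src:
        # Reproject the vector layer to the raster CRS if they differ
        if shapefile.crs != src.crs:
            shapefile = shapefile.to_crs(src.crs)
        # Clip the raster with the shapefile geometries
        out_image, out_transform = mask(src, shapefile.geometry.values, crop=True)
        # Cast so NaN can be assigned, then blank out nodata and the -9999 sentinel
        out_image = out_image.astype("float32")
        if src.nodata is not None:
            out_image[out_image == src.nodata] = np.nan
        out_image[out_image == -9999] = np.nan
        # Carry the source metadata over, updated for the clipped window
        out_meta = src.meta.copy()
        out_meta.update(height=out_image.shape[1], width=out_image.shape[2],
                        transform=out_transform, dtype="float32")
    return out_image, out_meta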
138 changes: 90 additions & 48 deletions src/postprocessing/export_average.py
@@ -1,13 +1,18 @@
import netCDF4 as nc
import numpy as np
import rasterio
import xarray as xr
import rioxarray
import geopandas as gpd
from shapely.geometry import mapping
from rasterio.transform import from_origin
import os
from .cut_map import cut_rasters
from .generate_images import generate_image
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

import pandas as pd
import matplotlib

def export_raster(dataset, file_name, specific_variable, output_path, inputs_path, is4Dim=False):
# Get the current script directory
@@ -29,41 +34,49 @@ def export_raster(dataset, file_name, specific_variable, output_path, inputs_pat

# Extract variables
if specific_variable == "RAIN":
if "RAINNC" not in dataset.variables or "RAINC" not in dataset.variables or "RAINSH" not in dataset.variables:
if "RAINNC" not in dataset.data_vars or "RAINC" not in dataset.data_vars or "RAINSH" not in dataset.data_vars:
raise ValueError(f"The variables 'RAINNC', 'RAINC' or 'RAINSH' are not found in the NetCDF file.")
rainnc_data = dataset.variables["RAINNC"][:]
rainc_data = dataset.variables["RAINC"][:]
rainsh_data = dataset.variables["RAINSH"][:]
rainnc_data = dataset.RAINNC
rainc_data = dataset.RAINC
rainsh_data = dataset.RAINSH
var_data = rainnc_data + rainc_data + rainsh_data
else:
if specific_variable not in dataset.variables:
if specific_variable not in dataset.data_vars:
raise ValueError(f"The variable '{specific_variable}' is not found in the NetCDF file.")
var_data = dataset.variables[specific_variable][:]
var_data = dataset[specific_variable][:]

var_data.rio.write_crs("EPSG:4326")

# Check dimensions
print(f"Dimensions of {specific_variable}: {var_data.shape}")

if is4Dim:
var_data = var_data[:, 0, :, :]
if "bottom_top" in var_data.dims:
var_data = var_data.isel(bottom_top=0)
elif "soil_layers_stag" in var_data.dims:
var_data = var_data.isel(soil_layers_stag=0)

# Assume dimensions are [time, lat, lon]
time_index = 0 # Select a time index for coordinates

lat = dataset.variables['XLAT'][time_index, :, 0] # Assuming XLAT is 3D [time, lat, lon]
lon = dataset.variables['XLONG'][time_index, 0, :] # Assuming XLONG is 3D [time, lat, lon]
lat = pd.to_numeric(dataset.XLAT.isel(Time=0, west_east=0).values)
long = pd.to_numeric(dataset.XLONG.isel(Time=0, south_north=0).values)

inv_lat = np.flip(lat, axis=0)
#inv_lat = np.flip(lat, axis=0)

res_lat = abs(inv_lat[1] - inv_lat[0])
res_lon = abs(lon[1] - lon[0])
transform = from_origin(lon.min(), inv_lat.max(), res_lon, res_lat)

if specific_variable == "T2":
var_data = var_data - 273
var_data.values = var_data.values - 273

xtime = dataset.variables['XTIME'][:]
xtime = dataset.XTIME

previous_day = np.zeros(var_data.shape[1:])

if "d01" in file_name:
shp_path = os.path.join(shape_path, "limite_caribe", "limite_caribe.shp")

shapefile = gpd.read_file(shp_path)

shapefile = shapefile.to_crs("EPSG:4326")

# Iterate over each day (8 intervals per day)
for day in range(var_data.shape[0] // 8):
start_index = day * 8
@@ -75,58 +88,59 @@ def export_raster(dataset, file_name, specific_variable, output_path, inputs_pat
date = start_datetime + timedelta(minutes=int(xtime[start_index]))
date = date.strftime('%Y-%m-%d')

daily_data = var_data[start_index:end_index, :, :]
daily_data = var_data.isel(Time=slice(start_index, end_index))

data = daily_data

# Calculate the accumulated sum or mean of the daily data
if specific_variable == "RAIN":
# Initialize the previous total for 3-hour difference calculation
previous_total = np.zeros(daily_data.shape[1:])
result_variable = np.zeros(daily_data.shape[1:])
for i in range(daily_data.shape[0]):
current_total = daily_data[i, :, :] - previous_day
current_total = daily_data.isel(Time=i) - previous_day
# Calculate the 3-hourly accumulated precipitation
result_variable += current_total - previous_total
previous_total = current_total
if day == 0 or day == 1:
result_variable = result_variable * 0.6

result_variable_expanded = np.expand_dims(result_variable, axis=0) # (1, y, x)

# Replicate across the Time dimension to match data's shape (Time, y, x)
result_variable_expanded = np.repeat(result_variable_expanded, daily_data.shape[0], axis=0)

# Assign to data with matching dimensions
data.values = result_variable_expanded
previous_day += result_variable

else:
result_variable = np.mean(daily_data, axis=0)
previous_day += result_variable

mean = np.mean(daily_data.values, axis=0)
mean_expanded = np.expand_dims(mean, axis=0)
mean_expanded = np.repeat(mean_expanded, daily_data.shape[0], axis=0)
data.values = mean_expanded

# Create a unique name for the raster file
raster_filename = os.path.join(var_output, f'{specific_variable}_{date}_raster.tif')

# Create the raster file with the accumulated sum or the mean as a single band
with rasterio.open(
raster_filename,
'w',
driver='GTiff',
height=result_variable.shape[0],
width=result_variable.shape[1],
count=1, # Only one band for the accumulated sum or the mean
dtype=result_variable.dtype,
crs='+proj=latlong',
transform=transform,
) as dst:
dst.write(result_variable, 1)
raster_filename = os.path.join(var_output, f'{specific_variable}_{date}.tif')
time = list(range(start_index, end_index))
data_xarray =xr.DataArray(data.values, dims=['t','y','x'], coords={'t': time,'y': lat,'x': long})

data_result = data_xarray.rio.write_crs("EPSG:4326").rio.clip(shapefile.geometry, shapefile.crs)

data_result = data_result.isel(t=0)

print(f"Raster for: {specific_variable} day: {date} created successfully")
save_raster(data_result, raster_filename)

print(f"Raster for: {specific_variable} day: {date} created successfully")

new_raster_filename = raster_filename.replace(os.path.basename(raster_filename), os.path.basename(raster_filename).replace("_raster",""))

shape_cut_path = os.path.join(shape_path, "limites_municipales", "limite_municipal.shp")

if "d02" in file_name:

new_raster_filename = cut_rasters(raster_filename, shp_path)
else:
os.rename(raster_filename, new_raster_filename)
if "d01" in file_name:
shape_cut_path = os.path.join(shape_path, "limite_caribe", "limite_caribe.shp")


print(f"Raster for: {specific_variable} day: {date} cut successfully as '{new_raster_filename}'")

generate_image(new_raster_filename, search_csv(os.path.join(data_path, "ranges"), specific_variable), data_path, shape_cut_path)
generate_image(raster_filename, search_csv(os.path.join(data_path, "ranges"), specific_variable), data_path, shape_cut_path)

return var_output

@@ -143,3 +157,31 @@ def contains_keyword(file_name, keyword):
filtered_files.append(os.path.join(ranges_path, "ranges_Default.csv"))

return filtered_files[0]


def save_raster(data_array, output_path):
if isinstance(data_array, xr.Dataset):
# Assuming a specific DataArray is needed from within the Dataset
data_array = data_array.to_array().isel(variable=0) # Adjust as needed to obtain the correct DataArray

lon = data_array.coords['x']
lat = data_array.coords['y']

# Flip the data array along the latitude axis
flipped_data = np.flip(data_array.values, axis=0)

transform = from_origin(west=lon.min().item(), north=lat.max().item(), xsize=(lon.max().item()-lon.min().item())/len(lon), ysize=(lat.max().item()-lat.min().item())/len(lat))

# Save as a raster file
with rasterio.open(
output_path,
'w',
driver='GTiff',
height=flipped_data.shape[0],
width=flipped_data.shape[1],
count=1,
dtype=flipped_data.dtype,
crs='+proj=latlong',
transform=transform,
) as dst:
dst.write(flipped_data, 1)
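The new export path in this file builds an xarray DataArray with y/x coordinates, attaches a CRS, and clips it to a shapefile with rioxarray before saving. A minimal sketch of that pattern is below; the file names and the synthetic data are placeholders, and rio.to_raster is used instead of the manual rasterio writer in save_raster purely to keep the sketch short.

import numpy as np
import xarray as xr
import geopandas as gpd
import rioxarray  # noqa: F401 -- registers the .rio accessor on xarray objects

shapefile = gpd.read_file("limite_caribe.shp").to_crs("EPSG:4326")  # placeholder path

# Synthetic DataArray with the same dimension layout used above (t, y, x)
data = xr.DataArray(
    np.random.rand(3, 100, 120),
    dims=["t", "y", "x"],
    coords={"t": range(3),
            "y": np.linspace(15.0, 5.0, 100),
            "x": np.linspace(-80.0, -70.0, 120)},
)

# Attach a CRS, clip to the shapefile geometries, keep one time step, write a GeoTIFF
clipped = data.rio.write_crs("EPSG:4326").rio.clip(shapefile.geometry, shapefile.crs)
clipped.isel(t=0).rio.to_raster("T2_2024-10-15.tif")  # placeholder output name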
10 changes: 6 additions & 4 deletions src/postprocessing/extract_data.py
@@ -3,6 +3,7 @@
import numpy as np
import rasterio as rio
from rasterio.transform import from_origin, xy
import xarray as xr
from .export_average import export_raster
from .generate_images import generate_image
import rasterio
@@ -18,17 +19,18 @@ def extract_data(inputs_path, outputs_path):
# Filter only the files
nc_files = [os.path.join(wrf_inputs_path, file) for file in os.listdir(wrf_inputs_path)]


for file in nc_files:

file_name = os.path.basename(file)
print(f"Procesando el archivo: {file_name}")

dataset = nc.Dataset(file)
dataset = xr.open_dataset(file, decode_times=False)

RAIN = export_raster(dataset, file_name, "RAIN", outputs_path, inputs_path)

T2 = export_raster(dataset, file_name, "T2", outputs_path, inputs_path)

RAIN = export_raster(dataset, file_name, "RAIN", outputs_path, inputs_path)

HGT = export_raster(dataset, file_name, "HGT", outputs_path, inputs_path)

SWDOWN = export_raster(dataset, file_name, "SWDOWN", outputs_path, inputs_path)
@@ -53,7 +55,7 @@

ET0 = calcET0(T2, RH, WS2m, SWDOWN, inputs_path)

dataset.close()
# dataset.close()

return True

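The switch here from netCDF4.Dataset to xarray relies on decode_times=False, since WRF output stores time as XTIME minutes rather than CF-encoded dates; export_average then converts those minutes into dates itself. A minimal sketch of opening a file that way (the file name is a placeholder):

import xarray as xr

ds = xr.open_dataset("wrfout_d01_placeholder.nc", decode_times=False)

print(list(ds.data_vars))      # e.g. RAINNC, RAINC, RAINSH, T2, HGT, SWDOWN, ...
print(ds["XTIME"].values[:5])  # raw minutes since the simulation start, decoded later

ds.close()  # xarray reads lazily, so release the file handle when done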
2 changes: 2 additions & 0 deletions src/postprocessing/generate_images.py
@@ -1,4 +1,5 @@
import rasterio
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.colors import LinearSegmentedColormap, BoundaryNorm
@@ -9,6 +10,7 @@
import numpy as np

def generate_image(raster_path, csv_path, data_path, shapefile_path=None):
matplotlib.use('Agg')
# Define the logo path and the PNG save path
logo_path = os.path.join(data_path, "instituteLogo.jpg")
png_file = os.path.join(os.path.dirname(raster_path), os.path.basename(raster_path).replace(".tif", "_image.png"))
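The one-line change in this file selects matplotlib's non-interactive Agg backend inside generate_image, so figures can be rendered to PNG on a headless server without a display. A minimal sketch of the usual pattern, selecting the backend before pyplot draws anything (the output path is a placeholder):

import matplotlib
matplotlib.use("Agg")  # file-only backend: no GUI or display required
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.plot([0, 1, 2], [0, 1, 4])
fig.savefig("example.png", dpi=150)
plt.close(fig)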
