
Commit

Dmitry Duplyakin committed Feb 10, 2024
1 parent e4814fe commit 2fef692
Showing 14 changed files with 669 additions and 333 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -5,3 +5,4 @@ dw_tap.egg-info/
saved.png
templates/served/*
outputs/*
static/raw/*
6 changes: 6 additions & 0 deletions Dockerfile
@@ -44,6 +44,12 @@ RUN pip install -r requirements.txt
RUN git clone https://github.com/NREL/dw-tap.git
RUN cd dw-tap && python setup.py install && cd ..

# Get BC (bias-correction) data
RUN wget https://github.com/NREL/dw-tap-api/raw/master/bc/bc_v4.zip
RUN mkdir /bc
RUN unzip bc_v4.zip -d /bc
# BC data will be available in: /bc/bc_v4/

EXPOSE 80

# Conda env in the path below needs to match the name in the first line of environment.yml
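
The unpacked archive contains an index.json (readable with GeoPandas) plus per-site joblib model files, which is the layout bc.py below expects. A minimal sanity check of that layout, written as a sketch with column names taken from bc.py:

import os
import geopandas as gpd
import joblib

bc_dir = "/bc/bc_v4"  # location inside the container, per the comment above
index = gpd.read_file(os.path.join(bc_dir, "index.json"))      # one row per BC site
print(index[["model", "cid", "height", "model_file"]].head())  # columns used in bc.py

# Every referenced model file should load with joblib
fit = joblib.load(os.path.join(bc_dir, index["model_file"].iloc[0]))
print(fit)
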
169 changes: 169 additions & 0 deletions bc.py
@@ -0,0 +1,169 @@
import pandas as pd
import numpy as np
import shapely
import shapely.geometry
import geopandas as gpd
import os
import datetime
from dw_tap.data_fetching import getData
import h5pyd
import joblib
from collections import OrderedDict
import json
import matplotlib
matplotlib.use('agg')  # select a non-interactive backend before importing pyplot (server-side rendering)
import matplotlib.pyplot as plt

def round3(x):
    return "%.3f" % x

def bc_for_point(lat, lon, height, model_data, bc_dir, plot_dest):
    info = OrderedDict()

    site = shapely.geometry.Point(lon, lat)

    info["<strong>BC: input and closest site with measurements</strong>"] = ""
    info["Point chosen for BC (lon, lat)"] = str(site)
    info["Height chosen for BC, m"] = round3(height)

    src_crs = "EPSG:4326"
    dest_crs = "EPSG:3857"

    site_projected = gpd.GeoDataFrame(geometry=[site], crs=src_crs).to_crs(dest_crs).geometry[0]

    info["Projection"] = "%s -> %s" % (src_crs, dest_crs)
    info["Point after projection (coords in meters)"] = "(" + round3(site_projected.x) + ", " + round3(site_projected.y) + ")"

    #bc_dir = os.path.join(dw_tap_data.path, "bc_development", "bc_v4")

    bc_index = gpd.read_file(os.path.join(bc_dir, "index.json"))
    bc_index = bc_index.set_crs(src_crs, allow_override=True)  # set_crs returns a new frame; keep the result
    bc_index_wtk = bc_index[bc_index["model"] == "wtk"].reset_index(drop=True)
    bc_index_wtk_projected = bc_index_wtk.to_crs(dest_crs)

    distances_km = bc_index_wtk_projected.distance(site_projected) / 1000.0
    closest_idx, closest_bc_site_distance_km = distances_km.argmin(), distances_km.min()

    closest_cid = bc_index_wtk.at[closest_idx, "cid"]

    info["Closest BC site, cid"] = str(closest_cid)
    info["Closest BC site after projection (coords in meters)"] = "(" + round3(bc_index_wtk_projected.at[closest_idx, "geometry"].x) + ", " + round3(bc_index_wtk_projected.at[closest_idx, "geometry"].y) + ")"  #str(bc_index_wtk_projected.at[closest_idx, "geometry"])

    info["Distance to closest BC site, km"] = "<strong>" + round3(closest_bc_site_distance_km) + "</strong>"

    model_file = bc_index_wtk.at[closest_idx, "model_file"]
    #info["File with closest BC site's fit model"] = str(model_file)

    bc_height = bc_index_wtk.at[closest_idx, "height"]
    info["Height at which the closest BC site's model is fit, m"] = round3(bc_height)
    info["Difference between selected height and the BC height, m"] = "<strong>" + round3(np.abs(bc_height - height)) + "</strong>"

    info["<br> #1"] = ""  # Adding empty line
    info["<strong>Site with measurements and its BC model</strong>"] = ""
    info["Wind speed RMSE on test subset (m/s), before BC"] = round3(bc_index_wtk_projected.at[closest_idx, "rmse_test_nobc"])
    info["Wind speed RMSE on test subset (m/s), after BC"] = round3(bc_index_wtk_projected.at[closest_idx, "rmse_test_bc"])
    info["Wind speed RMSE reduction, %"] = round3(bc_index_wtk_projected.at[closest_idx, "rmse_test_percent_reduction"])
    info["Number of time-aligned instances for fitting the model"] = round3(bc_index_wtk_projected.at[closest_idx, "train_set_length"])
    info["Perf factor for wind speeds in the test subset before BC"] = round3(bc_index_wtk_projected.at[closest_idx, "perf_factor_no_bc"])
    info["Perf factor for wind speeds in the test subset after BC"] = round3(bc_index_wtk_projected.at[closest_idx, "perf_factor_bc"])

    fit = joblib.load(os.path.join(bc_dir, model_file))
    info["Closest BC site's model"] = str(fit)
    info["Coefficients of the closest BC site's model"] = str(fit.coef_)

    # f = h5pyd.File("/nrel/wtk-us.h5", 'r', bucket="nrel-pds-hsds")
    # model_data = getData(f, site.y, site.x, height, "IDW",
    #                      power_estimate=False,
    #                      inverse_monin_obukhov_length=False,
    #                      start_time_idx=0, end_time_idx=8760, time_stride=1)

    model_data["datetime"] = pd.to_datetime(model_data["datetime"])
    model_data['hour'] = model_data['datetime'].dt.hour
    model_data['month'] = model_data['datetime'].dt.month
    model_data['year'] = model_data['datetime'].dt.year

    features_list = ["ws", "wd", "hour", "month"]
    model_data["ws_bc"] = fit.predict(model_data[features_list])

    # Simple timeseries plot with no aggregation
    #model_data[["ws", "ws_bc"]].plot();
    #plt.savefig(plot_dest, dpi=300)

    print("before bc_plot_before_and_after")
    bc_plot_before_and_after(model_data, ws_before="ws", ws_after="ws_bc", datetime_column="datetime", \
                             title="Location: (%f, %f), %.0fm hub height" % (lat, lon, height), \
                             save_to_file=plot_dest, \
                             show=False)

    output = """
    <div class="centered">
    <div>
    <table>
    <tr>
    <td><img id=\"monthly_plot\" src=\"%s\"/></td>
    </tr>
    </table>
    </div>
    </div>
    """ % (plot_dest)

    info["<br> #2"] = ""  # Adding empty line
    info["<strong>Before and after BC</strong>"] = ""
    info["Number of timesteps in the model data fetched for Point"] = str(len(model_data))
    info["Min wind speed before BC, m/s"] = round3(model_data["ws"].min())
    info["Avg wind speed before BC, m/s"] = round3(model_data["ws"].mean())
    info["Max wind speed before BC, m/s"] = round3(model_data["ws"].max())

    info["Min wind speed after BC, m/s"] = round3(model_data["ws_bc"].min())
    info["Avg wind speed after BC, m/s"] = round3(model_data["ws_bc"].mean())
    info["Max wind speed after BC, m/s"] = round3(model_data["ws_bc"].max())

    info = "<br>".join(["%s:&nbsp;%s" % (k.lstrip(","), info[k]) if (not (k.startswith("<br>"))) else "<br>" for k in info.keys()])

    #print(json.dumps(bc_info, indent=4))
    #return json.dumps({"output": str(p), "info": info})

    return output, info

def bc_plot_before_and_after(atmospheric_df, ws_before="ws", ws_after="ws_bc", datetime_column="datetime",
                             title="Windspeed monthly averages, with and without BC",
                             save_to_file=False,
                             show=True):
    print("bc_plot_before_and_after")
    df = atmospheric_df[[datetime_column, ws_before, ws_after]].copy()

    year_month = pd.Series(pd.PeriodIndex(df[datetime_column], freq="M"))
    print("1")
    df["month"] = year_month.apply(lambda x: x.month)
    df["year"] = year_month.apply(lambda x: x.year)
    df["day"] = 15  # For plotting only, middle of the month
    df["moyr"] = [str(el[1]) + "-" + str(el[0]).zfill(2) for el in zip(df["month"], df["year"])]
    df['middle_of_the_month'] = pd.to_datetime(df[['year', 'month', "day"]])

    fig, ax = plt.subplots(figsize=(10, 3))

    df_averaged = df.groupby("middle_of_the_month").agg(ws_before_avg=(ws_before, "mean"), \
                                                        ws_after_avg=(ws_after, "mean"), \
                                                        count=(ws_after, "count"))
    df_averaged.sort_index(inplace=True)

    ws_before_overall_avg = df_averaged.ws_before_avg.mean()
    ws_after_overall_avg = df_averaged.ws_after_avg.mean()
    ax.plot(df_averaged.index, df_averaged.ws_before_avg, \
            marker="o", label="Model data; avg=%.3f" % (ws_before_overall_avg), linestyle="--", alpha=1.0)
    ax.plot(df_averaged.index, df_averaged.ws_after_avg, \
            marker="d", label="Bias-corrected model data; avg=%.3f" % (ws_after_overall_avg), linestyle="-.", alpha=1.0)

    ax.set_ylabel("Monthly avg wind speed, m/s")
    ax.set_title(title)
    plt.legend()

    if save_to_file == True:
        plt.savefig('%s.png' % title, dpi=300)
    elif type(save_to_file) == str:
        plt.savefig(save_to_file, dpi=300)

    if show:
        plt.show()

    #return df_averaged
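
For context, here is a minimal way to exercise this module outside the web app. This is a sketch: the coordinates are the illustrative values that appear in a proto.py comment below, the getData call mirrors the commented-out block above, and /bc/bc_v4/ is the container path set up in the Dockerfile.

import h5pyd
from dw_tap.data_fetching import getData
from bc import bc_for_point

lat, lon, height = 39.7430, -105.1470, 65.0   # illustrative point (see the example filename in proto.py)
f = h5pyd.File("/nrel/wtk-us.h5", 'r', bucket="nrel-pds-hsds")

# One year of hourly model data at the chosen point, as in the commented-out block above
model_data = getData(f, lat, lon, height, "IDW",
                     power_estimate=False,
                     inverse_monin_obukhov_length=False,
                     start_time_idx=0, end_time_idx=8760, time_stride=1)

output_html, info_html = bc_for_point(lat=lat, lon=lon, height=height,
                                      model_data=model_data,
                                      bc_dir="/bc/bc_v4/",        # see Dockerfile above
                                      plot_dest="static/bc.png")
print(info_html)
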
131 changes: 105 additions & 26 deletions proto.py
@@ -21,6 +21,7 @@
from dw_tap.data_fetching import getData
from v2 import validated_params_v2_w_year
from hsds_helpers import connected_hsds_file
from bc import bc_for_point
#from dw_tap.vis import plot_monthly_avg

#app = Flask(__name__)
@@ -322,8 +323,98 @@ def serve_ts(req_id, req_args):
if "Unnamed: 0" in atmospheric_df.columns:
atmospheric_df.drop(columns=["Unnamed: 0"], inplace=True)

# Saving to file
csv_dest = "static/raw/ts-%s.csv" % req_id
atmospheric_df.to_csv(csv_dest, index=False)

output = atmospheric_df.to_csv(index=False).replace("\n", "<br>")
info = ""
#save = "Download: static/raw/ts-%s.csv" % req_id
proposed_fname="%.4f_%.4f_%.1f.csv" % (lat, lon, height)
save = "href=\"%s\" download=\"%s\"" % (csv_dest, proposed_fname)
# Example: href="static/raw/ts-cd5e6247a3b935d7770bb1657df34715.csv" download="39.7430_-105.1470_65.000000.csv"
# it will be added inside the <a> tag

json_output = {'output': output, "info": info, "save": save}
with open(output_dest, 'w') as f:
json.dump(json_output, f)
return
except Exception as e:
output = "The following error has occurred:<br>" + str(e)
info = ""
save = ""
json_output = {'output': output, "info": info, "save": save}
with open(output_dest, 'w') as f:
json.dump(json_output, f)
return

def serve_bc(req_id, req_args):
    output_dest = os.path.join(outputs_dir, req_id)
    try:
        height, lat, lon, year_list = validated_params_v2_w_year(req_args)
        f = connected_hsds_file(req_args, config)
        dt = pd.read_csv("wtk-dt.csv")
        dt["datetime"] = pd.to_datetime(dt["datetime"])
        dt["year"] = dt["datetime"].apply(lambda x: x.year)

        subsets = []
        for yr in year_list:
            idx = dt[dt["year"] == yr].index
            subsets.append(getData(f, lat, lon, height,
                                   "IDW",
                                   power_estimate=False,
                                   inverse_monin_obukhov_length=False,
                                   start_time_idx=idx[0], end_time_idx=idx[-1], time_stride=1,
                                   saved_dt=dt))
        atmospheric_df = pd.concat(subsets)
        atmospheric_df.index = range(len(atmospheric_df))

        # plot_monthly_avg(atmospheric_df, \
        #                  title="Location: (%f, %f), %.0fm hub height" % (lat, lon, height), \
        #                  save_to_file='static/saved.png', \
        #                  show_avg_across_years=True,
        #                  show_overall_avg=True,
        #                  show=False)
        # #return flask.send_file('saved.png')

        # output = """
        # <div classes="centered">
        # <div>
        # <table>
        # <tr>
        # <td><img id=\"monthly_plot\" src=\"static/saved.png\"/></td>
        # </tr>
        # </table>
        # </div>
        # </div>
        # """

        # Ordered list of BC data locations; supports running inside an ECS container and locally.
        # The code below uses the first existing directory in this list.
        bc_locs = ["/bc/bc_v4/", "~/OneDrive - NREL/dw-tap-data/bc_development/bc_v4/"]
        selected_bc_loc = None
        for bc_loc in bc_locs:
            d = os.path.expanduser(bc_loc)
            if os.path.isdir(d):
                selected_bc_loc = d
                break
        if not selected_bc_loc:
            output = """
            <div class="centered">
            Unable to locate directory with BC data. Checked locations: %s.
            </div>
            """ % str(bc_locs)
            info = ""
        else:
            # Todo: check to make sure that atmospheric_df is not empty

            output, info = bc_for_point(lon=lon, lat=lat, height=height, \
                                        model_data=atmospheric_df, \
                                        bc_dir=selected_bc_loc, \
                                        plot_dest='static/bc.png')  # plot_dest="outputs/fig-%s.png" % req_id)

        #info = "The shown dataset includes %d timesteps between %s and %s." % \
        #    (len(atmospheric_df), atmospheric_df.datetime.tolist()[0], atmospheric_df.datetime.tolist()[-1])

        json_output = {'output': output, "info": info}
        with open(output_dest, 'w') as f:
            json.dump(json_output, f)
@@ -336,32 +427,6 @@ def serve_ts(req_id, req_args):
            json.dump(json_output, f)
        return

# @app.route('/12x24')
# def endpoint_12x24():
# global req_args
# req_args = request.args
# t1 = Thread(target=serve_12x24)
# t1.start()
# return render_template('12x24_index.html')
#
# @app.route('/monthly')
# def endpoint_monthly():
# global req_args
# req_args = request.args
# t1 = Thread(target=serve_monthly)
# t1.start()
# return render_template('monthly_index.html')
#
# @app.route('/12x24_output', methods=['GET'])
# def output_12x24():
# json_output = {'output': output}
# return json.dumps(json_output)
#
# @app.route('/monthly_output', methods=['GET'])
# def output_monthly():
# json_output = {'output': output}
# return json.dumps(json_output)

@app.route('/output', methods=['GET'])
def get_output():
""" Check if output file for requested req_id has been cretead and, if so, return its contents as part of a json in the form expected by the js in html page """
@@ -489,6 +554,20 @@ def root(path):

        return render_template(os.path.join("served", html_name))

elif req_endpoint == "bc":
th = Thread(target=serve_bc, args=(req_id, req_args))
th.start()

html_name = "bc_%s.html" % req_id

# Copy from a template and replace the string that has the endpoint for fetching outputs from
instantiate_from_template(os.path.join(templates_dir, "bc_index.html"),\
os.path.join(templates_dir, "served", html_name),
old_text="const FETCH_STR = \"/output?req_id=NEED_SPECIFIC_REQ_ID\";",\
new_text="const FETCH_STR = \"/output?req_id=%s\";" % req_id)

return render_template(os.path.join("served", html_name))

elif req_endpoint == "info":
return render_template("info.html")
else:
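
End to end, the new /bc endpoint follows the same pattern as the existing endpoints: the route starts serve_bc in a background thread, serves an HTML page whose FETCH_STR constant points at /output?req_id=..., and that page polls /output until the JSON written by serve_bc appears. A rough client-side sketch of this flow is below; it is only a sketch, since the query-parameter names (height, lat, lon, year) are assumptions based on validated_params_v2_w_year (not shown in this diff), and the readiness check assumes /output responds with an empty "output" until the output file exists.

import re
import time
import requests

BASE = "http://localhost:80"   # assumed host/port; the Dockerfile above EXPOSEs port 80

# Kick off a BC request; the parameter names here are assumptions (see note above)
page = requests.get(BASE + "/bc",
                    params={"height": 65, "lat": 39.7430, "lon": -105.1470, "year": 2013})

# The served page embeds: const FETCH_STR = "/output?req_id=<req_id>";
req_id = re.search(r'/output\?req_id=([0-9a-f]+)', page.text).group(1)

# Poll /output until serve_bc (running in a background thread) writes outputs/<req_id>
while True:
    result = requests.get(BASE + "/output", params={"req_id": req_id}).json()
    if result.get("output"):
        break
    time.sleep(2)

print(result["info"])   # the HTML-formatted BC summary assembled by bc_for_point
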
1 change: 1 addition & 0 deletions requirements.txt
@@ -17,3 +17,4 @@ ecmwflibs
cfgrib
eccodes==1.2.0
descartes
joblib
Binary file added static/bc.png
Binary file added static/save.png