more logging to help troubleshooting
basaks committed Nov 9, 2023
1 parent ae3b531 commit 8e65a19
Showing 4 changed files with 31 additions and 30 deletions.
24 changes: 21 additions & 3 deletions uncoverml/features.py
@@ -1,3 +1,5 @@
from __future__ import division

import logging
from typing import Optional
from collections import OrderedDict
@@ -12,13 +14,15 @@
from uncoverml import patch
from uncoverml import transforms
from uncoverml.config import Config
from uncoverml.geoio import RasterioImageSource
# from uncoverml.geoio import RasterioImageSource

log = logging.getLogger(__name__)


def extract_subchunks(image_source: RasterioImageSource, subchunk_index, n_subchunks, patchsize,
                      template_source: Optional[RasterioImageSource] = None):
def extract_subchunks(image_source, subchunk_index, n_subchunks, patchsize,
                      template_source):
    # extract_subchunks(image_source: RasterioImageSource, subchunk_index, n_subchunks, patchsize,
    #                   template_source: Optional[RasterioImageSource] = None):
    equiv_chunks = n_subchunks * mpiops.chunks
    equiv_chunk_index = mpiops.chunks*subchunk_index + mpiops.chunk_index
    image = Image(image_source, equiv_chunk_index, equiv_chunks, patchsize, template_source)
@@ -71,7 +75,10 @@ def extract_features(image_source, targets, n_subchunks, patchsize):

def transform_features(feature_sets, transform_sets, final_transform, config):
    # apply feature transforms
    features = feature_names(config)
    log.info(f"features are sorted as: \n {features}")
    transformed_vectors = [t(c) for c, t in zip(feature_sets, transform_sets)]

    # TODO remove this when cubist gets removed
    if config.cubist or config.multicubist:
        feature_vec = OrderedDict()
@@ -211,3 +218,14 @@ def remove_missing(x, targets=None):
    return x, classes


def feature_names(config: Config):

    results = []
    for s in config.feature_sets:
        feats = []
        for tif in s.files:
            name = basename(tif)
            feats.append(name)
        feats.sort()
        results += feats
    return results
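A note on the helper added above (not part of the commit): feature_names collects the covariate file basenames per feature set, sorted within each set and concatenated across sets, which is the ordering that transform_features now logs. It calls basename(tif), so features.py presumably imports basename from os.path; that import is not visible in the hunks shown. A minimal sketch of how the helper could be exercised, using SimpleNamespace as a stand-in for uncoverml.config.Config:

# Sketch only, not code from the repository. Assumes `from os.path import basename`
# is present in features.py and that uncoverml is importable.
from types import SimpleNamespace
from uncoverml import features

cfg = SimpleNamespace(feature_sets=[
    SimpleNamespace(files=["/data/covariates/slope.tif",
                           "/data/covariates/aspect.tif"]),
    SimpleNamespace(files=["/data/climate/rainfall.tif"]),
])

print(features.feature_names(cfg))
# expected: ['aspect.tif', 'slope.tif', 'rainfall.tif'] -- sorted within each set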
31 changes: 7 additions & 24 deletions uncoverml/geoio.py
@@ -1,5 +1,4 @@
from __future__ import division
from typing import Optional
import joblib
import os.path
from subprocess import run
@@ -14,11 +13,8 @@
import matplotlib.pyplot as plt
import seaborn as sns
import rasterio
from rasterio.warp import reproject
from rasterio.windows import Window
from xgboost import XGBRegressor
from sklearn.cluster import DBSCAN
from affine import Affine
import numpy as np
import shapefile
import tables as hdf
@@ -27,7 +23,7 @@

from uncoverml import mpiops
from uncoverml import image
from uncoverml import features
from uncoverml import features as feat
from uncoverml.config import Config
from uncoverml.transforms import missing_percentage
from uncoverml.targets import Targets
@@ -436,19 +432,6 @@ def output_thumbnails(self, ratio=10):
resample(f, output_tif=thumbnail, ratio=ratio)


def feature_names(config: Config):

    results = []
    for s in config.feature_sets:
        feats = []
        for tif in s.files:
            name = os.path.basename(tif)
            feats.append(name)
        feats.sort()
        results += feats
    return results


def _iterate_sources(f, config: Config):

    results = []
@@ -502,7 +485,7 @@ def f(image_source: RasterioImageSource):
            template_source = RasterioImageSource(config.prediction_template)
        else:
            template_source = None
        r = features.extract_subchunks(image_source, subchunk_index, config.n_subchunks, config.patchsize,
        r = feat.extract_subchunks(image_source, subchunk_index, config.n_subchunks, config.patchsize,
                                   template_source=template_source)
        return r
    result = _iterate_sources(f, config)
@@ -522,7 +505,7 @@ def f(image_source):
        if config.intersected_features:
            r = extract_intersected_features(image_source, targets, config)
        else:
            r = features.extract_features(image_source, targets,
            r = feat.extract_features(image_source, targets,
                                      config.n_subchunks, config.patchsize)
        return r
    result = _iterate_sources(f, config)
@@ -534,9 +517,9 @@ def semisupervised_feature_sets(targets, config: Config):
    frac = config.subsample_fraction

    def f(image_source):
        r_t = features.extract_features(image_source, targets, n_subchunks=1,
        r_t = feat.extract_features(image_source, targets, n_subchunks=1,
                                    patchsize=config.patchsize)
        r_a = features.extract_subchunks(image_source, subchunk_index=0,
        r_a = feat.extract_subchunks(image_source, subchunk_index=0,
                                     n_subchunks=1,
                                     patchsize=config.patchsize)
        if frac < 1.0:
@@ -556,7 +539,7 @@ def unsupervised_feature_sets(config):
    frac = config.subsample_fraction

    def f(image_source):
        r = features.extract_subchunks(image_source, subchunk_index=0,
        r = feat.extract_subchunks(image_source, subchunk_index=0,
                                   n_subchunks=1,
                                   patchsize=config.patchsize)
        if frac < 1.0:
@@ -739,7 +722,7 @@ def export_validation_scatter_plot_and_validation_csv(outfile_results, config: Config):

def plot_feature_correlation_matrix(config: Config, x_all):
    fig, corr_ax = plt.subplots()
    features = [Path(f).stem for f in feature_names(config)]
    features = [Path(f).stem for f in feat.feature_names(config)]
    corr_df = pd.DataFrame(x_all)
    corr_df.columns = features
    sns.heatmap(corr_df.corr(),
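One plausible reason for importing the module as feat rather than features (an inference; the commit message does not state it): plot_feature_correlation_matrix binds a local variable named features, and under `from uncoverml import features` that assignment would make the name local to the function, so the module would be hidden inside it. A minimal sketch of the collision the alias avoids, with a hypothetical function name:

# Sketch only, hypothetical example; not code from the repository.
from pathlib import Path
from uncoverml import features as feat   # alias cannot be shadowed by the local list below

def plot_labels(config):
    # With `from uncoverml import features`, the next line would raise
    # UnboundLocalError: assigning to `features` makes it local for the whole
    # function, hiding the module on the right-hand side.
    features = [Path(f).stem for f in feat.feature_names(config)]
    return features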
2 changes: 1 addition & 1 deletion uncoverml/predict.py
@@ -140,7 +140,7 @@ def _fix_for_corrupt_data(x, feature_names):


def _get_data(subchunk, config):
    features_names = geoio.feature_names(config)
    features_names = features.feature_names(config)

    # NOTE: This returns an *untransformed* x,
    # which is ok as we just need dummies here
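For the updated lookup in _get_data to resolve, predict.py needs the features module in scope (for example `from uncoverml import features`); no import change appears in this diff, so its presence is assumed here. A minimal sketch of the call site after the change, with a hypothetical wrapper name:

# Sketch only; assumes predict.py already imports the features module.
from uncoverml import features

def lookup_feature_names(config):
    # feature-name lookup now lives in uncoverml.features instead of uncoverml.geoio
    return features.feature_names(config)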
4 changes: 2 additions & 2 deletions uncoverml/validate.py
@@ -281,7 +281,7 @@ def permutation_importance(model, x_all, targets_all, config: Config):
refit=False).fit,
data=(x_all, y)
)
    feature_names = geoio.feature_names(config)
    feature_names = feat.feature_names(config)
    df_picv = eli5.explain_weights_df(
        pi_cv, feature_names=feature_names, top=100)
    csv = Path(config.output_dir).joinpath(
@@ -573,7 +573,7 @@ def plot_permutation_feature_importance(model, x_all, targets_all, conf: Config,
data=(x_all, y),
model=model
)
    feature_names = [Path(f).stem for f in geoio.feature_names(conf)]
    feature_names = [Path(f).stem for f in feat.feature_names(conf)]
    df_picv = eli5.explain_weights_df(
        pi_cv, feature_names=feature_names, top=100)
    csv = Path(conf.output_dir).joinpath(
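Similarly, the two call sites above rely on a name feat already being bound in validate.py, presumably via `from uncoverml import features as feat`; that import is not part of this diff, so it is an inference. A minimal sketch of what the updated lines depend on, with a hypothetical helper name:

# Sketch only; the import below is an assumption and does not appear in the
# validate.py hunks above.
from pathlib import Path
from uncoverml import features as feat

def importance_labels(conf):
    names = feat.feature_names(conf)          # basenames passed to eli5.explain_weights_df
    stems = [Path(f).stem for f in names]     # stems used as plot labels
    return names, stems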
