Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
112 commits
Select commit Hold shift + click to select a range
0805bb0
added files for new data types
rmclaren Jul 30, 2025
2db4f2b
removed all post processing from ssmis, ascat, seviri so were left wi…
rmclaren Aug 4, 2025
2145e31
updated config for runnins
rmclaren Aug 4, 2025
c68a0c7
updated some of the configuration
rmclaren Aug 6, 2025
67b80db
back to appending entire date range to the zarr file, using config f…
rmclaren Aug 7, 2025
9468b36
apply quality flag to virtual temperature
rmclaren Aug 8, 2025
bbf8565
bug fix
rmclaren Aug 8, 2025
e6c0834
Merge branch 'feature/data_v5' of https://github.com/NOAA-EMC/ocelot …
rmclaren Aug 8, 2025
1b97ba2
apply temperature event code
rmclaren Aug 8, 2025
6d6ab47
Merge branch 'feature/data_v5' of https://github.com/NOAA-EMC/ocelot …
rmclaren Aug 8, 2025
2012cb4
Added IASI PCA
rmclaren Aug 9, 2025
cecdbe6
using second element in W_EVENT
rmclaren Aug 13, 2025
f1593e7
using second element in W_EVENT
rmclaren Aug 13, 2025
862eda0
added amsua
rmclaren Aug 14, 2025
ba2e192
added amsua
rmclaren Aug 14, 2025
0b5a7a4
added amsua
rmclaren Aug 14, 2025
f3cc8f7
updated ssmis mapping a little
rmclaren Aug 14, 2025
e324496
removed usage of the observation type field
rmclaren Aug 14, 2025
a8436a4
Add Apache Parquet encoder (#46)
rmclaren Aug 14, 2025
66b5da9
added flag to select zarr or parquet for output
rmclaren Aug 15, 2025
dbbf25d
some improvements
rmclaren Aug 15, 2025
1c2ca6e
quick fix
rmclaren Aug 15, 2025
31920c2
Add PCA data support to data prep reader (#50)
rmclaren Aug 18, 2025
fc1cbb0
updates for iasi pca
rmclaren Aug 20, 2025
f607c2d
playing with adpsfc
rmclaren Aug 21, 2025
19f69d4
Merge branch 'feature/data_v5' of https://github.com/NOAA-EMC/ocelot …
rmclaren Aug 21, 2025
f8a60e7
adding code to process raw surface obs data
rmclaren Aug 22, 2025
217d969
bugfixes
rmclaren Aug 22, 2025
c6891ce
yaml updates
rmclaren Aug 22, 2025
eb6c794
Merge branch 'feature/data_v5' of https://github.com/NOAA-EMC/ocelot …
rmclaren Aug 22, 2025
902763e
bug fixes
rmclaren Aug 22, 2025
fdd8aa6
added code to merge prepbufr, adpsfc, and sfcshp
rmclaren Aug 22, 2025
ddf182a
bug fixes
rmclaren Aug 22, 2025
9ea03de
fixed output description
rmclaren Aug 22, 2025
f1e4694
fixed raw surface obs
rmclaren Aug 23, 2025
db94034
added radiosonde
rmclaren Aug 25, 2025
fafe4ab
Merge branch 'feature/data_v5' of https://github.com/NOAA-EMC/ocelot …
rmclaren Aug 25, 2025
843ea86
radiosonde enhancements
rmclaren Aug 25, 2025
ad386b3
radiosonde enhancements
rmclaren Aug 25, 2025
8178f9d
radiosonde enhancements
rmclaren Aug 25, 2025
9386f08
radiosonde enhancements
rmclaren Aug 25, 2025
acbdaa4
radiosonde fixes
rmclaren Aug 25, 2025
2909905
updated radiosonde to manually save the events
rmclaren Aug 25, 2025
5008192
fixed the way radiosonde data is read
rmclaren Aug 27, 2025
6266c21
tweaks to radiosonde and surface obs
rmclaren Aug 27, 2025
ed2e61c
saving u v wind info
rmclaren Aug 27, 2025
3e1e9ac
added specific humidity
rmclaren Aug 28, 2025
b794428
added snow cover, tweaking radiosonde
rmclaren Aug 28, 2025
c0b230d
added avhrr
rmclaren Aug 28, 2025
b1408b0
more specific humidity fixes for surface obs
rmclaren Aug 28, 2025
fcbb85f
fixed avhrr bug
rmclaren Aug 28, 2025
f56bd5a
fixed avhrr dims
rmclaren Aug 28, 2025
6326ee9
fixed avhrr dims
rmclaren Aug 28, 2025
2857ca5
radiosonde fix
rmclaren Aug 28, 2025
6de8785
updated avhrr code
rmclaren Sep 2, 2025
6d53656
updated avhrr code
rmclaren Sep 2, 2025
07974a4
extended avhrr to make a more complete example
rmclaren Sep 2, 2025
bc7b5bf
extended avhrr to make a more complete example
rmclaren Sep 2, 2025
d356bad
avhrr bugfixes
rmclaren Sep 2, 2025
0a18ed4
fixed bug in runner
rmclaren Sep 3, 2025
332d311
improved runner so it doesn't throw when file is missing
rmclaren Sep 4, 2025
f41f3f2
changed field satellite to satelliteId
rmclaren Sep 4, 2025
ccb6278
updates to snow cover
rmclaren Sep 4, 2025
eb44bea
update to avhrr
rmclaren Sep 4, 2025
b2e4c05
Updated snow cover code.
rmclaren Sep 10, 2025
9838018
version of radiosonde that works
rmclaren Sep 11, 2025
e4a1f41
radiosonde fixes
rmclaren Sep 11, 2025
dd6cff1
added some plotting code
rmclaren Sep 12, 2025
67ea91f
got things kinda working. adding uprair processing
rmclaren Sep 17, 2025
6f6177e
got things kinda working. adding uprair processing
rmclaren Sep 17, 2025
8c4df1c
got uprair working
rmclaren Sep 18, 2025
cefcb00
raw radiosonde yaml files
rmclaren Sep 18, 2025
73de181
cleaned up code a little
rmclaren Sep 18, 2025
380e550
updated dump time
rmclaren Sep 18, 2025
ae2997a
updated dump time
rmclaren Sep 18, 2025
30efe61
support data gen for month
rmclaren Sep 22, 2025
9363184
fixed mpi problem
rmclaren Sep 22, 2025
4b63498
fixed the way that datetimes are split
rmclaren Sep 24, 2025
c5bc179
added memory to config
rmclaren Oct 9, 2025
23e7d82
updated ascat
rmclaren Oct 16, 2025
16599c0
added ascat to ursa
rmclaren Oct 30, 2025
da3d24f
Fix DDP synchronization: Add window broadcasting and sequential sampl…
azadeh-gh Nov 20, 2025
5f92fd5
improve documentation
azadeh-gh Nov 20, 2025
a0890cc
merged latest from main
rmclaren Nov 21, 2025
fc99c03
pynorm fixes
rmclaren Nov 21, 2025
2c1ab6a
pynorm fixes
rmclaren Nov 21, 2025
b71aa63
pynorm fixes
rmclaren Nov 21, 2025
1e63fa6
Update data_prep/src/runner.py
rmclaren Nov 21, 2025
81d16ab
Update data_prep/mapping/bufr_radiosonde_uprair.yaml
rmclaren Nov 21, 2025
39dadf6
Update data_prep/mapping/bufr_amsua.py
rmclaren Nov 21, 2025
76cbc23
Update data_prep/mapping/bufr_amsua.py
rmclaren Nov 21, 2025
2714965
Update data_prep/mapping/bufr_ascat.py
rmclaren Nov 21, 2025
6698baa
Update data_prep/mapping/bufr_ascat.py
rmclaren Nov 21, 2025
91bd979
Update data_prep/mapping/bufr_ascat.py
rmclaren Nov 21, 2025
93fc068
Update data_prep/mapping/bufr_seviri.py
rmclaren Nov 21, 2025
4fe052a
Update data_prep/mapping/bufr_ssmis.py
rmclaren Nov 21, 2025
b9e50ab
Update data_prep/mapping/pca_iasi.py
rmclaren Nov 21, 2025
0747f13
Update data_prep/scripts/plots/surface_obs_plots.py
rmclaren Nov 21, 2025
be969a6
Update data_prep/mapping/bufr_avhrr.py
rmclaren Nov 21, 2025
ccdb5a3
fixed code review comment
rmclaren Nov 21, 2025
5a99ddd
Update data_prep/src/reader.py
rmclaren Nov 21, 2025
cf7e066
Add SEVIRI and SSMIS (#71)
rmclaren Dec 10, 2025
5625c4d
Merge branch 'main' into feature/data_v6
rmclaren Dec 15, 2025
0fe432e
tweaks to fix some small issues, make things run more smoothly
rmclaren Dec 15, 2025
c76d587
Merge branch 'feature/data_v6' of https://github.com/NOAA-EMC/ocelot …
rmclaren Dec 15, 2025
e5bcc9f
force height to be integer in surface obs.
rmclaren Dec 16, 2025
17aa791
Update data_prep/scripts/gen_data.py
rmclaren Dec 29, 2025
171164f
[WIP] Address feedback on 'Merge Data V6 Branch' PR (#81)
Copilot Dec 29, 2025
9178904
added int type for max-obs
rmclaren Dec 29, 2025
aafc3c8
undid changes in gnn_model
rmclaren Dec 29, 2025
eff530d
Apply suggestion from @Copilot
rmclaren Dec 29, 2025
53e4af4
Feature/sfc and radiosonde check (#80)
nicholasesposito Dec 31, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 36 additions & 4 deletions data_prep/configs/ursa.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,9 @@ data types:

- name: raw_surface_obs
type: tank
num_tasks: 4
num_tasks: 1
batch_days: 30
memory: 48G
mapping: bufr_surface_obs_raw.py
paths:
prepbufr:
Expand Down Expand Up @@ -57,7 +58,7 @@ data types:
- name: raw_radiosonde
type: tank
num_tasks: 1
batch_days: 30
batch_days: 15
mapping: bufr_radiosonde_raw.py
paths:
prepbufr:
Expand All @@ -77,6 +78,7 @@ data types:
- 18/atmos/gdas.t18z.uprair.tm00.bufr_d

- name: ssmis
memory: 128G
type: tank
num_tasks: 4
batch_days: 30
Expand All @@ -87,6 +89,24 @@ data types:
- 12/atmos/gdas.t12z.ssmisu.tm00.bufr_d
- 18/atmos/gdas.t18z.ssmisu.tm00.bufr_d

- name: seviri
memory: 128G
type: tank
num_tasks: 4
batch_days: 30
mapping: bufr_seviri.py
paths:
sevcsr:
- 00/atmos/gdas.t00z.sevcsr.tm00.bufr_d
- 06/atmos/gdas.t06z.sevcsr.tm00.bufr_d
- 12/atmos/gdas.t12z.sevcsr.tm00.bufr_d
- 18/atmos/gdas.t18z.sevcsr.tm00.bufr_d
sevasr:
- 00/atmos/gdas.t00z.sevasr.tm00.bufr_d
- 06/atmos/gdas.t06z.sevasr.tm00.bufr_d
- 12/atmos/gdas.t12z.sevasr.tm00.bufr_d
- 18/atmos/gdas.t18z.sevasr.tm00.bufr_d

- name: amsua
type: tank
num_tasks: 4
Expand All @@ -98,6 +118,17 @@ data types:
- 12/atmos/gdas.t12z.1bamua.tm00.bufr_d
- 18/atmos/gdas.t18z.1bamua.tm00.bufr_d

- name: ascat
type: tank
num_tasks: 16
batch_days: 30
mapping: bufr_ascat.py
paths:
- 00/atmos/gdas.t00z.ascatt.tm00.bufr_d
- 06/atmos/gdas.t06z.ascatt.tm00.bufr_d
- 12/atmos/gdas.t12z.ascatt.tm00.bufr_d
- 18/atmos/gdas.t18z.ascatt.tm00.bufr_d

- name: viirs
type: tank
num_tasks: 24
Expand All @@ -111,8 +142,9 @@ data types:

- name: avhrr
type: tank
num_tasks: 24
batch_days: 15
num_tasks: 64
batch_days: 7
memory: 128G
mapping: bufr_avhrr.py
paths:
am:
Expand Down
2 changes: 2 additions & 0 deletions data_prep/mapping/bufr_amsua.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
#!/usr/bin/env python3

import os

from bufr.obs_builder import ObsBuilder, add_main_functions, map_path


MAPPING_PATH = map_path('bufr_amsua.yaml')


Expand Down
4 changes: 3 additions & 1 deletion data_prep/mapping/bufr_ascat.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
#!/usr/bin/env python3

import os

from bufr.obs_builder import ObsBuilder, add_main_functions, map_path

MAPPING_PATH = map_path('bufr_scatwnd_ascat.yaml')

MAPPING_PATH = map_path('bufr_ascat.yaml')


class BufrAscatObsBuilder(ObsBuilder):
Expand Down
96 changes: 54 additions & 42 deletions data_prep/mapping/bufr_ascat.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,74 +13,86 @@ bufr:
minute: "*/MINU"
second: "*/SECO"

dataReceiptTime:
datetime:
year: "*/RCYR"
month: "*/RCMO"
day: "*/RCDY"
hour: "*/RCHR"
minute: "*/RCMI"

latitude:
query: "*/CLAT"
query: "*/CLATH"

longitude:
query: "*/CLON"
query: "*/CLONH"

satelliteId:
query: "*/SAID"

qualityFlags:
query: "*/WVCQ"
backscatter:
query: "*/ASCATSG0/BKST"

beamId:
query: "*/ASCATSG0/BEAMI"

radarIncidenceAngle:
query: "*/ASCATSG0/RAIA"

# ObsValue - Wind Direction
windDirectionAt10M:
query: '*/WD10'
type: float
antennaBeamAzimuthAngle:
query: "*/ASCATSG0/ANAZ"

# ObsValue - Wind Speed
windSpeedAt10M:
query: '*/WS10'
ascatKpEstimateQuality:
query: "*/ASCATSG0/AKPEQ"

ascatSigma0Usability:
query: "*/ASCATSG0/ASGU"

encoder:

dimensions:
- name: ch
path: "*/ASCATSG0"

variables:
# MetaData
- name: "MetaData/time"
source: variables/timestamp
- name: time
source: timestamp
longName: "Observation Time"
units: "seconds since 1970-01-01T00:00:00Z"

- name: "MetaData/dataReceiptTime"
source: variables/dataReceiptTime
longName: "Observation Receipt Time"
units: "seconds since 1970-01-01T00:00:00Z"

- name: "MetaData/latitude"
source: variables/latitude
- name: latitude
source: latitude
longName: "Latitude"
units: "degrees_north"
range: [ -90, 90 ]

- name: "MetaData/longitude"
source: variables/longitude
- name: longitude
source: longitude
longName: "Longitude"
units: "degrees_east"
range: [ -180, 180 ]

- name: "MetaData/satelliteIdentifier"
source: variables/satelliteId
- name: satelliteIdentifier
source: satelliteId
longName: "Satellite Identifier"

- name: "MetaData/qualityFlags"
source: variables/qualityFlags
longName: "Quality Flags"
- name: backscatter
source: backscatter
longName: "Backscatter"

- name: beamId
source: beamId
longName: "Beam Identifier"

- name: "ObsValue/windDirection"
source: variables/windDirectionAt10M
longName: "10-meter Wind Direction"
- name: radarIncidenceAngle
source: radarIncidenceAngle
longName: "Radar Incidence Angle"
units: "degree"
range: [ 0, 90 ]

- name: antennaBeamAzimuthAngle
source: antennaBeamAzimuthAngle
longName: "Antenna Beam Azimuth Angle"
units: "degree"
range: [ 0, 360 ]

- name: ascatKpEstimateQuality
source: ascatKpEstimateQuality
longName: "ASCAT Kp Estimate Quality"

- name: "ObsValue/windSpeed"
source: variables/windSpeedAt10M
longName: "10-meter Wind Speed"
units: "m s-1"
- name: ascatSigma0Usability
source: ascatSigma0Usability
longName: "ASCAT Sigma0 Usability"
2 changes: 1 addition & 1 deletion data_prep/mapping/bufr_radiosonde.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def make_obs(self, comm, input_path):
self._apply_quality_flag(container, 'northwardWind', 'windQuality')
self._apply_quality_flag(container, 'eastwardWind', 'windQuality')
self._apply_quality_flag(container, 'airPressure', 'airPressureQuality')
self._apply_quality_flag(container, 'height', 'heightQuality')
self._apply_quality_flag(container, 'height_prepbufr', 'heightQuality')

# Add timestamps
reference_time = self._get_reference_time(input_path)
Expand Down
12 changes: 6 additions & 6 deletions data_prep/mapping/bufr_radiosonde.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
bufr:
group_by_variable: height
group_by_variable: height_prepbufr
subsets:
- ADPUPA
variables:
Expand Down Expand Up @@ -43,7 +43,7 @@ bufr:
airPressure:
query: "*/PRSLEVEL/P___INFO/P__EVENT{1}/POB"

height:
height_prepbufr:
query: "*/PRSLEVEL/Z___INFO/Z__EVENT{1}/ZOB"

# Quality Markers
Expand Down Expand Up @@ -83,8 +83,8 @@ encoder:
source: stationIdentification
longName: "Station Identification"

- name: "stationElevation"
source: stationElevation
- name: "stationElevation_prepbufr"
source: stationElevation_prepbufr
longName: "Station Elevation"
units: "meters"
range: [-100, 5000]
Expand Down Expand Up @@ -130,8 +130,8 @@ encoder:
units: "hPa"
range: [0, 1200]

- name: "height"
source: height
- name: "height_prepbufr"
source: height_prepbufr
longName: "Height"
units: "meters"
range: [0, 60000]
Expand Down
11 changes: 10 additions & 1 deletion data_prep/mapping/bufr_radiosonde_adpupa.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ bufr:
reportId:
query: "*/RPID"

height_bufr:
query: "*/SELV"
type: float

airPressure:
query: "[*/UARLV/PRLC, */PRLC]" # Air pressure at station level
type: float
Expand Down Expand Up @@ -52,6 +56,11 @@ encoder:
variables:
# MetaData

- name: "height_bufr"
source: height_bufr
longName: "height"
units: "m"

- name: "airTemperature"
source: airTemperature
longName: "Air Temperature"
Expand All @@ -64,7 +73,7 @@ encoder:
units: "deg C"
range: [ -100, 100 ]

- name: "ralativeHumidity"
- name: "relativeHumidity"
source: relativeHumidity
longName: "Specific Humidity"
units: "mg/kg"
Expand Down
5 changes: 4 additions & 1 deletion data_prep/mapping/bufr_radiosonde_prepbufr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ bufr:
transforms:
- wrap: [ -180.0, 180.0 ]

stationElevation:
query: "*/ELV"

sequenceId:
query: "*/SID"

Expand All @@ -41,7 +44,7 @@ bufr:
airPressureProgramCode:
query: "*/PRSLEVEL/P___INFO/P__EVENT{1}/PPC"

height:
height_prepbufr:
query: "*/PRSLEVEL/Z___INFO/Z__EVENT{1}/ZOB"

# Quality Markers
Expand Down
23 changes: 18 additions & 5 deletions data_prep/mapping/bufr_radiosonde_raw.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ def __init__(self):

# Override
def make_obs(self, comm, input_dict) -> bufr.DataContainer:
if PrepbufrKey not in input_dict or LowResDumpKey not in input_dict:
return bufr.DataContainer()

prep_container = bufr.Parser(input_dict[PrepbufrKey], self.map_dict[PrepbufrKey]).parse(comm)
prep_container.apply_mask(~prep_container.get('launchCycleTime').mask)
prep_container.apply_mask(~prep_container.get('driftLatitude').mask)
Expand Down Expand Up @@ -120,14 +123,16 @@ def _process_dump(self, comm, input_dict, prep_container, data_key) -> bufr.Data
for var in container.list():
data = container.get(var)
path = container.get_paths(var)
matched_data = np.array([data[dump_idx] for dump_idx, prep_idx, flight_idx in matching_idxs])
idxs = np.array([dump_idx for dump_idx, prep_idx, flight_idx in matching_idxs])
matched_data = data[idxs]
new_container.add(var, matched_data, path)

# Add the prepbufr data to the new container
for var in ['driftTime',
'driftLatitude',
'driftLongitude',
'height',
'height_prepbufr',
'stationElevation',
'airTemperatureQuality',
'specificHumidityQuality',
'dewPointTemperatureQuality',
Expand All @@ -137,7 +142,9 @@ def _process_dump(self, comm, input_dict, prep_container, data_key) -> bufr.Data

data = prep_container.get(var)
path = prep_container.get_paths(var)
matched_data = np.array([data[prep_idx] for dump_idx, prep_idx, flight_idx in matching_idxs])
idxs = np.array([prep_idx for dump_idx, prep_idx, flight_idx in matching_idxs])
matched_data = data[idxs]

if matched_data.dtype == np.dtype('float64'):
matched_data = matched_data.astype('float32')
new_container.add(var, matched_data, path)
Expand Down Expand Up @@ -174,11 +181,17 @@ def _make_description(self):
'units': "degree_east"
},
{
'name': "height",
'source': 'height',
'name': "height_prepbufr",
'source': 'height_prepbufr',
'longName': "Height",
'units': "meters"
},
{
'name': "stationElevation",
'source': 'stationElevation',
'longName': "Station Elevation",
'units': "meters"
},
{
'name': "airTemperatureQuality",
'source': 'airTemperatureQuality',
Expand Down
Loading