# BASTET/ERA5_Data_Request.py
#
# 384 lines
# 15 KiB
# Python

"""
ERA5 Download- and Stitch-Tool
by Marcel Frommelt
"""
# ---------------------------------------------------------------------------
# INPUT SECTION
# ---------------------------------------------------------------------------
folder = "ERA5"  # name of the output folder (it is created if it does not exist yet)
ident1 = "McMurdo"  # unique identifier for ERA5 ascent data (suggestion: launch location)
ident2 = "Antarctica"  # unique identifier for ERA5 float and radiation data (e. g. "southern polar region")

# Balloon launch location on ground (latitude/longitude in degrees):
start_lat = -77.8535  # McMurdo
start_lon = 167.2022
# ---------------------------------
# start_lat = 67.887382  # Kiruna
# start_lon = 21.081452
# ---------------------------------
# start_lat = 78.22  # Svalbard
# start_lon = 15.65

# Geographical boundaries for atmospheric data (in degrees).
# Suggestion for the northern polar region:
# north_lim = 90
# west_lim, east_lim = -180, 180
# south_lim = 45
# Suggestion for the southern polar region (active):
north_lim = -45
west_lim = -180
east_lim = 180
south_lim = -90

# Start and end time for atmospheric data:
startdate = '2019-12-15'  # also acts as launch date of the ERA5 data for the ascent
enddate = '2020-01-17'
ascent_window = 10  # length of the ERA5 ascent data in (full) days, counted from the start date

# Operations you want to be performed:
ascent_only = False  # set True to get only ERA5 data for the ascent (e. g. new location or time within same region)
download = False  # set True to download raw ERA5 files
stitch = True  # set False to skip creating the stitched files
delete = True  # set False to keep the original (raw) files

# ---------------------------------------------------------------------------
# EXECUTE SECTION
# ---------------------------------------------------------------------------
import calendar
import os
import re
from datetime import date
from datetime import datetime
from datetime import timedelta

import cdsapi
import numpy as np
import xarray as xr
from netCDF4 import Dataset
def ERAsingle(yr, mon, dayrange, nlim, slim, elim, wlim, name):
    """Retrieve hourly ERA5 single-level fields from the CDS and store them as netCDF.

    Args:
        yr: year of the request (converted with ``str``).
        mon: month of the request (converted with ``str``).
        dayrange: value passed unchanged as the request's ``'day'`` entry
            (the callers in this file pass a list of zero-padded day strings).
        nlim, slim, elim, wlim: bounds of the requested area; they are sent
            in the order ``[nlim, wlim, slim, elim]``.
        name: target path handed to the result's ``download`` method.
    """
    every_hour = [f"{hour:02d}:00" for hour in range(24)]
    surface_fields = [
        'cloud_base_height',
        'high_cloud_cover',
        'low_cloud_cover',
        'medium_cloud_cover',
        'skin_temperature',
        'surface_net_solar_radiation',
        'surface_net_thermal_radiation',
        'surface_pressure',
        'surface_solar_radiation_downwards',
        'surface_thermal_radiation_downward_clear_sky',
        'surface_thermal_radiation_downwards',
        'toa_incident_solar_radiation',
        'top_net_solar_radiation',
        'top_net_thermal_radiation',
        'total_cloud_cover',
        'total_sky_direct_solar_radiation_at_surface',
    ]
    request = {
        'product_type': 'reanalysis',
        'variable': surface_fields,
        'time': every_hour,
        'year': str(yr),
        'month': str(mon),
        'day': dayrange,
        'area': [nlim, wlim, slim, elim],
        'format': 'netcdf',
    }
    client = cdsapi.Client()
    result = client.retrieve('reanalysis-era5-single-levels', request)
    result.download(name)
def ERAlevelAscent(yr, mon, dayrange, start_lat, start_lon, name):
    """Retrieve hourly ERA5 pressure-level data around the launch site and store it as netCDF.

    The requested area is a box reaching 10 degrees in every direction from
    the launch location. All 37 pressure levels from 1 to 1000 are requested.

    Args:
        yr: year of the request (converted with ``str``).
        mon: month of the request (converted with ``str``).
        dayrange: value passed unchanged as the request's ``'day'`` entry
            (the callers in this file pass a list of zero-padded day strings).
        start_lat: launch latitude in degrees.
        start_lon: launch longitude in degrees.
        name: target path handed to the result's ``download`` method.
    """
    half_width = 10.0  # extent of the box around the launch site, in degrees

    # Latitudes beyond the poles do not exist, so a near-polar launch site
    # must not push the box outside [-90, 90].
    nlim = min(start_lat + half_width, 90.0)
    slim = max(start_lat - half_width, -90.0)
    # NOTE(review): longitudes are not wrapped; a launch site within 10 degrees
    # of the +/-180 meridian yields bounds beyond +/-180 - confirm the CDS
    # handles that as intended.
    elim = start_lon + half_width
    wlim = start_lon - half_width

    every_hour = [f"{hour:02d}:00" for hour in range(24)]
    request = {
        'product_type': 'reanalysis',
        'variable': [
            'geopotential', 'temperature', 'u_component_of_wind',
            'v_component_of_wind', 'vertical_velocity',
        ],
        'pressure_level': [
            '1', '2', '3',
            '5', '7', '10',
            '20', '30', '50',
            '70', '100', '125',
            '150', '175', '200',
            '225', '250', '300',
            '350', '400', '450',
            '500', '550', '600',
            '650', '700', '750',
            '775', '800', '825',
            '850', '875', '900',
            '925', '950', '975',
            '1000',
        ],
        'year': str(yr),
        'month': str(mon),
        'day': dayrange,
        'time': every_hour,
        'area': [nlim, wlim, slim, elim],
        'format': 'netcdf',
    }
    ascent = cdsapi.Client().retrieve('reanalysis-era5-pressure-levels', request)
    ascent.download(name)
def ERAlevelFloat(yr, mon, dayrange, nlim, slim, elim, wlim, name):
    """Retrieve hourly ERA5 pressure-level data for the float phase and store it as netCDF.

    Only the seven pressure levels from 1 to 20 are requested.

    Args:
        yr: year of the request (converted with ``str``).
        mon: month of the request (converted with ``str``).
        dayrange: value passed unchanged as the request's ``'day'`` entry
            (the callers in this file pass a list of zero-padded day strings).
        nlim, slim, elim, wlim: bounds of the requested area; they are sent
            in the order ``[nlim, wlim, slim, elim]``.
        name: target path handed to the result's ``download`` method.
    """
    every_hour = [f"{hour:02d}:00" for hour in range(24)]
    float_levels = ['1', '2', '3', '5', '7', '10', '20']
    level_fields = [
        'geopotential',
        'temperature',
        'u_component_of_wind',
        'v_component_of_wind',
        'vertical_velocity',
    ]
    request = {
        'product_type': 'reanalysis',
        'variable': level_fields,
        'pressure_level': float_levels,
        'year': str(yr),
        'month': str(mon),
        'day': dayrange,
        'time': every_hour,
        'area': [nlim, wlim, slim, elim],
        'format': 'netcdf',
    }
    client = cdsapi.Client()
    result = client.retrieve('reanalysis-era5-pressure-levels', request)
    result.download(name)
# Create the output folder if it is missing and reuse it if it already exists.
# exist_ok=True still raises FileExistsError when the path is a regular file,
# instead of silently continuing with an unusable output location.
os.makedirs(folder, exist_ok=True)

# Parse the configured ISO dates (YYYY-MM-DD) once; every later step works on
# these datetime objects.
start = datetime.fromisoformat(startdate)
end = datetime.fromisoformat(enddate)
# Download of ERA5-Data:
def _month_chunks(first_day, last_day):
    """Split an inclusive date range into one request chunk per calendar month.

    Args:
        first_day: first ``datetime.date`` to cover.
        last_day: last ``datetime.date`` to cover (inclusive); callers are
            expected to pass a date that is not before ``first_day``.

    Returns:
        A chronologically ordered list of ``(year, month, days)`` tuples, where
        ``days`` holds the zero-padded day-of-month strings of that month
        which lie inside the range.
    """
    first_month = (first_day.year, first_day.month)
    last_month = (last_day.year, last_day.month)
    chunks = []
    year, month = first_month
    # Tuple comparison orders (year, month) pairs chronologically, so ranges
    # crossing one or several year boundaries need no special casing.
    while (year, month) <= last_month:
        day_from = first_day.day if (year, month) == first_month else 1
        if (year, month) == last_month:
            day_to = last_day.day
        else:
            day_to = calendar.monthrange(year, month)[1]  # real length of this month
        chunks.append((year, month, [str(day).zfill(2) for day in range(day_from, day_to + 1)]))
        year, month = (year + 1, 1) if month == 12 else (year, month + 1)
    return chunks


if download:
    if ascent_window < 1:
        raise ValueError("ascent_window must be at least 1 day, got " + str(ascent_window))
    launch_day = start.date()

    # The ascent data always covers exactly `ascent_window` calendar days,
    # launch day included, regardless of how many month boundaries it crosses.
    last_ascent_day = launch_day + timedelta(days=ascent_window - 1)
    for index, (yr, mon, dayrange) in enumerate(_month_chunks(launch_day, last_ascent_day), start=1):
        ERAlevelAscent(yr, mon, dayrange, start_lat, start_lon,
                       os.path.join(folder, "ascent" + str(index) + ".nc"))

    if not ascent_only:
        if end < start:
            raise ValueError("enddate (" + enddate + ") lies before startdate (" + startdate + ")")
        # One single-level and one pressure-level file per calendar month,
        # numbered consecutively from 1 in chronological order so that no
        # file of an earlier month is ever overwritten.
        for index, (yr, mon, dayrange) in enumerate(_month_chunks(launch_day, end.date()), start=1):
            ERAsingle(yr, mon, dayrange, north_lim, south_lim, east_lim, west_lim,
                      os.path.join(folder, "single" + str(index) + ".nc"))
            ERAlevelFloat(yr, mon, dayrange, north_lim, south_lim, east_lim, west_lim,
                          os.path.join(folder, "float" + str(index) + ".nc"))
def _natural_key(filename):
    """Return a sort key that orders embedded numbers numerically ("float2" before "float10")."""
    parts = re.split(r"(\d+)", filename)
    # With a capturing group, re.split puts the digit runs at the odd indices.
    return [int(part) if index % 2 else part for index, part in enumerate(parts)]


# Collect the raw files of each kind by their lower-case name prefix.
# The lists must be in chronological (= numeric) order because the stitching
# step reads the first/last file for the time range and concatenates the files
# in list order; os.walk itself gives no ordering guarantee.
floatfiles = []
singlefiles = []
ascentfiles = []
# Only the top level of the folder is inspected: every path below is built as
# folder/name, which would be wrong for files living in a subdirectory.
_, _, _top_level_names = next(os.walk(folder), (folder, [], []))
for name in sorted(_top_level_names, key=_natural_key):
    path = os.path.join(folder, name)
    if name.startswith("float"):
        floatfiles.append(path)
    elif name.startswith("single"):
        singlefiles.append(path)
    elif name.startswith("ascent"):
        ascentfiles.append(path)
# Stitching of multiple *.nc-files to one:
def _time_bounds(paths):
    """Return the first time value of the first file and the last time value of the last file, as ints."""
    with Dataset(paths[0], "r", format="NETCDF4") as first_file:
        first_value = int(first_file.variables['time'][0])
    with Dataset(paths[-1], "r", format="NETCDF4") as last_file:
        last_value = int(last_file.variables['time'][-1])
    return first_value, last_value


def _float32_encoding(variable_names):
    """Build a ``to_netcdf`` encoding dict that stores every listed variable as float32."""
    return {variable: {"dtype": "float32"} for variable in variable_names}


if stitch:
    print("Stitching files, please wait...")
    level_encoding = _float32_encoding(["z", "t", "u", "v", "w"])

    if not ascent_only:
        # The float/single files are only needed (and only read) for a full
        # run, so an ascent-only run no longer fails when none are present.
        if not floatfiles or not singlefiles:
            raise FileNotFoundError("No float*/single* files found in '" + str(folder)
                                    + "'; download them first or set ascent_only = True.")
        tstart, tend = _time_bounds(floatfiles)
        # The time axis is rebuilt as one value per unit step from tstart to
        # tend. NOTE(review): this assumes gap-free data with a time step of
        # one unit (presumably hourly) and that the single-level files span
        # the same period as the float files - confirm.
        float_period = (str(start.year) + str(start.month) + "_to_"
                        + str(end.year) + str(end.month))
        with xr.open_mfdataset(floatfiles, chunks={'time': 100}, combine="nested", engine='netcdf4',
                               concat_dim="time", parallel=True) as float_ds:
            float_ds = float_ds.assign_coords(time=np.linspace(tstart, tend, (tend - tstart) + 1))
            float_ds.to_netcdf(os.path.join(folder, "FLOAT_" + str(ident2) + "_" + float_period + ".nc"),
                               mode='w', format="NETCDF4", engine="netcdf4",
                               encoding=level_encoding)
        with xr.open_mfdataset(singlefiles, chunks={'time': 500}, combine="nested", engine='netcdf4',
                               concat_dim="time", parallel=True) as single_ds:
            single_ds = single_ds.assign_coords(time=np.linspace(tstart, tend, (tend - tstart) + 1))
            single_ds.to_netcdf(os.path.join(folder, "RAD_" + str(ident2) + "_" + float_period + ".nc"),
                                mode='w', format="NETCDF4", engine="netcdf4",
                                encoding=_float32_encoding([
                                    "cbh", "hcc", "lcc", "mcc", "skt", "ssr", "str", "sp",
                                    "ssrd", "strdc", "strd", "tisr", "tsr", "ttr", "tcc", "fdir",
                                ]))

    if not ascentfiles:
        raise FileNotFoundError("No ascent* files found in '" + str(folder)
                                + "'; download them first.")
    tstart, tend = _time_bounds(ascentfiles)
    with xr.open_mfdataset(ascentfiles, chunks={'time': 800}, combine="nested", engine='netcdf4',
                           concat_dim="time", parallel=True) as ascent_ds:
        ascent_ds = ascent_ds.assign_coords(time=np.linspace(tstart, tend, (tend - tstart) + 1))
        ascent_ds.to_netcdf(os.path.join(folder, "ASCENT_" + str(ident1) + "_" + str(start.year)
                                         + str(start.month) + ".nc"),
                            mode='w', format="NETCDF4", engine="netcdf4",
                            encoding=level_encoding)
    print("Stitching finished!")
# Deleting original files:
if delete:
    print("Deleting obsolete files.")
    # The collected paths already point into the configured output folder, so
    # they are removed directly instead of walking a hard-coded "ERA5" folder.
    obsolete = list(ascentfiles)
    if not ascent_only:
        # In an ascent-only run the float/single files are not stitched, so
        # they are kept rather than deleted without a stitched replacement.
        obsolete += floatfiles + singlefiles
    for path in obsolete:
        if os.path.isfile(path):
            os.remove(path)