BASTET/ERA5_Data_Request.py

384 lines
15 KiB
Python
Raw Normal View History

"""
ERA5 Download- and Stitch-Tool
by Marcel Frommelt
"""
2021-06-18 16:24:18 +02:00
"""
INPUT SECTION
"""
2021-06-18 16:24:18 +02:00
folder = "ERA5"  # name for output folder (either already existing or not, if not it will be created)
ident1 = "McMurdo"  # unique identifier for ERA5 ascent data (suggestion: launch location)
ident2 = "Antarctica"  # unique identifier for ERA5 float and radiation data (e. g. "southern polar region")

"""
Balloon launch location on ground (LATitude/LONgitude in degrees):
"""
start_lat = -77.8535  # McMurdo
start_lon = 167.2022
# ---------------------------------
# start_lat = 67.887382  # Kiruna
# start_lon = 21.081452
# ---------------------------------
# start_lat = 78.22  # Svalbard
# start_lon = 15.65

"""
Geographical boundaries for atmospheric data (in degrees):
"""
# north_lim = 90
# west_lim, east_lim = -180, 180  # suggestion for northern polar region
# south_lim = 45
north_lim = -45
west_lim, east_lim = -180, 180  # suggestion for southern polar region
south_lim = -90

"""
Start and end time for atmospheric data:
"""
startdate = '2019-12-15'  # ISO date (YYYY-MM-DD); also acts as launch date of ERA5 data for ascent
enddate = '2020-01-17'  # ISO date (YYYY-MM-DD)

ascent_window = 10  # time length of ERA5 data for ascent phase in (full) days from start date

"""
Operations you want to be performed:
"""
ascent_only = False  # True  # get only ERA5 data for ascent (e. g. for new location or time within same region)
download = False  # True  # download raw ERA5 files
stitch = True  # False  # create stitched files
delete = True  # False  # delete original files
"""
EXECUTE SECTION
"""
2021-06-18 16:24:18 +02:00
import os
import cdsapi
from datetime import datetime
from datetime import date
import numpy as np
import xarray as xr
from netCDF4 import Dataset
2021-06-18 16:24:18 +02:00
def ERAsingle(yr, mon, dayrange, nlim, slim, elim, wlim, name):
    """
    Retrieve hourly ERA5 single-level reanalysis fields and store them as a netCDF file.

    Sends one request against the 'reanalysis-era5-single-levels' dataset through the CDS API client
    and downloads the result.

    :param yr: year of the request (converted with str())
    :param mon: month of the request (converted with str())
    :param dayrange: value passed on unchanged as the 'day' entry of the request
    :param nlim: northern limit of the requested area
    :param slim: southern limit of the requested area
    :param elim: eastern limit of the requested area
    :param wlim: western limit of the requested area
    :param name: target path the retrieved file is downloaded to
    """
    # all 24 full hours of a day: '00:00', '01:00', ..., '23:00'
    hours = [str(hour).zfill(2) + ':00' for hour in range(24)]

    fields = [
        'cloud_base_height', 'high_cloud_cover', 'low_cloud_cover', 'medium_cloud_cover',
        'skin_temperature', 'surface_net_solar_radiation', 'surface_net_thermal_radiation',
        'surface_pressure', 'surface_solar_radiation_downwards', 'surface_thermal_radiation_downward_clear_sky',
        'surface_thermal_radiation_downwards', 'toa_incident_solar_radiation',
        'top_net_solar_radiation', 'top_net_thermal_radiation', 'total_cloud_cover',
        'total_sky_direct_solar_radiation_at_surface',
    ]

    request = {
        'product_type': 'reanalysis',
        'variable': fields,
        'time': hours,
        'year': str(yr),
        'month': str(mon),
        'day': dayrange,
        # the area is handed over in the order north, west, south, east
        'area': [nlim, wlim, slim, elim],
        'format': 'netcdf',
    }

    client = cdsapi.Client()
    result = client.retrieve('reanalysis-era5-single-levels', request)
    result.download(name)
def ERAlevelAscent(yr, mon, dayrange, start_lat, start_lon, name):
    """
    Retrieve hourly ERA5 pressure-level reanalysis fields around the launch site for the ascent phase.

    The requested area is a box reaching 10 degrees to each side of the launch location. Its latitude
    limits are clamped to the range -90..90 so that launch sites close to a pole do not produce
    limits beyond the pole. Longitude limits are passed on unclamped.
    NOTE(review): behaviour of the data service for longitudes beyond +/-180 is not verified here.

    The request covers all 37 pressure levels listed below ('1' up to '1000') and is sent against the
    'reanalysis-era5-pressure-levels' dataset through the CDS API client.

    :param yr: year of the request (converted with str())
    :param mon: month of the request (converted with str())
    :param dayrange: value passed on unchanged as the 'day' entry of the request
    :param start_lat: latitude of the launch location in degrees
    :param start_lon: longitude of the launch location in degrees
    :param name: target path the retrieved file is downloaded to
    """
    half_width = 10.0  # half edge length of the box around the launch site, in degrees

    # a latitude beyond a pole is not a meaningful box limit, so stop at the pole
    nlim = min(start_lat + half_width, 90.0)
    slim = max(start_lat - half_width, -90.0)
    elim = start_lon + half_width
    wlim = start_lon - half_width

    ascent = cdsapi.Client().retrieve(
        'reanalysis-era5-pressure-levels',
        {
            'product_type': 'reanalysis',
            'variable': [
                'geopotential', 'temperature', 'u_component_of_wind',
                'v_component_of_wind', 'vertical_velocity',
            ],
            'pressure_level': [
                '1', '2', '3',
                '5', '7', '10',
                '20', '30', '50',
                '70', '100', '125',
                '150', '175', '200',
                '225', '250', '300',
                '350', '400', '450',
                '500', '550', '600',
                '650', '700', '750',
                '775', '800', '825',
                '850', '875', '900',
                '925', '950', '975',
                '1000',
            ],
            'year': str(yr),
            'month': str(mon),
            'day': dayrange,
            # all 24 full hours of a day: '00:00', '01:00', ..., '23:00'
            'time': [str(hour).zfill(2) + ':00' for hour in range(24)],
            # the area is handed over in the order north, west, south, east
            'area': [
                nlim, wlim, slim, elim,
            ],
            'format': 'netcdf',
        })

    ascent.download(name)
def ERAlevelFloat(yr, mon, dayrange, nlim, slim, elim, wlim, name):
    """
    Retrieve hourly ERA5 pressure-level reanalysis fields for the float phase and store them as netCDF.

    Only the seven pressure levels '1' to '20' listed below are requested. The request is sent against
    the 'reanalysis-era5-pressure-levels' dataset through the CDS API client.

    :param yr: year of the request (converted with str())
    :param mon: month of the request (converted with str())
    :param dayrange: value passed on unchanged as the 'day' entry of the request
    :param nlim: northern limit of the requested area
    :param slim: southern limit of the requested area
    :param elim: eastern limit of the requested area
    :param wlim: western limit of the requested area
    :param name: target path the retrieved file is downloaded to
    """
    fields = [
        'geopotential', 'temperature', 'u_component_of_wind',
        'v_component_of_wind', 'vertical_velocity',
    ]
    levels = ['1', '2', '3', '5', '7', '10', '20']
    # all 24 full hours of a day: '00:00', '01:00', ..., '23:00'
    hours = [str(hour).zfill(2) + ':00' for hour in range(24)]

    request = {
        'product_type': 'reanalysis',
        'variable': fields,
        'pressure_level': levels,
        'year': str(yr),
        'month': str(mon),
        'day': dayrange,
        'time': hours,
        # the area is handed over in the order north, west, south, east
        'area': [nlim, wlim, slim, elim],
        'format': 'netcdf',
    }

    client = cdsapi.Client()
    result = client.retrieve('reanalysis-era5-pressure-levels', request)
    result.download(name)
# Create the output folder if it is missing. exist_ok=True keeps an existing folder untouched, but
# still raises FileExistsError when a regular file of that name is in the way.
os.makedirs(folder, exist_ok=True)

# Parse the configured ISO date strings into datetime objects.
start = datetime.fromisoformat(startdate)
end = datetime.fromisoformat(enddate)
2021-06-18 16:24:18 +02:00
"""
Download of ERA5-Data:
"""
def _following_month(year, month):
    """Return (year, month) of the calendar month after the given one."""
    if month == 12:
        return year + 1, 1
    return year, month + 1


def _month_length(year, month):
    """Return the number of days of the given calendar month."""
    next_year, next_month = _following_month(year, month)
    return (date(next_year, next_month, 1) - date(year, month, 1)).days


def _monthly_day_lists(first_day, last_day):
    """
    Split the inclusive span first_day..last_day (date objects) into one entry per calendar month.

    Returns a chronologically ordered list of (year, month, days) tuples. days holds the zero-padded
    day-of-month strings ('01', '02', ...) of that month which lie inside the span. The list is empty
    when last_day lies in a month before first_day.
    """
    first_key = (first_day.year, first_day.month)
    last_key = (last_day.year, last_day.month)
    chunks = []
    year, month = first_key
    while (year, month) <= last_key:
        lower = first_day.day if (year, month) == first_key else 1
        upper = last_day.day if (year, month) == last_key else _month_length(year, month)
        chunks.append((year, month, [str(day).zfill(2) for day in range(lower, upper + 1)]))
        year, month = _following_month(year, month)
    return chunks


if download:
    if end < start:
        raise ValueError("enddate " + str(end.date()) + " lies before startdate " + str(start.date()))
    if ascent_window < 1:
        raise ValueError("ascent_window has to cover at least one day")

    launch_day = start.date()

    # Ascent data: exactly ascent_window days, counted from (and including) the launch day.
    # One file per calendar month touched, numbered ascent1.nc, ascent2.nc, ...
    last_ascent_day = date.fromordinal(launch_day.toordinal() + ascent_window - 1)
    for index, (year, month, days) in enumerate(_monthly_day_lists(launch_day, last_ascent_day), start=1):
        ERAlevelAscent(year, month, days, start_lat, start_lon,
                       os.path.join(folder, "ascent" + str(index) + ".nc"))

    if not ascent_only:
        # Float and radiation data: every day from startdate to enddate (inclusive).
        # One file pair per calendar month, numbered consecutively in chronological order
        # (single1.nc/float1.nc, single2.nc/float2.nc, ...) so that no file overwrites another.
        # Years are taken into account, so spans of any length are handled.
        for index, (year, month, days) in enumerate(_monthly_day_lists(launch_day, end.date()), start=1):
            ERAsingle(year, month, days, north_lim, south_lim, east_lim, west_lim,
                      os.path.join(folder, "single" + str(index) + ".nc"))
            ERAlevelFloat(year, month, days, north_lim, south_lim, east_lim, west_lim,
                          os.path.join(folder, "float" + str(index) + ".nc"))
2021-06-18 16:24:18 +02:00
def _download_index(path):
    """
    Sort key for downloaded files: (trailing number of the file name without extension, path).

    Sorting by the number instead of the plain name keeps e.g. float2.nc in front of float10.nc.
    Names without a trailing number get index 0.
    """
    stem = os.path.splitext(os.path.basename(path))[0]
    digits = stem[len(stem.rstrip("0123456789")):]
    return (int(digits) if digits else 0, path)


floatfiles = []
singlefiles = []
ascentfiles = []

# Only files located directly in the output folder are considered, since every path below is built
# relative to that folder.
for name in (os.listdir(folder) if os.path.isdir(folder) else []):
    path = os.path.join(folder, str(name))
    if not os.path.isfile(path):
        continue
    if name.startswith("float"):
        floatfiles.append(path)
    elif name.startswith("single"):
        singlefiles.append(path)
    elif name.startswith("ascent"):
        ascentfiles.append(path)

# Directory listings come in arbitrary order. The later processing uses the first and last entry of
# each list and concatenates the files in list order, so put them into download order.
floatfiles.sort(key=_download_index)
singlefiles.sort(key=_download_index)
ascentfiles.sort(key=_download_index)
2021-06-18 16:24:18 +02:00
"""
Stitching of multiple *.nc-files to one:
"""
def _time_bounds(paths):
    """
    Return (first, last) time value of a file list as integers.

    first is the first entry of the 'time' variable of paths[0], last is the last entry of the 'time'
    variable of paths[-1]. Both files are closed again, even if reading fails.
    """
    with Dataset(paths[0], "r", format="NETCDF4") as first_file:
        first = int(first_file.variables['time'][0])
    with Dataset(paths[-1], "r", format="NETCDF4") as last_file:
        last = int(last_file.variables['time'][-1])
    return first, last


def _stitch(paths, chunk, target, variables, tstart, tend):
    """
    Concatenate the files in paths along 'time' (in list order) and write the result to target.

    The time coordinate is replaced by evenly spaced values from tstart to tend with a step of 1.
    This assumes that the files contain one record per step without gaps; assign_coords fails
    otherwise. All listed variables are written as float32.
    """
    with xr.open_mfdataset(paths, chunks={'time': chunk}, combine="nested", engine='netcdf4',
                           concat_dim="time", parallel=True) as merged:
        merged = merged.assign_coords(time=np.linspace(tstart, tend, (tend - tstart) + 1))
        merged.to_netcdf(target, mode='w', format="NETCDF4", engine="netcdf4",
                         encoding={variable: {"dtype": "float32"} for variable in variables})


if stitch:
    print("Stitching files, please wait...")

    level_variables = ("z", "t", "u", "v", "w")
    radiation_variables = ("cbh", "hcc", "lcc", "mcc", "skt", "ssr", "str", "sp", "ssrd", "strdc", "strd",
                           "tisr", "tsr", "ttr", "tcc", "fdir")

    if not ascent_only:
        if not floatfiles or not singlefiles:
            raise FileNotFoundError("No float*/single* files found in folder '" + str(folder) + "' to stitch.")
        # The float files are only inspected here, so that an ascent-only run does not depend on them.
        # The radiation files are stitched onto the same time axis as the float files.
        tstart, tend = _time_bounds(floatfiles)
        period = str(start.year) + str(start.month) + "_to_" + str(end.year) + str(end.month)
        _stitch(floatfiles, 100, os.path.join(folder, "FLOAT_" + str(ident2) + "_" + period + ".nc"),
                level_variables, tstart, tend)
        _stitch(singlefiles, 500, os.path.join(folder, "RAD_" + str(ident2) + "_" + period + ".nc"),
                radiation_variables, tstart, tend)

    if not ascentfiles:
        raise FileNotFoundError("No ascent* files found in folder '" + str(folder) + "' to stitch.")
    tstart, tend = _time_bounds(ascentfiles)
    _stitch(ascentfiles, 800,
            os.path.join(folder, "ASCENT_" + str(ident1) + "_" + str(start.year) + str(start.month) + ".nc"),
            level_variables, tstart, tend)

    print("Stitching finished!")
2021-06-18 16:24:18 +02:00
"""
Deleting original files:
"""
if delete:
    print("Deleting obsolete files.")
    # The collected paths already point into the configured output folder, so they are removed
    # directly instead of searching a fixed "ERA5" directory for them.
    for path in floatfiles + singlefiles + ascentfiles:
        if os.path.isfile(path):
            os.remove(path)