""" ERA5 Download- and Stitch-Tool by Marcel Frommelt """

import os
import cdsapi
from datetime import datetime
from datetime import date
import numpy as np
import xarray as xr
from netCDF4 import Dataset

# ---------------------------------------------------------------------------
# INPUT SECTION
# ---------------------------------------------------------------------------

folder = "ERA5"  # name for output folder (either already existing or not, if not it will be created)
ident1 = "McMurdo"  # unique identifier for ERA5 ascent data (suggestion: launch location)
ident2 = "Antarctica"  # unique identifier for ERA float and radiation data (e. g. "southern polar region")

# Balloon launch location on ground (LATitude/LONgitude in degrees):

start_lat = -77.8535  # McMurdo
start_lon = 167.2022

# ---------------------------------
# start_lat = 67.887382  # Kiruna
# start_lon = 21.081452
# ---------------------------------
# start_lat = 78.22  # Svalbard
# start_lon = 15.65

# Geographical boundaries for atmospheric data (in degrees):

# north_lim = 90
# west_lim, east_lim = -180, 180  # suggestion for northern polar region
# south_lim = 45

north_lim = -45
west_lim, east_lim = -180, 180  # suggestion for southern polar region
south_lim = -90

# Start and end time for atmospheric data:

startdate = '2019-12-15'  # also acts as launch date of ERA5 data for ascent
enddate = '2020-01-17'
ascent_window = 10  # time length of ERA5 data for ascent phase in (full) days from start date

# Operations you want to be performed:

ascent_only = False  # True  # get only ERA5 data for ascent (e. g. for new location or time within same region)
download = False  # True  # download raw ERA5 files
stitch = True  # False  # create stitched files
delete = True  # False  # delete original files

# ---------------------------------------------------------------------------
# EXECUTE SECTION
# ---------------------------------------------------------------------------

# Request fragments shared by the three download functions below.
_HOURS = [f"{hour:02d}:00" for hour in range(24)]

_LEVEL_VARIABLES = [
    'geopotential', 'temperature', 'u_component_of_wind',
    'v_component_of_wind', 'vertical_velocity',
]

_ALL_LEVELS = [
    '1', '2', '3', '5', '7', '10', '20', '30', '50', '70',
    '100', '125', '150', '175', '200', '225', '250', '300', '350', '400',
    '450', '500', '550', '600', '650', '700', '750', '775', '800', '825',
    '850', '875', '900', '925', '950', '975', '1000',
]

_FLOAT_LEVELS = ['1', '2', '3', '5', '7', '10', '20']

# Variable names that get a float32 encoding when the stitched files are
# written (pressure-level files and single-level files respectively).
_LEVEL_CODES = ["z", "t", "u", "v", "w"]
_SINGLE_CODES = [
    "cbh", "hcc", "lcc", "mcc", "skt", "ssr", "str", "sp",
    "ssrd", "strdc", "strd", "tisr", "tsr", "ttr", "tcc", "fdir",
]


def _retrieve_levels(yr, mon, dayrange, area, levels, name):
    """Request hourly pressure-level data for one month and save it to `name`."""
    result = cdsapi.Client().retrieve(
        'reanalysis-era5-pressure-levels',
        {
            'product_type': 'reanalysis',
            'variable': _LEVEL_VARIABLES,
            'pressure_level': levels,
            'year': str(yr),
            'month': str(mon),
            'day': dayrange,
            'time': _HOURS,
            'area': area,
            'format': 'netcdf',
        })
    result.download(name)


def ERAsingle(yr, mon, dayrange, nlim, slim, elim, wlim, name):
    """Download hourly ERA5 single-level (cloud/radiation/surface) data.

    yr, mon   -- year and month of the request (converted with str())
    dayrange  -- list of day-of-month strings to request
    nlim, slim, elim, wlim -- bounding box in degrees; passed to the API
                 in the order [north, west, south, east]
    name      -- path of the netCDF file to write
    """
    single = cdsapi.Client().retrieve(
        'reanalysis-era5-single-levels',
        {
            'product_type': 'reanalysis',
            'variable': [
                'cloud_base_height', 'high_cloud_cover', 'low_cloud_cover',
                'medium_cloud_cover', 'skin_temperature', 'surface_net_solar_radiation',
                'surface_net_thermal_radiation', 'surface_pressure',
                'surface_solar_radiation_downwards',
                'surface_thermal_radiation_downward_clear_sky',
                'surface_thermal_radiation_downwards', 'toa_incident_solar_radiation',
                'top_net_solar_radiation', 'top_net_thermal_radiation',
                'total_cloud_cover', 'total_sky_direct_solar_radiation_at_surface',
            ],
            'time': _HOURS,
            'year': str(yr),
            'month': str(mon),
            'day': dayrange,
            'area': [nlim, wlim, slim, elim],
            'format': 'netcdf',
        })
    single.download(name)


def ERAlevelAscent(yr, mon, dayrange, start_lat, start_lon, name):
    """Download hourly ERA5 pressure-level data around the launch site.

    The requested area is a box of +/- 10 degrees in latitude and longitude
    around (start_lat, start_lon); all 37 pressure levels from 1 to 1000 are
    requested. The result is written to `name`.
    """
    area = [start_lat + 10.0, start_lon - 10.0, start_lat - 10.0, start_lon + 10.0]
    _retrieve_levels(yr, mon, dayrange, area, _ALL_LEVELS, name)


def ERAlevelFloat(yr, mon, dayrange, nlim, slim, elim, wlim, name):
    """Download hourly ERA5 pressure-level data for the float phase.

    Only the levels 1, 2, 3, 5, 7, 10 and 20 are requested, for the bounding
    box given by nlim/slim/elim/wlim (degrees). The result is written to
    `name`.
    """
    _retrieve_levels(yr, mon, dayrange, [nlim, wlim, slim, elim], _FLOAT_LEVELS, name)


def _days(first, last):
    """Return zero-padded day-of-month strings for first..last (inclusive)."""
    return [str(day).zfill(2) for day in range(first, last + 1)]


def _following_month(year, month):
    """Return (year, month) of the calendar month after the given one."""
    return (year + 1, 1) if month == 12 else (year, month + 1)


def _ascent_requests(start, window):
    """Split the ascent window into per-month requests.

    Returns (startdays, requests): `startdays` are the days from the start
    day to the end of the start month, `requests` is a list of
    (year, month, days) tuples, one per file to download.
    """
    next_year, next_month = _following_month(start.year, start.month)
    month_length = (date(next_year, next_month, 1) - date(start.year, start.month, 1)).days
    startdays = _days(start.day, month_length)

    if start.day + window > month_length:
        # Window runs into the following month: rest of this month plus the
        # first days of the next one.
        # NOTE(review): this branch yields window + 1 days in total while the
        # branch below yields exactly `window` days; kept as in the original,
        # confirm which length is intended.
        spill = _days(1, window - len(startdays) + 1)
        return startdays, [(start.year, start.month, startdays),
                           (next_year, next_month, spill)]

    return startdays, [(start.year, start.month, _days(start.day, start.day + window - 1))]


def _float_requests(start, end, startdays):
    """Return one (year, month, days) request per calendar month from start to end.

    The first month uses `startdays`, the last month runs from day 1 to
    end.day, and every month in between requests days 01..31.
    """
    if end < start:
        raise ValueError("enddate (" + enddate + ") lies before startdate (" + startdate + ")")

    # NOTE(review): when start and end fall in the same month the request
    # runs to the end of that month rather than to end.day (as in the
    # original script) - confirm this is wanted.
    requests = [(start.year, start.month, startdays)]
    year, month = start.year, start.month
    while (year, month) < (end.year, end.month):
        year, month = _following_month(year, month)
        if (year, month) == (end.year, end.month):
            requests.append((year, month, _days(1, end.day)))
        else:
            requests.append((year, month, _days(1, 31)))
    return requests


def _file_number(path):
    """Sort key ordering 'float2.nc' before 'float10.nc' (numeric suffix first)."""
    digits = "".join(ch for ch in os.path.basename(path) if ch.isdigit())
    return (int(digits) if digits else 0, path)


def _collect(prefix):
    """Return the files directly inside `folder` whose name starts with `prefix`.

    The result is sorted by file number because the stitching step
    concatenates the files in list order.
    """
    paths = [os.path.join(folder, name) for name in os.listdir(folder)
             if name.startswith(prefix) and os.path.isfile(os.path.join(folder, name))]
    return sorted(paths, key=_file_number)


def _time_bounds(files):
    """Return (first, last) integer time value over an ordered list of files."""
    with Dataset(files[0], "r", format="NETCDF4") as first_file:
        tstart = int(first_file.variables['time'][0])
    with Dataset(files[-1], "r", format="NETCDF4") as last_file:
        tend = int(last_file.variables['time'][-1])
    return tstart, tend


def _stitch(files, chunk, target, variables):
    """Concatenate `files` along time and write them to `target`.

    The time coordinate is replaced by an evenly spaced axis with step 1
    between the first and the last time value found in the input files.
    """
    if not files:
        raise FileNotFoundError("No raw files found in '" + folder + "' for " + target)

    tstart, tend = _time_bounds(files)
    time_axis = np.linspace(tstart, tend, (tend - tstart) + 1)
    encoding = {code: {"dtype": "float32"} for code in variables}

    with xr.open_mfdataset(files, chunks={'time': chunk}, combine="nested", engine='netcdf4',
                           concat_dim="time", parallel=True) as merged:
        merged = merged.assign_coords(time=time_axis)
        merged.to_netcdf(target, mode='w', format="NETCDF4", engine="netcdf4", encoding=encoding)


os.makedirs(folder, exist_ok=True)

start = datetime.fromisoformat(startdate)
end = datetime.fromisoformat(enddate)

# Download of ERA5-Data:

if download:
    startdays, ascent_parts = _ascent_requests(start, ascent_window)

    for number, (yr, mon, days) in enumerate(ascent_parts, 1):
        ERAlevelAscent(yr, mon, days, start_lat, start_lon,
                       os.path.join(folder, "ascent" + str(number) + ".nc"))

    if not ascent_only:
        # Files are numbered consecutively (single1.nc, single2.nc, ...) so
        # that no month can overwrite another one.
        for number, (yr, mon, days) in enumerate(_float_requests(start, end, startdays), 1):
            ERAsingle(yr, mon, days, north_lim, south_lim, east_lim, west_lim,
                      os.path.join(folder, "single" + str(number) + ".nc"))
            ERAlevelFloat(yr, mon, days, north_lim, south_lim, east_lim, west_lim,
                          os.path.join(folder, "float" + str(number) + ".nc"))

# Raw files currently present in the output folder. The stitched results
# start with upper-case prefixes and are therefore not picked up here.
floatfiles = _collect("float")
singlefiles = _collect("single")
ascentfiles = _collect("ascent")

# Stitching of multiple *.nc-files to one:

if stitch:
    print("Stitching files, please wait...")

    if not ascent_only:
        period = str(start.year) + str(start.month) + "_to_" + str(end.year) + str(end.month)
        _stitch(floatfiles, 100,
                os.path.join(folder, "FLOAT_" + str(ident2) + "_" + period + ".nc"),
                _LEVEL_CODES)
        _stitch(singlefiles, 500,
                os.path.join(folder, "RAD_" + str(ident2) + "_" + period + ".nc"),
                _SINGLE_CODES)

    _stitch(ascentfiles, 800,
            os.path.join(folder, "ASCENT_" + str(ident1) + "_" + str(start.year) + str(start.month) + ".nc"),
            _LEVEL_CODES)

    print("Stitching finished!")

# Deleting original files:

if delete:
    print("Deleting obsolete files.")
    # NOTE(review): as in the original script this removes all raw
    # float/single/ascent files, including ones that were not stitched in
    # this run (e. g. with ascent_only = True) - confirm this is wanted.
    for path in floatfiles + singlefiles + ascentfiles:
        os.remove(path)