From 4db0f70ad1ff189d0a2ec00a8d324982d301f48a Mon Sep 17 00:00:00 2001 From: Marcel Frommelt Date: Tue, 22 Jun 2021 22:14:28 +0900 Subject: [PATCH] Script for download and stitching of atmospheric data --- DataRequest.py | 365 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 258 insertions(+), 107 deletions(-) diff --git a/DataRequest.py b/DataRequest.py index 1d3f502..02e5f22 100644 --- a/DataRequest.py +++ b/DataRequest.py @@ -1,54 +1,68 @@ +""" +ERA5 Download- and Stitch-Tool +by Marcel Frommelt +""" + +""" +INPUT SECTION +""" + +folder = "ERA5" # name for output folder (either already existing or not, if not it will be created) +ident1 = "McMurdo" # unique identifier for ERA5 ascent data (suggestion: launch location) +ident2 = "Antarctica" # unique identifier for ERA float and radiation data (e. g. "southern polar region") + +""" +Balloon launch location on ground (LATitude/LONgitude in degrees): +""" +start_lat = -77.8535 # McMurdo +start_lon = 167.2022 +# --------------------------------- +# start_lat = 67.887382 # Kiruna +# start_lon = 21.081452 +# --------------------------------- +# start_lat = 78.22 # Svalbard +# start_lon = 15.65 + +""" +Geographical boundaries for atmospheric data (in degrees): +""" +# north_lim = 90 +# west_lim, east_lim = -180, 180 # suggestion for northern polar region +# south_lim = 45 +north_lim = -45 +west_lim, east_lim = -180, 180 # suggestion for southern polar region +south_lim = -90 + +""" +Start and end time for atmospheric data: +""" +startdate = '2012-12-08' # also acts as launch data of ERA5 data for ascent +enddate = '2013-02-01' + +ascent_window = 10 # time length of ERA5 data for ascent phase in (full) days from start date + +""" +Operations you want to be performed: +""" +ascent_only = False # True # get only ERA5 data for ascent (e. g. for new location or time within same region) +download = True # True # download raw ERA5 files +stitch = False # False # create stitched files +delete = False # False # delete original files + + +""" +EXECUTE SECTION +""" + import os -import sys import cdsapi from datetime import datetime from datetime import date import numpy as np import xarray as xr from netCDF4 import Dataset -from dask.diagnostics import ProgressBar -folder = "ERA5" -ident = "McMurdo" - - - -north_lim, south_lim, east_lim, west_lim = 90, 45, 180, -180 # Northern Polar Region -# north_lim, south_lim, east_lim, west_lim = -45, -90, 180, -180 # Southern Polar Region - -start_lat = 67.887382 # Kiruna -start_lon = 21.081452 - -# SOME START LOCATIONS: - -# start_lat = 78.22 # Svalbard -# start_lon = 15.65 -# -# start_lat = -77.8535 # McMurdo -# start_lon = 167.2022 - -startdate = '2019-12-15' -enddate = '2020-01-10' - -try: - os.makedirs(folder) -except FileExistsError: - pass - - -start = datetime.fromisoformat(startdate) -end = datetime.fromisoformat(enddate) - -#""" -# DOWNLOAD OF ERA5-DATA: - -startdays = [str(start.day+i).zfill(2) for i in range((date(start.year, start.month + 1, 1) - date(start.year, start.month, 1)).days - start.day + 1)] -endascent = [str(i+1).zfill(2) for i in range(30 - len(startdays))] -days = [str(i+1).zfill(2) for i in range(31)] -endfloat = [str(i+1).zfill(2) for i in range(end.day)] - - -def ERAsingle(year, month, days, nlim, slim, elim, wlim, name): +def ERAsingle(yr, mon, dayrange, nlim, slim, elim, wlim, name): single = cdsapi.Client().retrieve( 'reanalysis-era5-single-levels', { @@ -71,9 +85,9 @@ def ERAsingle(year, month, days, nlim, slim, elim, wlim, name): '18:00', '19:00', '20:00', '21:00', '22:00', '23:00', ], - 'year': str(year), - 'month': str(month), - 'day': days, + 'year': str(yr), + 'month': str(mon), + 'day': dayrange, 'area': [ nlim, wlim, slim, elim, ], @@ -82,7 +96,7 @@ def ERAsingle(year, month, days, nlim, slim, elim, wlim, name): single.download(name) -def ERAlevelAscent(year, month, dayrange, start_lat, start_lon, name): +def ERAlevelAscent(yr, mon, dayrange, start_lat, start_lon, name): nlim = start_lat + 10.0 slim = start_lat - 10.0 elim = start_lon + 10.0 @@ -111,8 +125,8 @@ def ERAlevelAscent(year, month, dayrange, start_lat, start_lon, name): '925', '950', '975', '1000', ], - 'year': str(year), - 'month': str(month), + 'year': str(yr), + 'month': str(mon), 'day': dayrange, 'time': [ '00:00', '01:00', '02:00', @@ -132,7 +146,7 @@ def ERAlevelAscent(year, month, dayrange, start_lat, start_lon, name): ascent.download(name) -def ERAlevelFloat(year, month, dayrange, nlim, slim, elim, wlim, name): +def ERAlevelFloat(yr, mon, dayrange, nlim, slim, elim, wlim, name): floating = cdsapi.Client().retrieve( 'reanalysis-era5-pressure-levels', { @@ -146,8 +160,8 @@ def ERAlevelFloat(year, month, dayrange, nlim, slim, elim, wlim, name): '5', '7', '10', '20', ], - 'year': str(year), - 'month': str(month), + 'year': str(yr), + 'month': str(mon), 'day': dayrange, 'time': [ '00:00', '01:00', '02:00', @@ -167,76 +181,213 @@ def ERAlevelFloat(year, month, dayrange, nlim, slim, elim, wlim, name): floating.download(name) -ERAlevelAscent(start.year, start.month, startdays, start_lat, start_lon, os.path.join(folder, "ascent1.nc")) -ERAlevelAscent(start.year, start.month + 1, endascent, start_lat, start_lon, os.path.join(folder, "ascent2.nc")) +try: + os.makedirs(folder) +except FileExistsError: + pass -ERAsingle(start.year, start.month, startdays, north_lim, south_lim, east_lim, west_lim, os.path.join(folder, "single" + str(start.month) + ".nc")) -ERAlevelFloat(start.year, start.month, startdays, north_lim, south_lim, east_lim, west_lim, os.path.join(folder, "float" + str(start.month) + ".nc")) +start = datetime.fromisoformat(startdate) +end = datetime.fromisoformat(enddate) -for m in range(end.month - start.month - 1): - ERAsingle(start.year, start.month + m + 1, days, north_lim, south_lim, east_lim, west_lim, os.path.join(folder, "single" + str(start.month + m + 1) + ".nc")) - ERAlevelFloat(start.year, start.month + m + 1, days, north_lim, south_lim, east_lim, west_lim, os.path.join(folder, "float" + str(start.month + m + 1) + ".nc")) -ERAsingle(start.year, end.month, endfloat, north_lim, south_lim, east_lim, west_lim, os.path.join(folder, "single" + str(end.month) + ".nc")) -ERAlevelFloat(start.year, end.month, endfloat, north_lim, south_lim, east_lim, west_lim, os.path.join(folder, "float" + str(end.month) + ".nc")) +""" +Download of ERA5-Data: +""" +if download == True: + if start.month == 12: + next_month = 1 + year2 = start.year + 1 + else: + next_month = start.month + 1 + year2 = start.year -#""" + if start.month == end.month: + month = 'same' + print("same") + elif end.month - start.month == 1 or end.month - start.month == -11: + month = 'next' + print("next") + else: + month = 'more' + print("more") + + if start.day + ascent_window > (date(year2, next_month, 1) - date(start.year, start.month, 1)).days: + short_ascent = 0 + startdays = [str(start.day + i).zfill(2) for i in range( + (date(start.year, start.month + 1, 1) - date(start.year, start.month, 1)).days - start.day + 1)] + endascent = [str(i + 1).zfill(2) for i in range(ascent_window - len(startdays) + 1)] + print(startdays) + else: + short_ascent = 1 + startdays = [str(start.day + i).zfill(2) for i in range(ascent_window)] + print(startdays) + + if end.year == start.year: + same_year = 1 + else: + same_year = 0 + + if month == 'more': + days = [str(i + 1).zfill(2) for i in range(31)] + + if month != 'same': + endfloat = [str(i + 1).zfill(2) for i in range(end.day)] + + if short_ascent == 1: + ERAlevelAscent(start.year, start.month, startdays, start_lat, start_lon, os.path.join(folder, "ascent1.nc")) + else: + ERAlevelAscent(start.year, start.month, startdays, start_lat, start_lon, os.path.join(folder, "ascent1.nc")) + ERAlevelAscent(year2, next_month, endascent, start_lat, start_lon, os.path.join(folder, "ascent2.nc")) + + if ascent_only == True: + pass + else: + if month == 'same': + ERAsingle(start.year, start.month, startdays, north_lim, south_lim, east_lim, west_lim, + os.path.join(folder, "single1.nc")) + ERAlevelFloat(start.year, start.month, startdays, north_lim, south_lim, east_lim, west_lim, + os.path.join(folder, "float1.nc")) + elif month == 'next': + ERAsingle(start.year, start.month, startdays, north_lim, south_lim, east_lim, west_lim, + os.path.join(folder, "single1.nc")) + ERAsingle(year2, end.month, endfloat, north_lim, south_lim, east_lim, west_lim, + os.path.join(folder, "single2.nc")) + ERAlevelFloat(start.year, start.month, startdays, north_lim, south_lim, east_lim, west_lim, + os.path.join(folder, "float1.nc")) + ERAlevelFloat(year2, end.month, endfloat, north_lim, south_lim, east_lim, west_lim, + os.path.join(folder, "float2.nc")) + else: + ERAsingle(start.year, start.month, startdays, north_lim, south_lim, east_lim, west_lim, + os.path.join(folder, "single1.nc")) + ERAlevelFloat(start.year, start.month, startdays, north_lim, south_lim, east_lim, west_lim, + os.path.join(folder, "float1.nc")) + + if same_year == 1: + for m in range(end.month - start.month - 1): + ERAsingle(start.year, start.month + 1 + m, days, north_lim, south_lim, east_lim, west_lim, + os.path.join(folder, "single" + str(m + 2) + ".nc")) + ERAlevelFloat(start.year, start.month + 1 + m, days, north_lim, south_lim, east_lim, west_lim, + os.path.join(folder, "float" + str(m + 2) + ".nc")) + + ERAsingle(start.year, end.month, endfloat, north_lim, south_lim, east_lim, west_lim, + os.path.join(folder, "single" + str(end.month - start.month + 1) + ".nc")) + ERAlevelFloat(start.year, end.month, endfloat, north_lim, south_lim, east_lim, west_lim, + os.path.join(folder, "float" + str(end.month - start.month + 1) + ".nc")) + else: + count1 = 0 + count2 = 0 + for m in range(12 - start.month): + ERAsingle(start.year, start.month + 1 + m, days, north_lim, south_lim, east_lim, west_lim, + os.path.join(folder, "single" + str(m + 2) + ".nc")) + ERAlevelFloat(start.year, start.month + 1 + m, days, north_lim, south_lim, east_lim, west_lim, + os.path.join(folder, "float" + str(m + 2) + ".nc")) + count1 = m + 2 + for m in range(end.month - 1): + ERAsingle(end.year, 1 + m, days, north_lim, south_lim, east_lim, west_lim, + os.path.join(folder, "single" + str(count1 + m + 1) + ".nc")) + ERAlevelFloat(end.year, 1 + m, days, north_lim, south_lim, east_lim, west_lim, + os.path.join(folder, "float" + str(count1 + m + 1) + ".nc")) + count2 = count1 + m + 1 + + ERAsingle(end.year, end.month, endfloat, north_lim, south_lim, east_lim, west_lim, + os.path.join(folder, "single" + str(count2 + 1) + ".nc")) + ERAlevelFloat(end.year, end.month, endfloat, north_lim, south_lim, east_lim, west_lim, + os.path.join(folder, "float" + str(count2 + 1) + ".nc")) +else: + pass -# STITCHING OF MULTIPLE *.NC-FILES TO ONE: floatfiles = [] singlefiles = [] ascentfiles = [] - for (root, dirs, files) in os.walk("ERA5"): for name in files: if name.startswith("float"): floatfiles.append(os.path.join(folder, str(name))) - elif name.startswith("radiation"): + elif name.startswith("single"): singlefiles.append(os.path.join(folder, str(name))) - else: + elif name.startswith("ascent"): ascentfiles.append(os.path.join(folder, str(name))) - - -startfile = Dataset(floatfiles[0], "r", format="NETCDF4") -endfile = Dataset(floatfiles[-1], "r", format="NETCDF4") - -tstart = int(startfile.variables['time'][0]) -tend = int(endfile.variables['time'][-1]) - -startfile.close() -endfile.close() - -df1 = xr.open_mfdataset(floatfiles, chunks={'time': 100}, combine="nested", engine='netcdf4', concat_dim="time", parallel=True) -df1 = df1.assign_coords(time=np.linspace(tstart, tend, (tend - tstart) + 1)) -df1.to_netcdf(os.path.join(folder, "FLOAT_" + str(ident) + "_" + str(start.year) + "_" + str(start.month) + "to" + str(end.year) + "_" + str(end.month) + ".nc"), mode='w', format="NETCDF4", engine="netcdf4", encoding={"z": {"dtype": "float32"}, "t": {"dtype": "float32"}, "u": {"dtype": "float32"}, "v": {"dtype": "float32"}, "w": {"dtype": "float32"}}) - -df2 = xr.open_mfdataset(singlefiles, chunks={'time': 500}, combine="nested", engine='netcdf4', concat_dim="time", parallel=True) -df2 = df2.assign_coords(time=np.linspace(tstart, tend, (tend - tstart) + 1)) -df2.to_netcdf(os.path.join(folder, "SINGLE_" + str(ident) + "_" + str(start.year) + "_" + str(start.month) + "to" + str(end.year) + "_" + str(end.month) + ".nc"), mode='w', format="NETCDF4", engine="netcdf4", encoding={"cbh": {"dtype": "float32"}, "hcc": {"dtype": "float32"}, "lcc": {"dtype": "float32"}, "mcc": {"dtype": "float32"}, "skt": {"dtype": "float32"}, "ssr": {"dtype": "float32"}, "str": {"dtype": "float32"}, "sp": {"dtype": "float32"}, "ssrd": {"dtype": "float32"}, "strdc": {"dtype": "float32"}, "strd": {"dtype": "float32"}, "tisr": {"dtype": "float32"}, "tsr": {"dtype": "float32"}, "ttr": {"dtype": "float32"}, "tcc": {"dtype": "float32"}, "fdir": {"dtype": "float32"}}) - -startfile = Dataset(ascentfiles[0], "r", format="NETCDF4") -endfile = Dataset(ascentfiles[-1], "r", format="NETCDF4") - -tstart = int(startfile.variables['time'][0]) -tend = int(endfile.variables['time'][-1]) - -startfile.close() -endfile.close() - -df3 = xr.open_mfdataset(ascentfiles, chunks={'time': 800}, combine="nested", engine='netcdf4', concat_dim="time", parallel=True) -df3 = df3.assign_coords(time=np.linspace(tstart, tend, (tend - tstart) + 1)) -df3.to_netcdf(os.path.join(folder, "ASCENT_" + str(ident) + "_" + str(start.year) + "_" + str(start.month) + ".nc"), mode='w', format="NETCDF4", engine="netcdf4", encoding={"z": {"dtype": "float32"}, "t": {"dtype": "float32"}, "u": {"dtype": "float32"}, "v": {"dtype": "float32"}, "w": {"dtype": "float32"}}) - - -# DELETING ORIGINAL FILES: - -""" -for (root, dirs, files) in os.walk("ERA5"): - for name in files: - if name in floatfiles + singlefiles + ascentfiles: - os.remove(name) else: pass -""" \ No newline at end of file + + +""" +Stitching of multiple *.nc-files to one: +""" +if stitch == True: + startfile = Dataset(floatfiles[0], "r", format="NETCDF4") + endfile = Dataset(floatfiles[-1], "r", format="NETCDF4") + + tstart = int(startfile.variables['time'][0]) + tend = int(endfile.variables['time'][-1]) + + startfile.close() + endfile.close() + + print("Stitching files, please wait...") + + if ascent_only == True: + pass + else: + df1 = xr.open_mfdataset(floatfiles, chunks={'time': 100}, combine="nested", engine='netcdf4', concat_dim="time", + parallel=True) + df1 = df1.assign_coords(time=np.linspace(tstart, tend, (tend - tstart) + 1)) + df1.to_netcdf(os.path.join(folder, + "FLOAT_" + str(ident2) + "_" + str(start.year) + str(start.month) + "_to_" + str( + end.year) + str(end.month) + ".nc"), mode='w', format="NETCDF4", + engine="netcdf4", + encoding={"z": {"dtype": "float32"}, "t": {"dtype": "float32"}, "u": {"dtype": "float32"}, + "v": {"dtype": "float32"}, "w": {"dtype": "float32"}}) + + df2 = xr.open_mfdataset(singlefiles, chunks={'time': 500}, combine="nested", engine='netcdf4', + concat_dim="time", + parallel=True) + df2 = df2.assign_coords(time=np.linspace(tstart, tend, (tend - tstart) + 1)) + df2.to_netcdf(os.path.join(folder, + "RAD_" + str(ident2) + "_" + str(start.year) + str(start.month) + "_to_" + str( + end.year) + str(end.month) + ".nc"), mode='w', format="NETCDF4", + engine="netcdf4", + encoding={"cbh": {"dtype": "float32"}, "hcc": {"dtype": "float32"}, "lcc": {"dtype": "float32"}, + "mcc": {"dtype": "float32"}, "skt": {"dtype": "float32"}, "ssr": {"dtype": "float32"}, + "str": {"dtype": "float32"}, "sp": {"dtype": "float32"}, "ssrd": {"dtype": "float32"}, + "strdc": {"dtype": "float32"}, "strd": {"dtype": "float32"}, + "tisr": {"dtype": "float32"}, + "tsr": {"dtype": "float32"}, "ttr": {"dtype": "float32"}, "tcc": {"dtype": "float32"}, + "fdir": {"dtype": "float32"}}) + + + startfile = Dataset(ascentfiles[0], "r", format="NETCDF4") + endfile = Dataset(ascentfiles[-1], "r", format="NETCDF4") + + tstart = int(startfile.variables['time'][0]) + tend = int(endfile.variables['time'][-1]) + + startfile.close() + endfile.close() + + df3 = xr.open_mfdataset(ascentfiles, chunks={'time': 800}, combine="nested", engine='netcdf4', concat_dim="time", + parallel=True) + df3 = df3.assign_coords(time=np.linspace(tstart, tend, (tend - tstart) + 1)) + df3.to_netcdf(os.path.join(folder, "ASCENT_" + str(ident1) + "_" + str(start.year) + str(start.month) + ".nc"), + mode='w', format="NETCDF4", engine="netcdf4", + encoding={"z": {"dtype": "float32"}, "t": {"dtype": "float32"}, "u": {"dtype": "float32"}, + "v": {"dtype": "float32"}, "w": {"dtype": "float32"}}) + + print("Stitching finished!") +else: + pass + + +""" +Deleting original files: +""" +if delete == True: + print("Deleting obsolete files.") + for (root, dirs, files) in os.walk("ERA5"): + for name in files: + if os.path.join(folder, name) in floatfiles + singlefiles + ascentfiles: + os.remove(os.path.join(folder, name)) + else: + pass