Script for download and stitching of atmospheric data

This commit is contained in:
Marcel Christian Frommelt 2021-06-22 22:14:28 +09:00
parent 65d80b00ee
commit 4db0f70ad1
1 changed files with 258 additions and 107 deletions

View File

@ -1,54 +1,68 @@
"""
ERA5 Download- and Stitch-Tool
by Marcel Frommelt
"""
"""
INPUT SECTION
"""
# Output folder and the identifiers that end up in the stitched file names.
folder = "ERA5" # name of the output folder (created if it does not exist yet)
ident1 = "McMurdo" # unique identifier for ERA5 ascent data (suggestion: launch location)
ident2 = "Antarctica" # unique identifier for ERA5 float and radiation data (e.g. "southern polar region")
"""
Balloon launch location on ground (LATitude/LONgitude in degrees):
"""
start_lat = -77.8535 # McMurdo
start_lon = 167.2022
# ---------------------------------
# start_lat = 67.887382 # Kiruna
# start_lon = 21.081452
# ---------------------------------
# start_lat = 78.22 # Svalbard
# start_lon = 15.65
"""
Geographical boundaries for atmospheric data (in degrees):
"""
# These limits are passed to the single-level and float requests; the ascent
# request derives its own limits from start_lat/start_lon.
# north_lim = 90
# west_lim, east_lim = -180, 180 # suggestion for northern polar region
# south_lim = 45
north_lim = -45
west_lim, east_lim = -180, 180 # suggestion for southern polar region
south_lim = -90
"""
Start and end time for atmospheric data:
"""
startdate = '2012-12-08' # also acts as launch date of the ERA5 data for the ascent
enddate = '2013-02-01'
ascent_window = 10 # length of the ERA5 ascent data in (full) days, counted from the start date
"""
Operations you want to be performed:
"""
ascent_only = False # True: handle only the ERA5 ascent data (e.g. for a new location or time within the same region)
download = True # True: download the raw ERA5 files
stitch = False # True: create the stitched files
delete = False # True: delete the original (raw) files
"""
EXECUTE SECTION
"""
import os
import sys
import cdsapi
from datetime import datetime
from datetime import date
import numpy as np
import xarray as xr
from netCDF4 import Dataset
from dask.diagnostics import ProgressBar
folder = "ERA5"
ident = "McMurdo"
north_lim, south_lim, east_lim, west_lim = 90, 45, 180, -180 # Northern Polar Region
# north_lim, south_lim, east_lim, west_lim = -45, -90, 180, -180 # Southern Polar Region
start_lat = 67.887382 # Kiruna
start_lon = 21.081452
# SOME START LOCATIONS:
# start_lat = 78.22 # Svalbard
# start_lon = 15.65
#
# start_lat = -77.8535 # McMurdo
# start_lon = 167.2022
startdate = '2019-12-15'
enddate = '2020-01-10'
try:
os.makedirs(folder)
except FileExistsError:
pass
start = datetime.fromisoformat(startdate)
end = datetime.fromisoformat(enddate)
#"""
# DOWNLOAD OF ERA5-DATA:
startdays = [str(start.day+i).zfill(2) for i in range((date(start.year, start.month + 1, 1) - date(start.year, start.month, 1)).days - start.day + 1)]
endascent = [str(i+1).zfill(2) for i in range(30 - len(startdays))]
days = [str(i+1).zfill(2) for i in range(31)]
endfloat = [str(i+1).zfill(2) for i in range(end.day)]
def ERAsingle(year, month, days, nlim, slim, elim, wlim, name):
def ERAsingle(yr, mon, dayrange, nlim, slim, elim, wlim, name):
single = cdsapi.Client().retrieve(
'reanalysis-era5-single-levels',
{
@ -71,9 +85,9 @@ def ERAsingle(year, month, days, nlim, slim, elim, wlim, name):
'18:00', '19:00', '20:00',
'21:00', '22:00', '23:00',
],
'year': str(year),
'month': str(month),
'day': days,
'year': str(yr),
'month': str(mon),
'day': dayrange,
'area': [
nlim, wlim, slim, elim,
],
@ -82,7 +96,7 @@ def ERAsingle(year, month, days, nlim, slim, elim, wlim, name):
single.download(name)
def ERAlevelAscent(year, month, dayrange, start_lat, start_lon, name):
def ERAlevelAscent(yr, mon, dayrange, start_lat, start_lon, name):
nlim = start_lat + 10.0
slim = start_lat - 10.0
elim = start_lon + 10.0
@ -111,8 +125,8 @@ def ERAlevelAscent(year, month, dayrange, start_lat, start_lon, name):
'925', '950', '975',
'1000',
],
'year': str(year),
'month': str(month),
'year': str(yr),
'month': str(mon),
'day': dayrange,
'time': [
'00:00', '01:00', '02:00',
@ -132,7 +146,7 @@ def ERAlevelAscent(year, month, dayrange, start_lat, start_lon, name):
ascent.download(name)
def ERAlevelFloat(year, month, dayrange, nlim, slim, elim, wlim, name):
def ERAlevelFloat(yr, mon, dayrange, nlim, slim, elim, wlim, name):
floating = cdsapi.Client().retrieve(
'reanalysis-era5-pressure-levels',
{
@ -146,8 +160,8 @@ def ERAlevelFloat(year, month, dayrange, nlim, slim, elim, wlim, name):
'5', '7', '10',
'20',
],
'year': str(year),
'month': str(month),
'year': str(yr),
'month': str(mon),
'day': dayrange,
'time': [
'00:00', '01:00', '02:00',
@ -167,76 +181,213 @@ def ERAlevelFloat(year, month, dayrange, nlim, slim, elim, wlim, name):
floating.download(name)
ERAlevelAscent(start.year, start.month, startdays, start_lat, start_lon, os.path.join(folder, "ascent1.nc"))
ERAlevelAscent(start.year, start.month + 1, endascent, start_lat, start_lon, os.path.join(folder, "ascent2.nc"))
try:
os.makedirs(folder)
except FileExistsError:
pass
ERAsingle(start.year, start.month, startdays, north_lim, south_lim, east_lim, west_lim, os.path.join(folder, "single" + str(start.month) + ".nc"))
ERAlevelFloat(start.year, start.month, startdays, north_lim, south_lim, east_lim, west_lim, os.path.join(folder, "float" + str(start.month) + ".nc"))
start = datetime.fromisoformat(startdate)
end = datetime.fromisoformat(enddate)
for m in range(end.month - start.month - 1):
ERAsingle(start.year, start.month + m + 1, days, north_lim, south_lim, east_lim, west_lim, os.path.join(folder, "single" + str(start.month + m + 1) + ".nc"))
ERAlevelFloat(start.year, start.month + m + 1, days, north_lim, south_lim, east_lim, west_lim, os.path.join(folder, "float" + str(start.month + m + 1) + ".nc"))
ERAsingle(start.year, end.month, endfloat, north_lim, south_lim, east_lim, west_lim, os.path.join(folder, "single" + str(end.month) + ".nc"))
ERAlevelFloat(start.year, end.month, endfloat, north_lim, south_lim, east_lim, west_lim, os.path.join(folder, "float" + str(end.month) + ".nc"))
"""
Download of ERA5-Data:
"""
# Download the raw ERA5 files: one or two "ascent" files around the launch site
# and, unless ascent_only is set, one "single"/"float" file pair per calendar
# month between startdate and enddate (numbered 1, 2, ... in chronological order).
if download:
    if end < start:
        # A reversed period would otherwise yield empty or nonsensical day lists.
        raise ValueError("enddate must not be earlier than startdate")

    # Calendar month following the launch month (December rolls over into January).
    if start.month == 12:
        next_month = 1
        year2 = start.year + 1
    else:
        next_month = start.month + 1
        year2 = start.year
    days_in_start_month = (date(year2, next_month, 1) - date(start.year, start.month, 1)).days

    # Number of month boundaries crossed between launch and end. Counting across
    # years keeps e.g. 2012-12 -> 2013-12 from being mistaken for a single month.
    month_span = (end.year - start.year) * 12 + (end.month - start.month)
    if month_span == 0:
        month = 'same'
    elif month_span == 1:
        month = 'next'
    else:
        month = 'more'
    print(month)

    # Day lists for the ascent request(s).
    if start.day + ascent_window > days_in_start_month:
        # The ascent window spills over into the following month.
        short_ascent = 0
        startdays = [str(day).zfill(2) for day in range(start.day, days_in_start_month + 1)]
        # NOTE(review): this branch requests ascent_window + 1 days in total, one
        # more than the branch below; kept as in the original -- confirm intent.
        endascent = [str(day).zfill(2) for day in range(1, ascent_window - len(startdays) + 2)]
    else:
        short_ascent = 1
        startdays = [str(start.day + offset).zfill(2) for offset in range(ascent_window)]
    print(startdays)

    ERAlevelAscent(start.year, start.month, startdays, start_lat, start_lon,
                   os.path.join(folder, "ascent1.nc"))
    if not short_ascent:
        ERAlevelAscent(year2, next_month, endascent, start_lat, start_lon,
                       os.path.join(folder, "ascent2.nc"))

    if not ascent_only:
        # Walk month by month from the launch month to the end month. The first
        # month starts at the launch day, the last month stops at the end day and
        # every month in between is requested in full, so the downloaded period
        # has no gaps. A single running index avoids the file name collisions of
        # per-branch numbering.
        year, mon = start.year, start.month
        for index in range(1, month_span + 2):
            following_year, following_month = year + mon // 12, mon % 12 + 1
            first_day = start.day if index == 1 else 1
            if index == month_span + 1:
                last_day = end.day
            else:
                last_day = (date(following_year, following_month, 1) - date(year, mon, 1)).days
            dayrange = [str(day).zfill(2) for day in range(first_day, last_day + 1)]
            ERAsingle(year, mon, dayrange, north_lim, south_lim, east_lim, west_lim,
                      os.path.join(folder, "single" + str(index) + ".nc"))
            ERAlevelFloat(year, mon, dayrange, north_lim, south_lim, east_lim, west_lim,
                          os.path.join(folder, "float" + str(index) + ".nc"))
            year, mon = following_year, following_month
# STITCHING OF MULTIPLE *.NC-FILES TO ONE:
def _downloaded_files(prefix):
    """Return the paths of all files in `folder` whose name starts with `prefix`.

    Files named "<prefix><number>.<ext>" come first, ordered by their number
    (so "float2.nc" precedes "float10.nc"); any other match follows in
    alphabetical order. The stitching step reads the first and last entry to
    determine the covered time span, so the order must not depend on the
    arbitrary order in which the file system lists the directory.
    """
    def chunk_order(name):
        suffix = os.path.splitext(name)[0][len(prefix):]
        if suffix.isdecimal():
            return (0, int(suffix), name)
        return (1, 0, name)

    # Only the top level of `folder` is listed: every path is built as
    # os.path.join(folder, name), which would be wrong for nested files.
    matches = [name for name in os.listdir(folder) if name.startswith(prefix)]
    return [os.path.join(folder, name) for name in sorted(matches, key=chunk_order)]


# Raw download chunks, grouped by kind. The match is case sensitive, so the
# stitched outputs ("FLOAT_...", "RAD_...", "ASCENT_...") are not picked up.
floatfiles = _downloaded_files("float")
singlefiles = _downloaded_files("single")
ascentfiles = _downloaded_files("ascent")
startfile = Dataset(floatfiles[0], "r", format="NETCDF4")
endfile = Dataset(floatfiles[-1], "r", format="NETCDF4")
tstart = int(startfile.variables['time'][0])
tend = int(endfile.variables['time'][-1])
startfile.close()
endfile.close()
df1 = xr.open_mfdataset(floatfiles, chunks={'time': 100}, combine="nested", engine='netcdf4', concat_dim="time", parallel=True)
df1 = df1.assign_coords(time=np.linspace(tstart, tend, (tend - tstart) + 1))
df1.to_netcdf(os.path.join(folder, "FLOAT_" + str(ident) + "_" + str(start.year) + "_" + str(start.month) + "to" + str(end.year) + "_" + str(end.month) + ".nc"), mode='w', format="NETCDF4", engine="netcdf4", encoding={"z": {"dtype": "float32"}, "t": {"dtype": "float32"}, "u": {"dtype": "float32"}, "v": {"dtype": "float32"}, "w": {"dtype": "float32"}})
df2 = xr.open_mfdataset(singlefiles, chunks={'time': 500}, combine="nested", engine='netcdf4', concat_dim="time", parallel=True)
df2 = df2.assign_coords(time=np.linspace(tstart, tend, (tend - tstart) + 1))
df2.to_netcdf(os.path.join(folder, "SINGLE_" + str(ident) + "_" + str(start.year) + "_" + str(start.month) + "to" + str(end.year) + "_" + str(end.month) + ".nc"), mode='w', format="NETCDF4", engine="netcdf4", encoding={"cbh": {"dtype": "float32"}, "hcc": {"dtype": "float32"}, "lcc": {"dtype": "float32"}, "mcc": {"dtype": "float32"}, "skt": {"dtype": "float32"}, "ssr": {"dtype": "float32"}, "str": {"dtype": "float32"}, "sp": {"dtype": "float32"}, "ssrd": {"dtype": "float32"}, "strdc": {"dtype": "float32"}, "strd": {"dtype": "float32"}, "tisr": {"dtype": "float32"}, "tsr": {"dtype": "float32"}, "ttr": {"dtype": "float32"}, "tcc": {"dtype": "float32"}, "fdir": {"dtype": "float32"}})
startfile = Dataset(ascentfiles[0], "r", format="NETCDF4")
endfile = Dataset(ascentfiles[-1], "r", format="NETCDF4")
tstart = int(startfile.variables['time'][0])
tend = int(endfile.variables['time'][-1])
startfile.close()
endfile.close()
df3 = xr.open_mfdataset(ascentfiles, chunks={'time': 800}, combine="nested", engine='netcdf4', concat_dim="time", parallel=True)
df3 = df3.assign_coords(time=np.linspace(tstart, tend, (tend - tstart) + 1))
df3.to_netcdf(os.path.join(folder, "ASCENT_" + str(ident) + "_" + str(start.year) + "_" + str(start.month) + ".nc"), mode='w', format="NETCDF4", engine="netcdf4", encoding={"z": {"dtype": "float32"}, "t": {"dtype": "float32"}, "u": {"dtype": "float32"}, "v": {"dtype": "float32"}, "w": {"dtype": "float32"}})
# DELETING ORIGINAL FILES:
"""
for (root, dirs, files) in os.walk("ERA5"):
for name in files:
if name in floatfiles + singlefiles + ascentfiles:
os.remove(name)
else:
pass
"""
"""
Stitching of multiple *.nc-files to one:
"""
def _time_bounds(paths, label):
    """Return the first and last `time` value (as ints) covered by `paths`.

    The first value is read from the first file and the last value from the
    last file, so `paths` has to be in chronological order.

    Raises:
        FileNotFoundError: if `paths` is empty.
    """
    if not paths:
        raise FileNotFoundError("No " + label + " files found in '" + str(folder) + "' to stitch.")
    # Context managers close the files even if reading the time variable fails.
    with Dataset(paths[0], "r", format="NETCDF4") as first_file:
        first = int(first_file.variables['time'][0])
    with Dataset(paths[-1], "r", format="NETCDF4") as last_file:
        last = int(last_file.variables['time'][-1])
    return first, last


def _stitch(paths, chunk, bounds, target, variables):
    """Concatenate `paths` along `time` and write the result to `target`.

    The time coordinate is rebuilt with one entry per integer step between the
    two `bounds`, and every name in `variables` is stored as float32.
    """
    first, last = bounds
    with xr.open_mfdataset(paths, chunks={'time': chunk}, combine="nested", engine='netcdf4',
                           concat_dim="time", parallel=True) as merged:
        stitched = merged.assign_coords(time=np.linspace(first, last, (last - first) + 1))
        stitched.to_netcdf(target, mode='w', format="NETCDF4", engine="netcdf4",
                           encoding={name: {"dtype": "float32"} for name in variables})


# Merge the downloaded chunks into one file per data kind.
if stitch:
    print("Stitching files, please wait...")
    level_variables = ("z", "t", "u", "v", "w")
    if not ascent_only:
        # The float files are only needed (and only required to exist) here;
        # their time span is used for the single-level data as well.
        float_bounds = _time_bounds(floatfiles, "float")
        period = str(start.year) + str(start.month) + "_to_" + str(end.year) + str(end.month)
        _stitch(floatfiles, 100, float_bounds,
                os.path.join(folder, "FLOAT_" + str(ident2) + "_" + period + ".nc"),
                level_variables)
        _stitch(singlefiles, 500, float_bounds,
                os.path.join(folder, "RAD_" + str(ident2) + "_" + period + ".nc"),
                ("cbh", "hcc", "lcc", "mcc", "skt", "ssr", "str", "sp", "ssrd", "strdc", "strd",
                 "tisr", "tsr", "ttr", "tcc", "fdir"))
    _stitch(ascentfiles, 800, _time_bounds(ascentfiles, "ascent"),
            os.path.join(folder, "ASCENT_" + str(ident1) + "_" + str(start.year) + str(start.month) + ".nc"),
            level_variables)
    print("Stitching finished!")
"""
Deleting original files:
"""
# Remove the raw download chunks that were collected for stitching.
if delete:
    print("Deleting obsolete files.")
    # The collected paths already include `folder`, so they are removed directly
    # instead of re-walking a hard-coded directory name. The set drops
    # duplicates; the isfile check skips chunks that are already gone.
    for path in set(floatfiles + singlefiles + ascentfiles):
        if os.path.isfile(path):
            os.remove(path)