BASTET/DataRequest.py

242 lines
9.4 KiB
Python
Raw Normal View History

2021-06-18 16:24:18 +02:00
import os
import sys
import cdsapi
2021-06-18 16:24:18 +02:00
from datetime import datetime
from datetime import date
import numpy as np
import xarray as xr
from netCDF4 import Dataset
from dask.diagnostics import ProgressBar
2021-06-18 16:24:18 +02:00
# ---------------------------------------------------------------------------
# USER CONFIGURATION
# ---------------------------------------------------------------------------
folder = "ERA5"    # directory that receives the downloaded and stitched files
ident = "McMurdo"  # label that ends up in the names of the stitched files
# NOTE(review): ident says "McMurdo" while the active limits and start
# location below are the northern polar region / Kiruna -- confirm intent.

north_lim, south_lim, east_lim, west_lim = 90, 45, 180, -180  # Northern Polar Region
# north_lim, south_lim, east_lim, west_lim = -45, -90, 180, -180  # Southern Polar Region

start_lat = 67.887382  # Kiruna
start_lon = 21.081452

# SOME START LOCATIONS:
# start_lat = 78.22  # Svalbard
# start_lon = 15.65
#
# start_lat = -77.8535  # McMurdo
# start_lon = 167.2022

startdate = '2019-12-15'
enddate = '2020-01-10'

# exist_ok=True tolerates an already existing directory, but still raises if
# something that is not a directory occupies the path.
os.makedirs(folder, exist_ok=True)

start = datetime.fromisoformat(startdate)
end = datetime.fromisoformat(enddate)

#"""
# DOWNLOAD OF ERA5-DATA:

# First day of the month following the start month. The year/month arithmetic
# rolls December over into January of the next year; a plain
# `start.month + 1` would hand month 13 to date() and raise ValueError.
_next_month_start = date(start.year + start.month // 12, start.month % 12 + 1, 1)
_days_in_start_month = (_next_month_start - date(start.year, start.month, 1)).days

# Zero-padded day-of-month strings used as the 'day' entry of the requests:
# from the start day to the end of the start month ...
startdays = [str(day).zfill(2) for day in range(start.day, _days_in_start_month + 1)]
# ... the first days of the following month, so that both ascent requests
# together list at most 30 days (empty when startdays already has 30 or more) ...
endascent = [str(day).zfill(2) for day in range(1, 30 - len(startdays) + 1)]
# ... every possible day of a month ('01' to '31') ...
days = [str(day).zfill(2) for day in range(1, 32)]
# ... and the first day of the end month up to and including the end day.
endfloat = [str(day).zfill(2) for day in range(1, end.day + 1)]
# Request hourly fields from the 'reanalysis-era5-single-levels' dataset for
# the given days of one month and store the result under `name`.
#   year, month            -- converted with str() for the request
#   days                   -- list used unchanged as the request's 'day' entry
#   nlim, slim, elim, wlim -- box limits, sent as 'area' in the order N, W, S, E
#   name                   -- target path handed to download()
# (Documented with '#' comments: the '#"""' markers around the download
# section look like a string-literal toggle that a docstring would break --
# TODO confirm.)
def ERAsingle(year, month, days, nlim, slim, elim, wlim, name):
    client = cdsapi.Client()

    # '00:00', '01:00', ..., '23:00'
    hours = ['%02d:00' % hour for hour in range(24)]

    variables = [
        'cloud_base_height',
        'high_cloud_cover',
        'low_cloud_cover',
        'medium_cloud_cover',
        'skin_temperature',
        'surface_net_solar_radiation',
        'surface_net_thermal_radiation',
        'surface_pressure',
        'surface_solar_radiation_downwards',
        'surface_thermal_radiation_downward_clear_sky',
        'surface_thermal_radiation_downwards',
        'toa_incident_solar_radiation',
        'top_net_solar_radiation',
        'top_net_thermal_radiation',
        'total_cloud_cover',
        'total_sky_direct_solar_radiation_at_surface',
    ]

    request = {
        'product_type': 'reanalysis',
        'variable': variables,
        'time': hours,
        'year': str(year),
        'month': str(month),
        'day': days,
        'area': [nlim, wlim, slim, elim],
        'format': 'netcdf',
    }

    result = client.retrieve('reanalysis-era5-single-levels', request)
    result.download(name)
2021-06-18 16:24:18 +02:00
# Request hourly fields on all listed pressure levels (1 to 1000) from the
# 'reanalysis-era5-pressure-levels' dataset for a box reaching 10.0 in every
# direction from the start location, and store the result under `name`.
#   year, month          -- converted with str() for the request
#   dayrange             -- list used unchanged as the request's 'day' entry
#   start_lat, start_lon -- centre of the requested box
#   name                 -- target path handed to download()
# (Documented with '#' comments: the '#"""' markers around the download
# section look like a string-literal toggle that a docstring would break --
# TODO confirm.)
def ERAlevelAscent(year, month, dayrange, start_lat, start_lon, name):
    half_width = 10.0
    nlim, slim = start_lat + half_width, start_lat - half_width
    elim, wlim = start_lon + half_width, start_lon - half_width

    client = cdsapi.Client()

    levels = [str(level) for level in (
        1, 2, 3, 5, 7, 10, 20, 30, 50, 70, 100, 125, 150, 175, 200, 225, 250,
        300, 350, 400, 450, 500, 550, 600, 650, 700, 750, 775, 800, 825, 850,
        875, 900, 925, 950, 975, 1000,
    )]

    # '00:00', '01:00', ..., '23:00'
    hours = ['%02d:00' % hour for hour in range(24)]

    request = {
        'product_type': 'reanalysis',
        'variable': [
            'geopotential',
            'temperature',
            'u_component_of_wind',
            'v_component_of_wind',
            'vertical_velocity',
        ],
        'pressure_level': levels,
        'year': str(year),
        'month': str(month),
        'day': dayrange,
        'time': hours,
        'area': [nlim, wlim, slim, elim],  # order: N, W, S, E
        'format': 'netcdf',
    }

    result = client.retrieve('reanalysis-era5-pressure-levels', request)
    result.download(name)
2021-06-18 16:24:18 +02:00
# Request hourly fields on the pressure levels 1 to 20 only from the
# 'reanalysis-era5-pressure-levels' dataset for the given box and days of one
# month, and store the result under `name`.
#   year, month            -- converted with str() for the request
#   dayrange               -- list used unchanged as the request's 'day' entry
#   nlim, slim, elim, wlim -- box limits, sent as 'area' in the order N, W, S, E
#   name                   -- target path handed to download()
# (Documented with '#' comments: the '#"""' markers around the download
# section look like a string-literal toggle that a docstring would break --
# TODO confirm.)
def ERAlevelFloat(year, month, dayrange, nlim, slim, elim, wlim, name):
    client = cdsapi.Client()

    levels = [str(level) for level in (1, 2, 3, 5, 7, 10, 20)]

    # '00:00', '01:00', ..., '23:00'
    hours = ['%02d:00' % hour for hour in range(24)]

    request = {
        'product_type': 'reanalysis',
        'variable': [
            'geopotential',
            'temperature',
            'u_component_of_wind',
            'v_component_of_wind',
            'vertical_velocity',
        ],
        'pressure_level': levels,
        'year': str(year),
        'month': str(month),
        'day': dayrange,
        'time': hours,
        'area': [nlim, wlim, slim, elim],
        'format': 'netcdf',
    }

    result = client.retrieve('reanalysis-era5-pressure-levels', request)
    result.download(name)
2021-06-18 16:24:18 +02:00
# Return the (year, month) pair of the calendar month after the given one;
# December rolls over into January of the following year.
def _month_after(year, month):
    if month == 12:
        return year + 1, 1
    return year, month + 1


# Ascent data: the rest of the start month plus, if any days are left over in
# `endascent`, the first days of the following month (which may lie in the
# next year).
ERAlevelAscent(start.year, start.month, startdays, start_lat, start_lon, os.path.join(folder, "ascent1.nc"))
if endascent:
    _ascent2_year, _ascent2_month = _month_after(start.year, start.month)
    ERAlevelAscent(_ascent2_year, _ascent2_month, endascent, start_lat, start_lon, os.path.join(folder, "ascent2.nc"))

# Single-level and float data: one request pair per calendar month from the
# start month to the end month inclusive. Comparing (year, month) tuples keeps
# the loop correct across a change of year.
# NOTE: file names carry only the month number, so a range longer than twelve
# months would overwrite earlier downloads.
_first_month = (start.year, start.month)
_last_month = (end.year, end.month)
_current = _first_month
while _current <= _last_month:
    _req_year, _req_month = _current
    if _current == _first_month and _current == _last_month:
        # Start and end in the same month: a single request limited to the
        # days between both dates instead of two requests for the same file.
        _req_days = [day for day in startdays if int(day) <= end.day]
    elif _current == _first_month:
        _req_days = startdays
    elif _current == _last_month:
        _req_days = endfloat
    else:
        _req_days = days
    ERAsingle(_req_year, _req_month, _req_days, north_lim, south_lim, east_lim, west_lim, os.path.join(folder, "single" + str(_req_month) + ".nc"))
    ERAlevelFloat(_req_year, _req_month, _req_days, north_lim, south_lim, east_lim, west_lim, os.path.join(folder, "float" + str(_req_month) + ".nc"))
    _current = _month_after(_req_year, _req_month)
#"""
# STITCHING OF MULTIPLE *.NC-FILES TO ONE:


def _time_bounds(path):
    """Return the first and last entry of the file's 'time' variable as ints."""
    with Dataset(path, "r") as nc_file:
        time_values = nc_file.variables['time']
        return int(time_values[0]), int(time_values[-1])


def _stitch(paths, chunk_size, target, variables):
    """Concatenate the files in `paths` along 'time' and write them to `target`.

    The files are ordered by their first time value before concatenation, so
    the result does not depend on directory listing order or on file names.
    The time coordinate is replaced by evenly spaced values running from the
    first entry of the earliest file to the last entry of the latest file in
    steps of one, and every variable in `variables` is stored as float32.

    Raises FileNotFoundError when `paths` is empty.
    """
    if not paths:
        raise FileNotFoundError("no input files found for " + target)

    bounds = {path: _time_bounds(path) for path in paths}
    ordered = sorted(paths, key=lambda path: bounds[path][0])
    tstart = bounds[ordered[0]][0]
    tend = bounds[ordered[-1]][1]

    with xr.open_mfdataset(ordered, chunks={'time': chunk_size}, combine="nested", engine='netcdf4', concat_dim="time", parallel=True) as merged:
        stitched = merged.assign_coords(time=np.linspace(tstart, tend, (tend - tstart) + 1))
        stitched.to_netcdf(target, mode='w', format="NETCDF4", engine="netcdf4", encoding={var: {"dtype": "float32"} for var in variables})


# Sort the downloaded files into the three groups by file-name prefix. Only
# the lower-case prefixes used by the download step are accepted, so anything
# else in the folder (for example the stitched FLOAT_/SINGLE_/ASCENT_ outputs
# of an earlier run) is ignored instead of being treated as ascent data.
floatfiles = []
singlefiles = []
ascentfiles = []
for (root, dirs, files) in os.walk(folder):
    for name in sorted(files):
        if name.startswith("float"):
            floatfiles.append(os.path.join(root, name))
        elif name.startswith("single"):
            singlefiles.append(os.path.join(root, name))
        elif name.startswith("ascent"):
            ascentfiles.append(os.path.join(root, name))

_level_vars = ("z", "t", "u", "v", "w")
_single_vars = ("cbh", "hcc", "lcc", "mcc", "skt", "ssr", "str", "sp", "ssrd", "strdc", "strd", "tisr", "tsr", "ttr", "tcc", "fdir")
_range_tag = str(ident) + "_" + str(start.year) + "_" + str(start.month) + "to" + str(end.year) + "_" + str(end.month)

_stitch(floatfiles, 100, os.path.join(folder, "FLOAT_" + _range_tag + ".nc"), _level_vars)
_stitch(singlefiles, 500, os.path.join(folder, "SINGLE_" + _range_tag + ".nc"), _single_vars)
_stitch(ascentfiles, 800, os.path.join(folder, "ASCENT_" + str(ident) + "_" + str(start.year) + "_" + str(start.month) + ".nc"), _level_vars)
2021-06-18 16:24:18 +02:00
# DELETING ORIGINAL FILES:
# Disabled: the code below sits inside a string literal and is not executed.
# Remove the two triple-quote lines to delete the input files once the
# stitched files have been written. floatfiles, singlefiles and ascentfiles
# already hold the full paths of the files that were stitched, so they are
# removed directly instead of matching bare file names against those paths.
"""
for path in floatfiles + singlefiles + ascentfiles:
    os.remove(path)
"""