diff --git a/.gitignore b/.gitignore
index 5a98549..d2b9cc0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -134,3 +134,5 @@ dmypy.json
 
 # Ephemeral .nfs files
 .nfs*
+
+.DS_Store
diff --git a/notebooks/.virtual_documents/.DS_Store b/notebooks/.virtual_documents/.DS_Store
new file mode 100644
index 0000000..5008ddf
Binary files /dev/null and b/notebooks/.virtual_documents/.DS_Store differ
diff --git a/notebooks/.virtual_documents/cesmLME.ipynb b/notebooks/.virtual_documents/cesmLME.ipynb
deleted file mode 100644
index 11e7c3b..0000000
--- a/notebooks/.virtual_documents/cesmLME.ipynb
+++ /dev/null
@@ -1,83 +0,0 @@
-
-
-
-
-
-
-
-
-
-import s3fs
-import fsspec
-import xarray as xr
-import glob
-
-
-
-
-
-URL = 'https://js2.jetstream-cloud.org:8001/'
-
-
-
-
-
-
-
-
-path = f'pythia/cesmLME'
-
-
-
-
-
-
-
-
-
-
-
-fs = fsspec.filesystem("s3", anon=True, client_kwargs=dict(endpoint_url=URL))
-
-
-pattern = f's3://{path}/*.nc'
-
-
-files = sorted(fs.glob(pattern))
-
-
-
-
-
-files[0:2]
-
-
-fileset = [fs.open(file) for file in files[0:2]]
-
-
-data = xr.open_dataset(fileset[0])
-
-
-data
-
-
-
-
-
-# This works
-dataMul = xr.open_mfdataset(fileset[:3], combine='by_coords')
-
-
-dataMul
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/notebooks/.virtual_documents/notebook-template.ipynb b/notebooks/.virtual_documents/notebook-template.ipynb
deleted file mode 100644
index e644bea..0000000
--- a/notebooks/.virtual_documents/notebook-template.ipynb
+++ /dev/null
@@ -1,418 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-#To deal with model data
-import s3fs
-import fsspec
-import xarray as xr
-import glob
-
-#To deal with proxy data
-import pandas as pd
-import numpy as np
-import json
-import requests
-import pandas as pd
-import io
-import ast
-
-#To deal with analysis
-import pyleoclim as pyleo
-from eofs.xarray import Eof
-
-#Plotting and mapping
-import cartopy.crs as ccrs
-import matplotlib.pyplot as plt
-import nc_time_axis
-
-
-
-
-
-
-
-
-URL = 'https://js2.jetstream-cloud.org:8001/' #Locate and read a file
-
-path = f'pythia/cesmLME' # specify data location
-
-fs = fsspec.filesystem("s3", anon=True, client_kwargs=dict(endpoint_url=URL))
-pattern = f's3://{path}/*.nc'
-files = sorted(fs.glob(pattern))
-
-
-
-
-
-base_name = 'pythia/cesmLME/b.ie12.B1850C5CN.f19_g16.LME.002.cam.h0.'
-time_period = '085001-184912'
-
-names = [name for name in files if base_name in name and time_period in name]
-
-names
-
-
-fileset = [fs.open(file) for file in names]
-
-
-
-
-
-%%time
-for idx,item in enumerate(fileset):
-    ds_u = xr.open_dataset(item)
-    var_name = names[idx].split('.')[-3]
-    da = ds_u[var_name]
-    try:
-        ds[var_name]= da
-    except:
-        ds = da.to_dataset()
-    ds.attrs = ds_u.attrs
-    ds_u.close()
-    da.close()
-
-
-
-
-
-ds
-
-
-
-
-
-ds_geo = ds.sel(lat=slice(-27,27), lon=slice(250,330))
-
-
-
-
-
-#ds_geo.to_netcdf(path='../data/LME.002.cam.h0.precip_iso.085001-184912.nc')
-
-
-
-
-
-%%time
-p16O = ds_geo['PRECRC_H216Or'] + ds_geo['PRECSC_H216Os'] + ds_geo['PRECRL_H216OR'] + ds_geo['PRECSL_H216OS']
-p18O = ds_geo['PRECRC_H218Or'] + ds_geo['PRECSC_H218Os'] + ds_geo['PRECRL_H218OR'] + ds_geo['PRECSL_H218OS']
-
-p16O = p16O.where(p16O > 1e-18, 1e-18)
-p18O = p18O.where(p18O > 1e-18, 1e-18)
-
-d18Op = (p18O / p16O - 1)*1000
-
-
-
-
-
-d18Oa = (d18Op - d18Op.mean(dim='time'))/d18Op.std(dim='time')
-
-
-
-
-
-solver = Eof(d18Oa, weights=None)
-
-
-
-
-
-eof1 = solver.eofsAsCovariance(neofs=3)
-
-
-
-
-
-
-
-
-url = 'https://linkedearth.graphdb.mint.isi.edu/repositories/LiPDVerse-dynamic'
-
-query = """PREFIX le: <http://linked.earth/ontology#>
-PREFIX wgs84: <http://www.w3.org/2003/01/geo/wgs84_pos#>
-PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
-SELECT distinct ?varID ?dataSetName ?lat ?lon ?varname ?interpLabel ?val ?varunits ?timevarname ?timeval ?timeunits ?archiveType where{
-
-    ?ds a le:Dataset .
-    ?ds le:hasName ?dataSetName .
-    OPTIONAL{?ds le:hasArchiveType ?archiveTypeObj .
-        ?archiveTypeObj rdfs:label ?archiveType .}
-
-
-    ?ds le:hasLocation ?loc .
-    ?loc wgs84:lat ?lat .
-    FILTER(?lat<26 && ?lat>-26)
-    ?loc wgs84:long ?lon .
-    FILTER(?lon<-70 && ?lon>-150)
-
-    ?ds le:hasPaleoData ?data .
-    ?data le:hasMeasurementTable ?table .
-    ?table le:hasVariable ?var .
-    ?var le:hasName ?varname .
-    VALUES ?varname {"d18O"} .
-    ?var le:partOfCompilation ?comp .
-    ?comp le:hasName ?compName .
-    VALUES ?compName {"iso2k" "Pages2kTemperature" "CoralHydro2k" "SISAL-LiPD"} .
-    ?var le:hasInterpretation ?interp .
-    ?interp le:hasVariable ?interpVar .
-    ?interpVar rdfs:label ?interpLabel .
-    FILTER (REGEX(?interpLabel, "precipitation.*", "i"))
-    ?var le:hasVariableId ?varID .
-    ?var le:hasValues ?val .
-    OPTIONAL{?var le:hasUnits ?varunitsObj .
-        ?varunitsObj rdfs:label ?varunits .}
-
-    ?table le:hasVariable ?timevar .
-    ?timevar le:hasName ?timevarname .
-    VALUES ?timevarname {"year" "age"} .
-    ?timevar le:hasValues ?timeval .
-    OPTIONAL{?timevar le:hasUnits ?timeunitsObj .
-        ?timeunitsObj rdfs:label ?timeunits .}
-}"""
-
-
-
-
-
-response = requests.post(url, data = {'query': query})
-
-data = io.StringIO(response.text)
-df_res = pd.read_csv(data, sep=",")
-
-df_res['val']=df_res['val'].apply(lambda row : json.loads(row) if isinstance(row, str) else row)
-df_res['timeval']=df_res['timeval'].apply(lambda row : json.loads(row) if isinstance(row, str) else row)
-
-df_res.head()
-
-
-
-
-
-len(df_res)
-
-
-
-
-
-df = df_res[~df_res['varID'].duplicated()]
-
-len(df)
-
-
-
-
-
-df['timevarname'].unique()
-
-
-
-
-
-df['timeunits'].unique()
-
-
-
-
-
-df['timeval'] = df['timeval'].apply(np.array)
-
-def adjust_timeval(row):
-    if row['timevarname'] == 'age':
-        return 1950 - row['timeval']
-    else:
-        return row['timeval']
-
-# Apply the function across the DataFrame rows
-df['timeval'] = df.apply(adjust_timeval, axis=1)
-
-
-
-
-
-def range_within_limits(array, lower = 0, upper = 2000, threshold = 1500):
-    filtered_values = array[(array >= lower) & (array <= upper)]
-    if filtered_values.size > 0: # Check if there are any values within the range
-        return np.ptp(filtered_values) >= threshold # np.ptp() returns the range of values
-    return False # If no values within the range, filter out the row
-
-
-# Apply the function to filter the DataFrame
-filtered_df = df[df['timeval'].apply(range_within_limits)]
-
-
-
-
-
-len(filtered_df)
-
-
-
-
-
-def array_range_exceeds(array, threshold=1500):
-    return np.max(array) - np.min(array) > threshold
-
-filt_df = filtered_df[filtered_df['timeval'].apply(array_range_exceeds)]
-
-
-
-
-
-len(filt_df)
-
-
-
-
-
-def min_resolution(array, min_res=60):
-    if len(array) > 1: # Ensure there are at least two points to calculate a difference
-        # Calculate differences between consecutive elements
-        differences = np.mean(np.diff(array))
-        # Check if the minimum difference is at least 50
-        return abs(differences) <= min_res
-    return False # If less than two elements, can't compute difference
-
-# Apply the function and filter the DataFrame
-filtered_df2 = filt_df[filt_df['timeval'].apply(min_resolution)]
-
-
-
-
-
-len(filtered_df2)
-
-
-
-
-
-ts_list = []
-for _, row in filtered_df2.iterrows():
-    ts_list.append(pyleo.GeoSeries(time=row['timeval'],value=row['val'],
-                            time_name='year',value_name=row['varname'],
-                            time_unit='CE', value_unit=row['varunits'],
-                            lat = row['lat'], lon = row['lon'],
-                            archiveType = row['archiveType'], verbose = False,
-                            label=row['dataSetName']+'_'+row['varname']))
-
-    #print(row['timeval'])
-
-
-
-
-
-mgs = pyleo.MultipleGeoSeries(ts_list, label='HydroAm2k', time_unit='year CE')
-
-
-
-
-
-mgs.map()
-
-
-
-
-
-fig, ax = mgs.sel(time=slice(0,2000)).stackplot(v_shift_factor=1.2)
-plt.show(fig)
-
-
-
-
-
-mgs_common = mgs.common_time().standardize()
-
-pca = mgs_common.pca()
-
-
-
-
-
-pca.screeplot()
-
-
-
-
-
-pca.modeplot()
-
-
-
-
-
-pca.modeplot(index=1)
-
-
-
-
-
-pca.modeplot(index=2)
-
-
-
-
-
-
-
-
-clevs = np.linspace(-1, 1, 20)
-proj = ccrs.PlateCarree(central_longitude=290)
-fig, ax = plt.subplots(figsize=[10,4], subplot_kw=dict(projection=proj))
-ax.coastlines()
-eof1[0].plot.contourf(ax=ax, levels = clevs, cmap=plt.cm.RdBu_r,
-                    transform=ccrs.PlateCarree(), add_colorbar=True)
-fig.axes[1].set_ylabel('')
-fig.axes[1].set_yticks(np.arange(-1,1.2,0.2))
-ax.set_title('EOF1 expressed as covariance', fontsize=16)
-plt.show()
-
-
-
-
-
-pcs = solver.pcs(npcs=3, pcscaling=1)
-
-fig, ax = plt.subplots(figsize=[20,4])
-pcs[:, 0].plot(ax=ax, linewidth=1)
-
-ax = plt.gca()
-ax.axhline(0, color='k')
-ax.set_ylim(-3, 3)
-ax.set_xlabel('Year')
-ax.set_ylabel('Normalized Units')
-ax.set_title('PC1 Time Series', fontsize=16)
-
-
-
-
-
-
-
-
-
-
-
-
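
For reference, the d18Op step in the deleted notebook-template.ipynb computes standard delta notation, δ18O = (R / R_standard − 1) × 1000 in permil: the convective and large-scale rain and snow fluxes of H216O and H218O are each summed, floored at 1e-18 so near-zero precipitation cannot blow up the ratio, and the flux ratio is used directly as R / R_standard (which the deleted code implies is how the model's isotope tracers are scaled). A minimal standalone sketch of the same arithmetic; the function name and the NumPy flooring are illustrative, not from the notebook:

    import numpy as np

    def delta18o(p18o, p16o, floor=1e-18):
        """delta-18O in permil from H218O and H216O precipitation fluxes."""
        # Floor tiny fluxes, mirroring the .where(p > 1e-18, 1e-18) calls
        # in the deleted code, so dry grid cells don't divide by ~zero.
        p18o = np.maximum(p18o, floor)
        p16o = np.maximum(p16o, floor)
        # Delta notation: (R / R_standard - 1) * 1000
        return (p18o / p16o - 1.0) * 1000.0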
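Likewise, adjust_timeval in the deleted template puts proxy records reported as 'age' (years BP, where "present" is 1950 CE by convention) onto the same calendar axis as the 'year' records, via year CE = 1950 − age BP. A quick illustrative check (the values are made up):

    import numpy as np

    age_bp = np.array([0, 100, 1000])  # years before present (1950 CE)
    year_ce = 1950 - age_bp            # -> array([1950, 1850,  950])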
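Finally, the model-side EOF analysis in the deleted template goes through the eofs package: Eof builds a solver from an anomaly DataArray whose leading dimension is time, eofsAsCovariance(neofs=3) returns the leading EOFs scaled as the covariance between each principal component and the field, and pcs(npcs=3, pcscaling=1) returns unit-variance PCs. A toy exercise of the same API; the random field and coordinate ranges are illustrative only, chosen to match the tropical-Americas box the notebook selects:

    import numpy as np
    import xarray as xr
    from eofs.xarray import Eof

    # Synthetic (time, lat, lon) anomaly field, standing in for d18Oa.
    da = xr.DataArray(
        np.random.randn(120, 10, 20),
        dims=('time', 'lat', 'lon'),
        coords={'time': np.arange(120),
                'lat': np.linspace(-27, 27, 10),
                'lon': np.linspace(250, 330, 20)},
    )
    solver = Eof(da, weights=None)           # same call pattern as the notebook
    eofs = solver.eofsAsCovariance(neofs=3)  # EOF maps, covariance scaling
    pcs = solver.pcs(npcs=3, pcscaling=1)    # PCs scaled to unit variance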