Running DA with the Command Line Interface (CLI)#
Generating the configs.yml
file#
[1]:
%load_ext autoreload
%autoreload 2
import cfr
print(cfr.__version__)
[2]:
job = cfr.ReconJob()
job.load_proxydb('PAGES2kv2')
[3]:
job.filter_proxydb(by='ptype', keys=['coral.SrCa'])
job.annualize_proxydb(months=[12, 1, 2], ptypes=['coral'])
# model prior: fetching & preprocessing
job.load_clim(tag='prior', path_dict={'tas': 'iCESM_past1000historical/tas'}, anom_period=[1951, 1980])
job.load_clim(tag='obs', path_dict={'tas': 'gistemp1200_GHCNv4_ERSSTv5'}, anom_period=[1951, 1980], rename_dict={'tas': 'tempanomaly'})
# proxy system modeling
job.calib_psms(
ptype_psm_dict={'coral.d18O': 'Linear', 'coral.calc': 'Linear', 'coral.SrCa': 'Linear'},
ptype_season_dict={'coral.d18O': [12, 1, 2], 'coral.calc': [12, 1, 2], 'coral.SrCa': [12, 1, 2]},
calib_period=[1850, 2015],
)
job.forward_psms()
# model prior: processing
job.annualize_clim(tag='prior', months=[12, 1, 2])
job.regrid_clim(tag='prior', nlat=42, nlon=63)
job.crop_clim(tag='prior', lat_min=-35, lat_max=35)
Annualizing ProxyDatabase: 100%|██████████| 29/29 [00:00<00:00, 40.82it/s]
>>> The target file seems existed at: ./data/tas_sfc_Amon_iCESM_past1000historical_085001-200512.nc . Loading from it instead of downloading ...
>>> The target file seems existed at: ./data/gistemp1200_GHCNv4_ERSSTv5.nc.gz . Loading from it instead of downloading ...
Calibrating the PSMs: 48%|████▊ | 14/29 [00:00<00:00, 39.09it/s]
The number of overlapped data points is 22 < 25. Skipping ...
The number of overlapped data points is 24 < 25. Skipping ...
The number of overlapped data points is 22 < 25. Skipping ...
Calibrating the PSMs: 100%|██████████| 29/29 [00:00<00:00, 39.85it/s]
The number of overlapped data points is 21 < 25. Skipping ...
Forwarding the PSMs: 100%|██████████| 25/25 [00:03<00:00, 6.68it/s]
[4]:
# paleo data assimilation
job.run_da_mc(save_dirpath='./recons/test-run-da-cfg', recon_seeds=list(range(1, 2)))
KF updating: 100%|██████████| 2001/2001 [00:02<00:00, 821.45it/s]
>>> DONE! Total time spent: 0.12 mins.
[5]:
job.save_cfg('./recons/test-run-da-cfg')
Testing running the reconstruction job based on the generated configs.yml
file#
[6]:
job_cfg = cfr.ReconJob()
job_cfg.run_da_cfg('./recons/test-run-da-cfg/configs.yml', run_mc=True, verbose=True)
>>> job.configs loaded
{'allownan': False,
'annualize_proxydb_months': [12, 1, 2],
'annualize_proxydb_ptypes': ['coral'],
'assim_frac': 0.75,
'compress_params': {'zlib': True},
'filter_proxydb_args': [],
'filter_proxydb_kwargs': {'by': 'ptype', 'keys': ['coral.SrCa']},
'nens': 100,
'obs_anom_period': [1951, 1980],
'obs_lat_name': 'lat',
'obs_lon_name': 'lon',
'obs_path': {'tas': 'gistemp1200_GHCNv4_ERSSTv5'},
'obs_rename_dict': {'tas': 'tempanomaly'},
'obs_time_name': 'time',
'output_full_ens': False,
'output_indices': ['gm', 'nhm', 'shm', 'nino3.4'],
'prior_annualize_months': [12, 1, 2],
'prior_anom_period': [1951, 1980],
'prior_lat_max': 35,
'prior_lat_min': -35,
'prior_lat_name': 'lat',
'prior_lon_max': 360,
'prior_lon_min': 0,
'prior_lon_name': 'lon',
'prior_path': {'tas': 'iCESM_past1000historical/tas'},
'prior_regrid_nlat': 42,
'prior_regrid_nlon': 63,
'prior_time_name': 'time',
'proxy_assim_frac': 0.75,
'proxydb_path': 'PAGES2kv2',
'psm_calib_period': [1850, 2015],
'ptype_psm_dict': {'coral.SrCa': 'Linear',
'coral.calc': 'Linear',
'coral.d18O': 'Linear'},
'ptype_season_dict': {'coral.SrCa': [12, 1, 2],
'coral.calc': [12, 1, 2],
'coral.d18O': [12, 1, 2]},
'recon_loc_rad': 25000,
'recon_period': [0, 2000],
'recon_sampling_mode': 'fixed',
'recon_seeds': [1],
'recon_timescale': 1,
'recon_vars': ['tas'],
'save_dirpath': './recons/test-run-da-cfg',
'trim_prior': True}
>>> job.configs["proxydb_path"] = PAGES2kv2
>>> 692 records loaded
>>> job.proxydb created
>>> job.configs["filter_proxydb_args"] = []
>>> job.configs["filter_proxydb_kwargs"] = {'by': 'ptype', 'keys': ['coral.SrCa']}
>>> 29 records remaining
>>> job.proxydb updated
Annualizing ProxyDatabase: 100%|██████████| 29/29 [00:00<00:00, 42.31it/s]
>>> job.configs["prior_path"] = {'tas': 'iCESM_past1000historical/tas'}
>>> job.configs["prior_anom_period"] = [1951, 1980]
>>> The target file seems existed at: ./data/tas_sfc_Amon_iCESM_past1000historical_085001-200512.nc . Loading from it instead of downloading ...
>>> prior variables ['tas'] loaded
>>> job.prior created
>>> job.configs["obs_path"] = {'tas': 'gistemp1200_GHCNv4_ERSSTv5'}
>>> job.configs["obs_rename_dict"] = {'tas': 'tempanomaly'}
>>> job.configs["obs_anom_period"] = [1951, 1980]
>>> The target file seems existed at: ./data/gistemp1200_GHCNv4_ERSSTv5.nc.gz . Loading from it instead of downloading ...
>>> obs variables ['tas'] loaded
>>> job.obs created
>>> job.configs["ptype_psm_dict"] = {'coral.SrCa': 'Linear', 'coral.calc': 'Linear', 'coral.d18O': 'Linear'}
>>> job.configs["ptype_season_dict"] = {'coral.SrCa': [12, 1, 2], 'coral.calc': [12, 1, 2], 'coral.d18O': [12, 1, 2]}
Calibrating the PSMs: 17%|█▋ | 5/29 [00:00<00:00, 41.24it/s]
The number of overlapped data points is 22 < 25. Skipping ...
The number of overlapped data points is 24 < 25. Skipping ...
Calibrating the PSMs: 34%|███▍ | 10/29 [00:00<00:00, 41.48it/s]
The number of overlapped data points is 22 < 25. Skipping ...
Calibrating the PSMs: 69%|██████▉ | 20/29 [00:00<00:00, 40.86it/s]
The number of overlapped data points is 21 < 25. Skipping ...
Calibrating the PSMs: 100%|██████████| 29/29 [00:00<00:00, 40.97it/s]
>>> PSM for Ocn_150 failed to be calibrated.
>>> PSM for Ocn_152 failed to be calibrated.
>>> PSM for Ocn_165 failed to be calibrated.
>>> PSM for Ocn_183 failed to be calibrated.
>>> 25 records tagged "calibrated" with ProxyRecord.psm created
Forwarding the PSMs: 100%|██████████| 25/25 [00:03<00:00, 6.51it/s]
>>> ProxyRecord.pseudo created for 25 records
>>> job.configs["prior_annualize_months"] = [12, 1, 2]
>>> Processing tas ...
>>> job.prior updated
>>> job.configs["prior_regrid_nlat"] = 42
>>> job.configs["prior_regrid_nlon"] = 63
>>> Processing tas ...
>>> job.configs["prior_lat_min"] = -35
>>> job.configs["prior_lat_max"] = 35
>>> job.configs["prior_lon_min"] = 0
>>> job.configs["prior_lon_max"] = 360
>>> Processing tas ...
>>> job.configs["save_dirpath"] = ./recons/test-run-da-cfg
>>> job.configs saved to: ./recons/test-run-da-cfg/configs.yml
>>> DONE! Total time used: 0.35 mins.
>>> job.configs["recon_period"] = [0, 2000]
>>> job.configs["recon_loc_rad"] = 25000
>>> job.configs["recon_timescale"] = 1
>>> job.configs["assim_frac"] = 0.75
>>> job.configs["compress_params"] = {'zlib': True}
>>> job.configs["output_full_ens"] = False
>>> seed: 1 | max: 1
KF updating: 100%|██████████| 2001/2001 [00:02<00:00, 743.13it/s]
>>> Reconstructed fields saved to: ./recons/test-run-da-cfg/job_r01_recon.nc
>>> DONE! Total time spent: 0.13 mins.
Leveraging the CLI#
[7]:
!cfr -h
usage: cfr [-h] [-v] {da,graphem} ...
========================================================================================
cfr: a scripting system for CFR (Feng Zhu, fengzhu@ucar.edu)
----------------------------------------------------------------------------------------
Usage example for DA:
cfr da -c config.yml -vb -s 1 2 -r
# -c config.yml: run the reconstruction job according to config.yml
# -vb: output the verbose runtime information
# -s 1 2: set seeds as integers from 1 to 2
# -r: run the Monte-Carlo iterations for PDA
Usage example for GraphEM:
cfr graphem -c config.yml -vb
# -c config.yml: run the reconstruction job according to config.yml
# -vb: output the verbose runtime information
========================================================================================
positional arguments:
{da,graphem} running mode
da run a DA-based reconstruction
graphem run a GraphEM-based reconstruction
optional arguments:
-h, --help show this help message and exit
-v, --version show program's version number and exit
[9]:
!cfr da -c ./recons/test-run-da-cfg/configs.yml -vb -s 1 2 -r
>>> Settings seeds: [1, 2]
>>> job.configs loaded
{'allownan': False,
'annualize_proxydb_months': [12, 1, 2],
'annualize_proxydb_ptypes': ['coral'],
'assim_frac': 0.75,
'compress_params': {'zlib': True},
'filter_proxydb_args': [],
'filter_proxydb_kwargs': {'by': 'ptype', 'keys': ['coral.SrCa']},
'nens': 100,
'obs_anom_period': [1951, 1980],
'obs_lat_name': 'lat',
'obs_lon_name': 'lon',
'obs_path': {'tas': 'gistemp1200_GHCNv4_ERSSTv5'},
'obs_rename_dict': {'tas': 'tempanomaly'},
'obs_time_name': 'time',
'output_full_ens': False,
'output_indices': ['gm', 'nhm', 'shm', 'nino3.4'],
'prior_annualize_months': [12, 1, 2],
'prior_anom_period': [1951, 1980],
'prior_lat_max': 35,
'prior_lat_min': -35,
'prior_lat_name': 'lat',
'prior_lon_max': 360,
'prior_lon_min': 0,
'prior_lon_name': 'lon',
'prior_path': {'tas': 'iCESM_past1000historical/tas'},
'prior_regrid_nlat': 42,
'prior_regrid_nlon': 63,
'prior_time_name': 'time',
'proxy_assim_frac': 0.75,
'proxydb_path': 'PAGES2kv2',
'psm_calib_period': [1850, 2015],
'ptype_psm_dict': {'coral.SrCa': 'Linear',
'coral.calc': 'Linear',
'coral.d18O': 'Linear'},
'ptype_season_dict': {'coral.SrCa': [12, 1, 2],
'coral.calc': [12, 1, 2],
'coral.d18O': [12, 1, 2]},
'recon_loc_rad': 25000,
'recon_period': [0, 2000],
'recon_sampling_mode': 'fixed',
'recon_seeds': [1, 2],
'recon_timescale': 1,
'recon_vars': ['tas'],
'save_dirpath': './recons/test-run-da-cfg',
'trim_prior': True}
>>> job.configs["proxydb_path"] = PAGES2kv2
>>> 692 records loaded
>>> job.proxydb created
>>> job.configs["filter_proxydb_args"] = []
>>> job.configs["filter_proxydb_kwargs"] = {'by': 'ptype', 'keys': ['coral.SrCa']}
>>> 29 records remaining
>>> job.proxydb updated
Annualizing ProxyDatabase: 100%|████████████████| 29/29 [00:00<00:00, 47.82it/s]
>>> job.configs["prior_path"] = {'tas': 'iCESM_past1000historical/tas'}
>>> job.configs["prior_anom_period"] = [1951, 1980]
>>> The target file seems existed at: ./data/tas_sfc_Amon_iCESM_past1000historical_085001-200512.nc . Loading from it instead of downloading ...
>>> prior variables ['tas'] loaded
>>> job.prior created
>>> job.configs["obs_path"] = {'tas': 'gistemp1200_GHCNv4_ERSSTv5'}
>>> job.configs["obs_rename_dict"] = {'tas': 'tempanomaly'}
>>> job.configs["obs_anom_period"] = [1951, 1980]
>>> The target file seems existed at: ./data/gistemp1200_GHCNv4_ERSSTv5.nc.gz . Loading from it instead of downloading ...
>>> obs variables ['tas'] loaded
>>> job.obs created
>>> job.configs["ptype_psm_dict"] = {'coral.SrCa': 'Linear', 'coral.calc': 'Linear', 'coral.d18O': 'Linear'}
>>> job.configs["ptype_season_dict"] = {'coral.SrCa': [12, 1, 2], 'coral.calc': [12, 1, 2], 'coral.d18O': [12, 1, 2]}
Calibrating the PSMs: 10%|██▎ | 3/29 [00:00<00:00, 27.55it/s]The number of overlapped data points is 22 < 25. Skipping ...
The number of overlapped data points is 24 < 25. Skipping ...
Calibrating the PSMs: 45%|█████████▍ | 13/29 [00:00<00:00, 43.49it/s]The number of overlapped data points is 22 < 25. Skipping ...
Calibrating the PSMs: 62%|█████████████ | 18/29 [00:00<00:00, 45.23it/s]The number of overlapped data points is 21 < 25. Skipping ...
Calibrating the PSMs: 100%|█████████████████████| 29/29 [00:00<00:00, 44.81it/s]
>>> PSM for Ocn_150 failed to be calibrated.
>>> PSM for Ocn_152 failed to be calibrated.
>>> PSM for Ocn_165 failed to be calibrated.
>>> PSM for Ocn_183 failed to be calibrated.
>>> 25 records tagged "calibrated" with ProxyRecord.psm created
Forwarding the PSMs: 100%|██████████████████████| 25/25 [00:03<00:00, 8.01it/s]
>>> ProxyRecord.pseudo created for 25 records
>>> job.configs["prior_annualize_months"] = [12, 1, 2]
>>> Processing tas ...
>>> job.prior updated
>>> job.configs["prior_regrid_nlat"] = 42
>>> job.configs["prior_regrid_nlon"] = 63
>>> Processing tas ...
>>> job.configs["prior_lat_min"] = -35
>>> job.configs["prior_lat_max"] = 35
>>> job.configs["prior_lon_min"] = 0
>>> job.configs["prior_lon_max"] = 360
>>> Processing tas ...
>>> job.configs["save_dirpath"] = ./recons/test-run-da-cfg
>>> job.configs saved to: ./recons/test-run-da-cfg/configs.yml
>>> DONE! Total time used: 0.30 mins.
>>> job.configs["recon_period"] = [0, 2000]
>>> job.configs["recon_loc_rad"] = 25000
>>> job.configs["recon_timescale"] = 1
>>> job.configs["recon_seeds"] = [1, 2]
>>> job.configs["assim_frac"] = 0.75
>>> job.configs["compress_params"] = {'zlib': True}
>>> job.configs["output_full_ens"] = False
>>> seed: 1 | max: 2
KF updating: 100%|█████████████████████████| 2001/2001 [00:02<00:00, 756.88it/s]
>>> Reconstructed fields saved to: ./recons/test-run-da-cfg/job_r01_recon.nc
>>> seed: 2 | max: 2
KF updating: 100%|█████████████████████████| 2001/2001 [00:03<00:00, 579.64it/s]
>>> Reconstructed fields saved to: ./recons/test-run-da-cfg/job_r02_recon.nc
>>> DONE! Total time spent: 0.27 mins.
[ ]: