Running DA with the Command Line Interface (CLI)#

Generating the configs.yml file#

[1]:
%load_ext autoreload
%autoreload 2

import cfr
print(cfr.__version__)
[2]:
job = cfr.ReconJob()
job.load_proxydb('PAGES2kv2')
[3]:
job.filter_proxydb(by='ptype', keys=['coral.SrCa'])
job.annualize_proxydb(months=[12, 1, 2], ptypes=['coral'])

# model prior: fetching & preprocessing
job.load_clim(tag='prior', path_dict={'tas': 'iCESM_past1000historical/tas'}, anom_period=[1951, 1980])
job.load_clim(tag='obs', path_dict={'tas': 'gistemp1200_GHCNv4_ERSSTv5'}, anom_period=[1951, 1980], rename_dict={'tas': 'tempanomaly'})

# proxy system modeling
job.calib_psms(
    ptype_psm_dict={'coral.d18O': 'Linear', 'coral.calc': 'Linear', 'coral.SrCa': 'Linear'},
    ptype_season_dict={'coral.d18O': [12, 1, 2], 'coral.calc': [12, 1, 2], 'coral.SrCa': [12, 1, 2]},
    calib_period=[1850, 2015],
)
job.forward_psms()

# model prior: processing
job.annualize_clim(tag='prior', months=[12, 1, 2])
job.regrid_clim(tag='prior', nlat=42, nlon=63)
job.crop_clim(tag='prior', lat_min=-35, lat_max=35)

Annualizing ProxyDatabase: 100%|██████████| 29/29 [00:00<00:00, 40.82it/s]
>>> The target file seems existed at: ./data/tas_sfc_Amon_iCESM_past1000historical_085001-200512.nc . Loading from it instead of downloading ...
>>> The target file seems existed at: ./data/gistemp1200_GHCNv4_ERSSTv5.nc.gz . Loading from it instead of downloading ...

Calibrating the PSMs:  48%|████▊     | 14/29 [00:00<00:00, 39.09it/s]
The number of overlapped data points is 22 < 25. Skipping ...
The number of overlapped data points is 24 < 25. Skipping ...
The number of overlapped data points is 22 < 25. Skipping ...
Calibrating the PSMs: 100%|██████████| 29/29 [00:00<00:00, 39.85it/s]
The number of overlapped data points is 21 < 25. Skipping ...
Forwarding the PSMs: 100%|██████████| 25/25 [00:03<00:00,  6.68it/s]
[4]:
# paleo data assimilation
job.run_da_mc(save_dirpath='./recons/test-run-da-cfg', recon_seeds=list(range(1, 2)))
KF updating: 100%|██████████| 2001/2001 [00:02<00:00, 821.45it/s]
>>> DONE! Total time spent: 0.12 mins.

[5]:
job.save_cfg('./recons/test-run-da-cfg')

Testing running the reconstruction job based on the generated configs.yml file#

[6]:
job_cfg = cfr.ReconJob()
job_cfg.run_da_cfg('./recons/test-run-da-cfg/configs.yml', run_mc=True, verbose=True)
>>> job.configs loaded
{'allownan': False,
 'annualize_proxydb_months': [12, 1, 2],
 'annualize_proxydb_ptypes': ['coral'],
 'assim_frac': 0.75,
 'compress_params': {'zlib': True},
 'filter_proxydb_args': [],
 'filter_proxydb_kwargs': {'by': 'ptype', 'keys': ['coral.SrCa']},
 'nens': 100,
 'obs_anom_period': [1951, 1980],
 'obs_lat_name': 'lat',
 'obs_lon_name': 'lon',
 'obs_path': {'tas': 'gistemp1200_GHCNv4_ERSSTv5'},
 'obs_rename_dict': {'tas': 'tempanomaly'},
 'obs_time_name': 'time',
 'output_full_ens': False,
 'output_indices': ['gm', 'nhm', 'shm', 'nino3.4'],
 'prior_annualize_months': [12, 1, 2],
 'prior_anom_period': [1951, 1980],
 'prior_lat_max': 35,
 'prior_lat_min': -35,
 'prior_lat_name': 'lat',
 'prior_lon_max': 360,
 'prior_lon_min': 0,
 'prior_lon_name': 'lon',
 'prior_path': {'tas': 'iCESM_past1000historical/tas'},
 'prior_regrid_nlat': 42,
 'prior_regrid_nlon': 63,
 'prior_time_name': 'time',
 'proxy_assim_frac': 0.75,
 'proxydb_path': 'PAGES2kv2',
 'psm_calib_period': [1850, 2015],
 'ptype_psm_dict': {'coral.SrCa': 'Linear',
                    'coral.calc': 'Linear',
                    'coral.d18O': 'Linear'},
 'ptype_season_dict': {'coral.SrCa': [12, 1, 2],
                       'coral.calc': [12, 1, 2],
                       'coral.d18O': [12, 1, 2]},
 'recon_loc_rad': 25000,
 'recon_period': [0, 2000],
 'recon_sampling_mode': 'fixed',
 'recon_seeds': [1],
 'recon_timescale': 1,
 'recon_vars': ['tas'],
 'save_dirpath': './recons/test-run-da-cfg',
 'trim_prior': True}
>>> job.configs["proxydb_path"] = PAGES2kv2
>>> 692 records loaded
>>> job.proxydb created
>>> job.configs["filter_proxydb_args"] = []
>>> job.configs["filter_proxydb_kwargs"] = {'by': 'ptype', 'keys': ['coral.SrCa']}
>>> 29 records remaining
>>> job.proxydb updated

Annualizing ProxyDatabase: 100%|██████████| 29/29 [00:00<00:00, 42.31it/s]
>>> job.configs["prior_path"] = {'tas': 'iCESM_past1000historical/tas'}
>>> job.configs["prior_anom_period"] = [1951, 1980]
>>> The target file seems existed at: ./data/tas_sfc_Amon_iCESM_past1000historical_085001-200512.nc . Loading from it instead of downloading ...
>>> prior variables ['tas'] loaded
>>> job.prior created
>>> job.configs["obs_path"] = {'tas': 'gistemp1200_GHCNv4_ERSSTv5'}
>>> job.configs["obs_rename_dict"] = {'tas': 'tempanomaly'}
>>> job.configs["obs_anom_period"] = [1951, 1980]
>>> The target file seems existed at: ./data/gistemp1200_GHCNv4_ERSSTv5.nc.gz . Loading from it instead of downloading ...
>>> obs variables ['tas'] loaded
>>> job.obs created
>>> job.configs["ptype_psm_dict"] = {'coral.SrCa': 'Linear', 'coral.calc': 'Linear', 'coral.d18O': 'Linear'}
>>> job.configs["ptype_season_dict"] = {'coral.SrCa': [12, 1, 2], 'coral.calc': [12, 1, 2], 'coral.d18O': [12, 1, 2]}

Calibrating the PSMs:  17%|█▋        | 5/29 [00:00<00:00, 41.24it/s]
The number of overlapped data points is 22 < 25. Skipping ...
The number of overlapped data points is 24 < 25. Skipping ...
Calibrating the PSMs:  34%|███▍      | 10/29 [00:00<00:00, 41.48it/s]
The number of overlapped data points is 22 < 25. Skipping ...
Calibrating the PSMs:  69%|██████▉   | 20/29 [00:00<00:00, 40.86it/s]
The number of overlapped data points is 21 < 25. Skipping ...
Calibrating the PSMs: 100%|██████████| 29/29 [00:00<00:00, 40.97it/s]
>>> PSM for Ocn_150 failed to be calibrated.
>>> PSM for Ocn_152 failed to be calibrated.
>>> PSM for Ocn_165 failed to be calibrated.
>>> PSM for Ocn_183 failed to be calibrated.
>>> 25 records tagged "calibrated" with ProxyRecord.psm created

Forwarding the PSMs: 100%|██████████| 25/25 [00:03<00:00,  6.51it/s]
>>> ProxyRecord.pseudo created for 25 records
>>> job.configs["prior_annualize_months"] = [12, 1, 2]
>>> Processing tas ...
>>> job.prior updated
>>> job.configs["prior_regrid_nlat"] = 42
>>> job.configs["prior_regrid_nlon"] = 63
>>> Processing tas ...
>>> job.configs["prior_lat_min"] = -35
>>> job.configs["prior_lat_max"] = 35
>>> job.configs["prior_lon_min"] = 0
>>> job.configs["prior_lon_max"] = 360
>>> Processing tas ...
>>> job.configs["save_dirpath"] = ./recons/test-run-da-cfg
>>> job.configs saved to: ./recons/test-run-da-cfg/configs.yml
>>> DONE! Total time used: 0.35 mins.
>>> job.configs["recon_period"] = [0, 2000]
>>> job.configs["recon_loc_rad"] = 25000
>>> job.configs["recon_timescale"] = 1
>>> job.configs["assim_frac"] = 0.75
>>> job.configs["compress_params"] = {'zlib': True}
>>> job.configs["output_full_ens"] = False
>>> seed: 1 | max: 1

KF updating: 100%|██████████| 2001/2001 [00:02<00:00, 743.13it/s]
>>> Reconstructed fields saved to: ./recons/test-run-da-cfg/job_r01_recon.nc
>>> DONE! Total time spent: 0.13 mins.

Leveraging the CLI#

[7]:
!cfr -h
usage: cfr [-h] [-v] {da,graphem} ...

========================================================================================
 cfr: a scripting system for CFR (Feng Zhu, fengzhu@ucar.edu)
----------------------------------------------------------------------------------------
 Usage example for DA:
    cfr da -c config.yml -vb -s 1 2 -r
    # -c config.yml: run the reconstruction job according to config.yml
    # -vb: output the verbose runtime information
    # -s 1 2: set seeds as integers from 1 to 2
    # -r: run the Monte-Carlo iterations for PDA

 Usage example for GraphEM:
    cfr graphem -c config.yml -vb
    # -c config.yml: run the reconstruction job according to config.yml
    # -vb: output the verbose runtime information
========================================================================================


positional arguments:
  {da,graphem}   running mode
    da           run a DA-based reconstruction
    graphem      run a GraphEM-based reconstruction

optional arguments:
  -h, --help     show this help message and exit
  -v, --version  show program's version number and exit
[9]:
!cfr da -c ./recons/test-run-da-cfg/configs.yml -vb -s 1 2 -r
>>> Settings seeds: [1, 2]
>>> job.configs loaded
{'allownan': False,
 'annualize_proxydb_months': [12, 1, 2],
 'annualize_proxydb_ptypes': ['coral'],
 'assim_frac': 0.75,
 'compress_params': {'zlib': True},
 'filter_proxydb_args': [],
 'filter_proxydb_kwargs': {'by': 'ptype', 'keys': ['coral.SrCa']},
 'nens': 100,
 'obs_anom_period': [1951, 1980],
 'obs_lat_name': 'lat',
 'obs_lon_name': 'lon',
 'obs_path': {'tas': 'gistemp1200_GHCNv4_ERSSTv5'},
 'obs_rename_dict': {'tas': 'tempanomaly'},
 'obs_time_name': 'time',
 'output_full_ens': False,
 'output_indices': ['gm', 'nhm', 'shm', 'nino3.4'],
 'prior_annualize_months': [12, 1, 2],
 'prior_anom_period': [1951, 1980],
 'prior_lat_max': 35,
 'prior_lat_min': -35,
 'prior_lat_name': 'lat',
 'prior_lon_max': 360,
 'prior_lon_min': 0,
 'prior_lon_name': 'lon',
 'prior_path': {'tas': 'iCESM_past1000historical/tas'},
 'prior_regrid_nlat': 42,
 'prior_regrid_nlon': 63,
 'prior_time_name': 'time',
 'proxy_assim_frac': 0.75,
 'proxydb_path': 'PAGES2kv2',
 'psm_calib_period': [1850, 2015],
 'ptype_psm_dict': {'coral.SrCa': 'Linear',
                    'coral.calc': 'Linear',
                    'coral.d18O': 'Linear'},
 'ptype_season_dict': {'coral.SrCa': [12, 1, 2],
                       'coral.calc': [12, 1, 2],
                       'coral.d18O': [12, 1, 2]},
 'recon_loc_rad': 25000,
 'recon_period': [0, 2000],
 'recon_sampling_mode': 'fixed',
 'recon_seeds': [1, 2],
 'recon_timescale': 1,
 'recon_vars': ['tas'],
 'save_dirpath': './recons/test-run-da-cfg',
 'trim_prior': True}
>>> job.configs["proxydb_path"] = PAGES2kv2
>>> 692 records loaded
>>> job.proxydb created
>>> job.configs["filter_proxydb_args"] = []
>>> job.configs["filter_proxydb_kwargs"] = {'by': 'ptype', 'keys': ['coral.SrCa']}
>>> 29 records remaining
>>> job.proxydb updated
Annualizing ProxyDatabase: 100%|████████████████| 29/29 [00:00<00:00, 47.82it/s]
>>> job.configs["prior_path"] = {'tas': 'iCESM_past1000historical/tas'}
>>> job.configs["prior_anom_period"] = [1951, 1980]
>>> The target file seems existed at: ./data/tas_sfc_Amon_iCESM_past1000historical_085001-200512.nc . Loading from it instead of downloading ...
>>> prior variables ['tas'] loaded
>>> job.prior created
>>> job.configs["obs_path"] = {'tas': 'gistemp1200_GHCNv4_ERSSTv5'}
>>> job.configs["obs_rename_dict"] = {'tas': 'tempanomaly'}
>>> job.configs["obs_anom_period"] = [1951, 1980]
>>> The target file seems existed at: ./data/gistemp1200_GHCNv4_ERSSTv5.nc.gz . Loading from it instead of downloading ...
>>> obs variables ['tas'] loaded
>>> job.obs created
>>> job.configs["ptype_psm_dict"] = {'coral.SrCa': 'Linear', 'coral.calc': 'Linear', 'coral.d18O': 'Linear'}
>>> job.configs["ptype_season_dict"] = {'coral.SrCa': [12, 1, 2], 'coral.calc': [12, 1, 2], 'coral.d18O': [12, 1, 2]}
Calibrating the PSMs:  10%|██▎                   | 3/29 [00:00<00:00, 27.55it/s]The number of overlapped data points is 22 < 25. Skipping ...
The number of overlapped data points is 24 < 25. Skipping ...
Calibrating the PSMs:  45%|█████████▍           | 13/29 [00:00<00:00, 43.49it/s]The number of overlapped data points is 22 < 25. Skipping ...
Calibrating the PSMs:  62%|█████████████        | 18/29 [00:00<00:00, 45.23it/s]The number of overlapped data points is 21 < 25. Skipping ...
Calibrating the PSMs: 100%|█████████████████████| 29/29 [00:00<00:00, 44.81it/s]
>>> PSM for Ocn_150 failed to be calibrated.
>>> PSM for Ocn_152 failed to be calibrated.
>>> PSM for Ocn_165 failed to be calibrated.
>>> PSM for Ocn_183 failed to be calibrated.
>>> 25 records tagged "calibrated" with ProxyRecord.psm created
Forwarding the PSMs: 100%|██████████████████████| 25/25 [00:03<00:00,  8.01it/s]
>>> ProxyRecord.pseudo created for 25 records
>>> job.configs["prior_annualize_months"] = [12, 1, 2]
>>> Processing tas ...
>>> job.prior updated
>>> job.configs["prior_regrid_nlat"] = 42
>>> job.configs["prior_regrid_nlon"] = 63
>>> Processing tas ...
>>> job.configs["prior_lat_min"] = -35
>>> job.configs["prior_lat_max"] = 35
>>> job.configs["prior_lon_min"] = 0
>>> job.configs["prior_lon_max"] = 360
>>> Processing tas ...
>>> job.configs["save_dirpath"] = ./recons/test-run-da-cfg
>>> job.configs saved to: ./recons/test-run-da-cfg/configs.yml
>>> DONE! Total time used: 0.30 mins.
>>> job.configs["recon_period"] = [0, 2000]
>>> job.configs["recon_loc_rad"] = 25000
>>> job.configs["recon_timescale"] = 1
>>> job.configs["recon_seeds"] = [1, 2]
>>> job.configs["assim_frac"] = 0.75
>>> job.configs["compress_params"] = {'zlib': True}
>>> job.configs["output_full_ens"] = False
>>> seed: 1 | max: 2
KF updating: 100%|█████████████████████████| 2001/2001 [00:02<00:00, 756.88it/s]
>>> Reconstructed fields saved to: ./recons/test-run-da-cfg/job_r01_recon.nc
>>> seed: 2 | max: 2
KF updating: 100%|█████████████████████████| 2001/2001 [00:03<00:00, 579.64it/s]
>>> Reconstructed fields saved to: ./recons/test-run-da-cfg/job_r02_recon.nc
>>> DONE! Total time spent: 0.27 mins.

[ ]: