Commit 8554fbba authored by Luijkx, Ingrid's avatar Luijkx, Ingrid
Browse files

moved old-structure-folders to archive folder

parent 730d04d3
"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters.
Users are recommended to contact the developers (wouter.peters@wur.nl) to receive
updates of the code. See also: http://www.carbontracker.eu.
This program is free software: you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free Software Foundation,
version 3. This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this
program. If not, see <http://www.gnu.org/licenses/>."""
#!/usr/bin/env python
# control.py
"""
Author : peters
Revision History:
File created on 26 Aug 2010.
"""
import logging
################### Begin Class CtDaSystem ###################
from da.baseclasses.dasystem import DaSystem
class CO2GriddedDaSystem(DaSystem):
    """Settings container for the CarbonTracker Gridded CO2 system.

    All key/value pairs of the rc-file are loaded into this object
    (DaSystem behaves like a dictionary).
    """

    def __init__(self, rcfilename):
        """Load every item from the passed rc-file into this object."""
        # The identifier gives the platform name.
        self.ID = 'CarbonTracker Gridded CO2'
        self.load_rc(rcfilename)
        logging.debug('Data Assimilation System initialized: %s' % self.ID)

    def validate(self):
        """Coerce boolean-like strings and check required rc-keys.

        Raises:
            IOError: on the first required key that is missing.
        """
        required = ('obs.input.dir',
                    'obs.input.fname',
                    'ocn.covariance',
                    'nparameters',
                    'deltaco2.prefix',
                    'regtype')
        # Turn the literal strings 'True'/'False' from the rc-file into booleans.
        for key, value in list(self.items()):
            if value == 'True':
                self[key] = True
            elif value == 'False':
                self[key] = False
        for key in required:
            if key in self:
                continue
            msg = 'Missing a required value in rc-file : %s' % key
            logging.error(msg)
            raise IOError(msg)
        logging.debug('DA System Info settings have been validated succesfully')
################### End Class CtDaSystem ###################
# This module is a library; direct execution is a no-op.
if __name__ == "__main__":
    pass
#!/usr/bin/env python
# control.py
"""
Author : peters
Revision History:
File created on 26 Aug 2010.
Adapted by super004 on 26 Jan 2017.
"""
import logging
################### Begin Class CO2DaSystem ###################
from da.baseclasses.dasystem import DaSystem
class CO2DaSystem(DaSystem):
    """Settings container for the FFDAS CO2 data assimilation system.

    All key/value pairs of the rc-file are loaded into this object
    (DaSystem behaves like a dictionary).
    """

    def validate(self):
        """Coerce boolean-like strings and warn for missing required rc-keys.

        Missing keys are only reported with a warning; no exception is raised.
        """
        required = ('obs.input.id',
                    'obs.input.nr',
                    'obs.spec.nr',
                    'obs.cat.nr',
                    'nparameters',
                    'random.seed',
                    'emis.pparam',
                    'ff.covariance',
                    'obs.bgswitch',
                    'obs.background',
                    'emis.input.spatial',
                    'emis.input.tempobs',
                    'emis.input.tempprior',
                    'emis.paramfile',
                    'emis.paramfile2',
                    'run.emisflag',
                    'run.emisflagens',
                    'run.obsflag')
        # Turn the literal strings 'True'/'False' from the rc-file into booleans.
        for key in self:
            if self[key] == 'True':
                self[key] = True
            elif self[key] == 'False':
                self[key] = False
        for key in required:
            if key not in self:
                logging.warning('Missing a required value in rc-file : %s' % key)
        logging.debug('DA System Info settings have been validated succesfully')
################### End Class CO2DaSystem ###################
# This module is a library; direct execution is a no-op.
if __name__ == "__main__":
    pass
#!/usr/bin/env python
# stilt_tools.py
"""
Author : I. Super
Revision History:
Newly developed code, September 2017
This module holds an emission model that prepares emission files used by the observation operator and
to create pseudo-data
"""
import shutil
import os
import logging
import datetime as dtm
import numpy as np
from numpy import array, logical_and
import da.tools.io4 as io
import math
import da.tools.rc as rc
from da.tools.general import create_dirs, to_datetime
import netCDF4 as nc
identifier = 'EmissionModel ensemble '
version = '1.0'
# TODO(review): these energy-use and category tables are data, not code;
# they should not be stored as python modules in this folder.
from da.ffdas import energy_use_country
energy_use_per_country = energy_use_country.energy_use_per_country
from da.ffdas import category_info
categories = category_info.categories
################### Begin Class Emission model ###################
class EmisModel(object):
    """Emission model that prepares the emission files used by the observation
    operator and to create pseudo-data.

    Per cycle, get_emis() loads the prior (or per-ensemble-member) parameter
    values, then get_yearly_emissions(), get_spatial() and get_temporal()
    write per-station prior emission and time-profile files.
    """

    def __init__(self, dacycle=None):
        # Store the cycle-control object; fall back to an empty dict so
        # setup() can be called later with the real object.
        if dacycle != None:
            self.dacycle = dacycle
        else:
            self.dacycle = {}

    def setup(self, dacycle):
        """Cache all settings needed later from the dacycle/dasystem rc-objects."""
        self.dacycle = dacycle
        self.startdate = self.dacycle['time.fxstart']
        self.enddate = self.dacycle['time.finish']
        self.emisdir = dacycle.dasystem['datadir']
        self.proxyfile = dacycle.dasystem['emis.input.spatial']
        self.tempfileo = dacycle.dasystem['emis.input.tempobs']
        self.tempfilep = dacycle.dasystem['emis.input.tempprior']
        self.btime = int(dacycle.dasystem['run.backtime'])
        self.obsfile = dacycle.dasystem['obs.input.id']
        self.nrspc = int(dacycle.dasystem['obs.spec.nr'])
        self.species = dacycle.dasystem['obs.spec.name'].split(',')
        self.nrcat = int(dacycle.dasystem['obs.cat.nr'])
        self.nparams = int(dacycle.dasystem['nparameters'])
        self.nmembers = int(dacycle['da.optimizer.nmembers'])
        self.pparam = dacycle.dasystem['emis.pparam']
        self.paramfile = dacycle.dasystem['emis.paramfiles']
        self.countries = [country.strip() for country in dacycle.dasystem['countries'].split(';')]
        areafile = dacycle.dasystem['area.file']
        # Grid-cell area map; its shape defines the (lat, lon) output grid.
        self.area = nc.Dataset(areafile)['Area'][:]
        # NOTE(review): hard-coded absolute user paths -- these should come
        # from the rc-file so other users/machines can run this code.
        self.stations2country = rc.read('/home/awoude/ffdas/RINGO/Data/station_countries.rc')
        self.energy_use_per_country = energy_use_per_country
        self.categories = categories
        # Maps 'station.category.name' keys to indices in the state vector.
        self.paramdict = rc.read('/home/awoude/ffdas/RINGO/Data/paramdict.rc')

    def find_in_state(self, station, cat, name):
        """Return the statevector value for station/cat/name, or 1 when the
        quantity is not optimised (multiplying by the result is then a no-op)."""
        key = station + '.' + cat + '.' + name
        if key in self.paramdict:
            # logging.debug('found that {} should be optimised'.format(key))
            i_in_state = int(self.paramdict[key])
            value = self.prm[i_in_state]
            return value
        else: return 1

    def get_emis(self, dacycle, samples, do_pseudo):
        """set up emission information for pseudo-obs (do_pseudo=1) and ensemble runs (do_pseudo=0)"""
        if do_pseudo==1:
            # Pseudo-data run: use the prior parameter values (member id 999).
            priorparam=os.path.join(self.emisdir,self.pparam)
            f = io.ct_read(priorparam, 'read')
            self.prm = f.get_variable('prior_values')[:self.nparams]
            f.close()
            # NOTE(review): this path skips get_yearly_emissions() and never
            # sets self.timestartkey (used by get_temporal) -- confirm.
            self.get_spatial(dacycle, 999, samples, infile=os.path.join(self.emisdir, self.paramfile))
            self.get_temporal(dacycle, 999, samples, do_pseudo=1)
        elif do_pseudo==0:
            self.timestartkey = self.dacycle['time.sample.start']
            self.timefinishkey = self.dacycle['time.sample.end']
            for member in range(self.nmembers):
                #first remove old files, then create new emission files per ensemble member
                if self.startdate == self.timestartkey:
                    file = os.path.join(dacycle.dasystem['datadir'],'temporal_data_%03d.nc'%member)
                    try:
                        os.remove(file)
                    except OSError:
                        pass
                # Read this member's parameter values from the optimizer output.
                prmfile=os.path.join(dacycle['dir.input'],'parameters.%03d.nc'%member)
                f = io.ct_read(prmfile, 'read')
                self.prm = f.get_variable('parametervalues')
                f.close()
                self.get_yearly_emissions(samples)
                self.get_spatial(dacycle, member, samples, infile=os.path.join(self.emisdir, self.paramfile))
                self.get_temporal(dacycle, member, samples, do_pseudo=0)

    def get_yearly_emissions(self, samples):
        """Compute yearly emissions per station, category, country and species
        from national energy use, scaled by the statevector values.

        Stores the result in self.yremis with shape
        (nstations, ncategories, ncountries, nspecies).
        """
        codes = samples.getvalues('code')
        stationnames = []
        for code in codes:
            # Codes look like prefix_NAME or prefix_NAME_UW/DW; keep the name part.
            two_names = any(x in code for x in ['UW', 'DW'])
            stationnames.append('_'.join(code.split('_')[1:2 + two_names]))
        stationnames = list(set(stationnames))
        yremis = np.zeros((len(stationnames), len(self.categories), len(self.countries), len(self.species)))
        for i_station, station in enumerate(stationnames):
            for i_country, country in enumerate(self.countries):
                for i_cat, (cat, values) in enumerate(self.categories.items()):
                    # Scale each factor with its (optional) statevector value.
                    emission_factor = values['emission_factors']
                    emission_factor *= self.find_in_state(station, cat, 'emission_factors')
                    fraction_of_total = values['fraction_of_total']
                    fraction_of_total *= self.find_in_state(station, cat, 'fraction_of_total')
                    e_use = self.energy_use_per_country[country][values['spatial']]
                    for i_species, specie in enumerate(self.species):
                        emission_ratio = values[specie]
                        uncertainty = values[specie+'.uncertainty']
                        # 'l' marks a lognormal ratio: the stored value is its log.
                        if uncertainty == 'l':
                            emission_ratio = np.exp(emission_ratio)
                        if emission_ratio > 1:
                            # Ratios > 1 are suspicious; log them for inspection.
                            logging.debug('{} {} {} {}'.format(station, cat, specie, emission_ratio))
                        emission_ratio *= self.find_in_state(station, cat, specie)
                        emission = e_use * fraction_of_total * emission_factor * emission_ratio
                        yremis[i_station, i_cat, i_country, i_species] = emission
        self.yremis = yremis

    def get_spatial(self, dacycle, member, samples, infile=None):
        """read in proxy data used for spatial distribution of the gridded emissions, disaggregate yearly totals for the area"""
        codes = samples.getvalues('code')
        stationnames = []
        for code in codes:
            two_names = any(x in code for x in ['UW', 'DW'])
            stationnames.append('_'.join(code.split('_')[1:2 + two_names]))
        stationnames = list(set(stationnames))
        # Create a recalculation factor from kg/km2/yr to umol/m2/sec
        # NOTE(review): assumes the species order is CO2, CO (molar mass in
        # kg/umol) -- confirm against obs.spec.name.
        M_mass = [44e-9, 28e-9]
        sec_year = 24*366*3600. #seconds in a year (leapyear)
        kgperkmperyr2umolperm2pers = np.array(M_mass)[:, None, None] * sec_year * self.area[None, :, :]
        self.kgperkmperyr2umolperm2pers = kgperkmperyr2umolperm2pers
        #read in proxy data for spatial disaggregation
        # NOTE(review): the 'infile' argument is ignored and overwritten here;
        # the proxy dataset is also never closed.
        infile = os.path.join(self.emisdir, self.proxyfile)
        proxy = io.ct_read(infile, method='read')
        # Category/country names are stored as character arrays; join and decode.
        proxy_category_names = proxy['emis_category_names'][:]
        proxy_category_names = [b''.join(category_name).strip().decode() for category_name in proxy_category_names]
        proxy_country_names = proxy['country_names'][:]
        proxy_country_names = [b''.join(country_name).strip().decode() for country_name in proxy_country_names]
        for i_station, stationname in enumerate(stationnames):
            spatial_distributions = np.zeros((self.nrcat, len(self.countries), self.area.shape[0], self.area.shape[1]))
            for country in self.countries:
                country_index = self.countries.index(country)
                # Create the spatial distributions
                # Loop over all categories
                for i, category in enumerate(self.categories):
                    spatial_name = self.categories[category]['spatial']
                    cat_index = proxy_category_names.index(spatial_name)
                    # Get the emission distribution for the category
                    category_distribution_country = proxy['proxy_maps'][cat_index, country_index, :, :]
                    spatial_distributions[i, country_index, :, :] = category_distribution_country
            # Multiply spatial distributions with the yearly emissions in the country to get spatially distributed emissions
            spatial_emissions = spatial_distributions[:, :, None, :, :] * self.yremis[i_station, :, :, :, None, None] # cat, country, species, lat, lon
            # Sum over the countries to overlay them.
            spatial_emissions = spatial_emissions.sum(axis=1) # Indices: category, species, lat, lon
            spatial_emissions = np.swapaxes(spatial_emissions, 0,1) # Indices: species, category, lat, lon
            # Recalculate spatial emissions to umol/sec/m2
            spatial_emissions = spatial_emissions / kgperkmperyr2umolperm2pers[:, None, :, :]
            self.spatial_emissions = spatial_emissions
            ## create output file
            prior_file = os.path.join(self.emisdir, 'prior_spatial_{0}_{1:03d}.nc'.format(stationname, member))
            f = io.CT_CDF(prior_file, method='create')
            dimid = f.add_dim('ncat', self.nrcat)
            # NOTE(review): the 'ops' dimension is created but never used below.
            dimid2 = f.add_dim('ops',2 )
            dimlat = f.add_dim('lat', self.area.shape[0])
            dimlon = f.add_dim('lon', self.area.shape[1])
            #loop over all tracers
            for i, species in enumerate(self.species):
                savedict = io.std_savedict.copy()
                savedict['name'] = species
                savedict['long_name'] = "Spatially distributed emissions"
                savedict['units'] = "micromole/m2/s"
                savedict['dims'] = dimid + dimlat + dimlon
                savedict['values'] = spatial_emissions[i,:,:,:]
                f.add_data(savedict)
            f.close()
            logging.debug("Successfully wrote data to prior spatial distribution file (%s)" % prior_file)

    def get_temporal(self, dacycle, member, samples, do_pseudo):
        """read in time profiles used for temporal distribution of the emissions"""
        # First, get the station names from the smaples. For these stations, the time profiles will be written.
        codes = samples.getvalues('code')
        self.codes = codes
        stationnames = []
        for code in codes:
            two_names = any(x in code for x in ['UW', 'DW'])
            stationnames.append('_'.join(code.split('_')[1:2 + two_names]))
        stationnames = list(set(stationnames))
        # For pseudo-observation (do_pseudo==1) or when no time profiles need to be optimized the profiles are simply read from the
        # input file and copied to another file. Otherwise create a new file per ensemble member at t=0 and update the profiles for each time step
        # Check if the ensemble file exists. Otherwise create.
        ensfile = os.path.join(self.emisdir, 'temporal_data_%03d.nc'%member)
        if not os.path.exists(ensfile):
            dumfile = os.path.join(self.emisdir, self.tempfilep)
            shutil.copy2(dumfile,ensfile)
        time_profiles_ds = nc.Dataset(ensfile)
        times = time_profiles_ds['Times'][:]
        # NOTE(review): assumes 'Times' holds parseable '%Y-%m-%d %H:%M:%S'
        # strings -- confirm against the prior time-profile file format.
        times = np.array([dtm.datetime.strptime(time, '%Y-%m-%d %H:%M:%S') for time in np.array(times)])
        self.times = times
        # NOTE(review): self.timestartkey/timefinishkey are only set in the
        # do_pseudo==0 branch of get_emis -- confirm the pseudo path.
        subselect = logical_and(times >= self.timestartkey , times < self.timefinishkey).nonzero()[0]
        date_selection = times.take(subselect, axis=0)
        # The time profiles should always cover at least one full year
        start_date = dtm.datetime(self.timestartkey.year,1,1,0,0) #first time included
        end_date = dtm.datetime(self.timestartkey.year,12,31,23,0) #last time included
        dt = dtm.timedelta(0,3600)
        starttime_index = np.where(times==self.timestartkey)[0][0]
        startdate_index = np.where(times==self.startdate)[0][0]
        end_index = np.where(times==self.timefinishkey)[0][0]
        """ Time profiles should, for a full year, always have an average value of 1.0. Therefore, a new method has been developed
        to optimize time profiles such that we comply with this and the time profiles do not affect the total emissions.
        For this purpose we apply the scaling factor (statevector) to the period covered in this cycle. The time profile for all dates
        outside this period are scaled equally such that the time profile remains its average value of 1.0. Except previously updated
        dates (from previous cycles) are maintained (they are already optimized!)."""
        unselected_day = np.where((times<self.startdate) | (times>self.timefinishkey))[0]
        category_names = list(time_profiles_ds['category_name'][:])
        self.category_names = category_names
        station_names_ds = list(time_profiles_ds['station_names'][:])
        profiles = np.zeros(time_profiles_ds['time_profile'][:].shape)
        for category, values in self.categories.items():
            cat_index = category_names.index(values['temporal'])
            for station in stationnames:
                paramvalue = self.find_in_state(station, category, values['temporal'])
                # Only rescale profiles that are actually optimised (value != 1).
                if paramvalue != 1:
                    station_index = station_names_ds.index(station)
                    original_profile = time_profiles_ds['time_profile'][station_index, cat_index, :]
                    selected_profile = time_profiles_ds['time_profile'][station_index, cat_index, :].take(subselect)
                    new_profile = selected_profile[:] * paramvalue
                    # Redistribute the added/removed mass over the times outside
                    # this cycle so the yearly mean of the profile stays 1.0.
                    daily_sum = np.array(original_profile[unselected_day]).sum()
                    original_profile[:startdate_index] = original_profile[:startdate_index] - (original_profile[:startdate_index] / daily_sum) * (new_profile.sum() - selected_profile.sum())
                    original_profile[end_index:] = original_profile[end_index:] - (original_profile[end_index:] / daily_sum) * (new_profile.sum() - selected_profile.sum())
                    original_profile[starttime_index:end_index] = new_profile
                    profiles[station_index, cat_index, :] = original_profile
        time_profiles_ds.close()
        # Now, write the output
        tofile = nc.Dataset(ensfile, 'r+')
        for category, values in self.categories.items():
            cat_index = category_names.index(values['temporal'])
            for station in stationnames:
                if self.find_in_state(station, category, 'time_profile') != 1:
                    station_index = station_names_ds.index(station)
                    tofile['time_profile'][station_index, cat_index, :] = profiles[station_index, cat_index, :]
        tofile.close()
        # Now read in the correct profiles, select the correct time period and write the profiles into one file per ensemble member
        time_profiles_ds = nc.Dataset(ensfile)
        # NOTE(review): this subselect spans the full time axis by construction.
        subselect = logical_and(times >= times[0] , times <= times[-1]).nonzero()[0]
        date_selection = times.take(subselect, axis=0)
        for station in stationnames:
            station_index = station_names_ds.index(station)
            prior_file = os.path.join(self.emisdir, 'prior_temporal_{0}_{1:03}.nc'.format(station, member))
            f = io.CT_CDF(prior_file, method='create')
            dimtime = f.add_dim('Times', len(date_selection))
            cat_names_done = []
            for category, values in self.categories.items():
                cat_name = values['temporal']
                cat_index = category_names.index(cat_name)
                # Several categories can share a temporal profile; write each once.
                if not cat_name in cat_names_done:
                    profile = np.array(time_profiles_ds['time_profile'][station_index, cat_index, :].take(subselect))
                    savedict = io.std_savedict.copy()
                    savedict['name'] = cat_name
                    savedict['long_name'] = "Temporal distribution"
                    savedict['units'] = ""
                    savedict['dims'] = dimtime
                    savedict['values'] = profile
                    f.add_data(savedict)
                    cat_names_done.append(cat_name)
            f.close()
            logging.debug("Successfully wrote data to prior temporal distribution file (%s)" % prior_file)
################### End Class Emission model ###################
# This module is a library; direct execution is a no-op.
if __name__ == "__main__":
    pass
This diff is collapsed.
This diff is collapsed.
#!/usr/bin/env python
# optimizer.py
"""
.. module:: optimizer
.. moduleauthor:: Wouter Peters
Revision History:
File created on 28 Jul 2010.
"""
import os
import logging
import numpy as np
import numpy.linalg as la
import da.tools.io4 as io
identifier = 'Optimizer CO2'
version = '0.0'
from da.baseclasses.optimizer import Optimizer
################### Begin Class CO2Optimizer ###################
class CO2Optimizer(Optimizer):
"""
This creates an instance of an optimization object. It handles the minimum least squares optimization
of the state vector given a set of sample objects. Two routines will be implemented: one where the optimization
is sequential and one where it is the equivalent matrix solution. The choice can be made based on considerations of speed
and efficiency.
"""
def setup(self, dims):
    """Unpack the optimizer dimensions from *dims*.

    dims holds, in order: nlag, nmembers, nparams, nobs, nrloc, nrspc
    and the input directory; the matrices are then allocated.
    """
    attr_names = ('nlag', 'nmembers', 'nparams', 'nobs',
                  'nrloc', 'nrspc', 'inputdir')
    for offset, attr in enumerate(attr_names):
        setattr(self, attr, dims[offset])
    # NOTE(review): localize() reads self.specfile, but the dims[7]
    # assignment was disabled here -- confirm it is set elsewhere.
    self.create_matrices()
def set_localization(self, loctype='None'):
    """ determine which localization to use """
    # T-test values for a two-tailed student's T-test at 95% confidence,
    # for the supported ensemble sizes; 0 disables the threshold.
    tvalues = {50: 2.0086, 100: 1.9840, 150: 1.97591, 200: 1.9719}
    if loctype == 'CT2007':
        self.localization = True
        self.localizetype = 'CT2007'
        self.tvalue = tvalues.get(self.nmembers, 0)
    elif loctype in ('multitracer', 'multitracer2'):
        self.localization = True
        self.localizetype = loctype
    else:
        self.localization = False
        self.localizetype = 'None'
    logging.info("Current localization option is set to %s" % self.localizetype)
def localize(self, n):
""" localize the Kalman Gain matrix """
import numpy as np
if not self.localization:
logging.debug('Not localized observation %i' % self.obs_ids[n])
return
if self.localizetype == 'CT2007':
count_localized = 0
for r in range(self.nlag * self.nparams):
corr = np.corrcoef(self.HX_prime[n, :], self.X_prime[r, :].squeeze())[0, 1]
prob = corr / np.sqrt((1.000000001 - corr ** 2) / (self.nmembers - 2))
if abs(prob) < self.tvalue:
self.KG[r] = 0.0
count_localized = count_localized + 1
logging.debug('Localized observation %i, %i%% of values set to 0' % (self.obs_ids[n],count_localized*100/(self.nlag * self.nparams)))
if self.localizetype == 'multitracer':
count_localized = 0
###read emission ratios for source type related to each parameter
infile = os.path.join(self.inputdir, self.specfile)
f = open(infile, 'r')
lines = f.readlines()
f.close()
speclist = []
speclist.append('CO2')
emr = []
for line in lines:
dum=line.split(",")
if dum[0]=='#' or dum[0]=='CO2' or dum[0]=='SNAP':
continue
else:
sp = dum[0]
emrs = []
for k in range(self.nparams):
emrs.append(float(dum[k+1]))
speclist.append(sp)
emr.append(emrs)
emr=np.array(emr)
###find obs and model value for this time step and species; calculate differences and ratios
lnd = self.nobs/(self.nrspc*self.nrloc)
for i in range(self.nrspc):
if self.species[n] == speclist[i]:
idx = [0-i,1-i,2-i,3-i]
Xobs = []
Xmod = []
Robs = []
Rmod = []
for i in range(self.nrspc):
Xobs.append(self.obs[n+lnd*idx[i]])
Xmod.append(self.Hx[n+lnd*idx[i]])
if i>0:
Robs.append(Xobs[i]/Xobs[0])
Rmod.append(Xmod[i]/Xmod[0])
Xobs = np.array(Xobs)
Xmod = np.array(Xmod)
Robs = np.array(Robs)