statevector.py 8.31 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. 
Users are recommended to contact the developers (wouter.peters@wur.nl) to receive
updates of the code. See also: http://www.carbontracker.eu. 

This program is free software: you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free Software Foundation, 
version 3. This program is distributed in the hope that it will be useful, but 
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 

You should have received a copy of the GNU General Public License along with this 
program. If not, see <http://www.gnu.org/licenses/>."""
13
14
15
16
17
18
19
20
21
22
23
24
25
#!/usr/bin/env python
# ct_statevector_tools.py

"""
Author : peters 

Revision History:
File created on 28 Jul 2010.

"""

import os
import sys
karolina's avatar
karolina committed
26
27
28
import logging
import numpy as np

29
sys.path.append(os.getcwd())
30
31
sys.path.append('../../')

karolina's avatar
karolina committed
32
import da.tools.io4 as io
karolina's avatar
karolina committed
33
from da.baseclasses.statevector import StateVector, EnsembleMember
34
identifier = 'CarbonTracker Gridded Statevector '
35
version = '0.0'
36
37
38

################### Begin Class CtStateVector ###################

39
class CO2GriddedStateVector(StateVector):
40
41
    """ This is a StateVector object for CarbonTracker. It has a private method to make new ensemble members """

42
    def get_covariance(self, date, dacycle):
43
44
45
46
47
        """ Make a new ensemble from specified matrices, the attribute lag refers to the position in the state vector. 
            Note that lag=1 means an index of 0 in python, hence the notation lag-1 in the indexing below.
            The argument is thus referring to the lagged state vector as [1,2,3,4,5,..., nlag]
        """    

karolina's avatar
karolina committed
48
        
49
50
51
52
53
54
55
        try:
            import matplotlib.pyplot as plt
        except:
            pass

        # Get the needed matrices from the specified covariance files

56
        file_ocn_cov = dacycle.dasystem['ocn.covariance'] 
57

58
59
        cov_files = os.listdir(dacycle.dasystem['bio.cov.dir'])
        cov_files = [os.path.join(dacycle.dasystem['bio.cov.dir'], f) for f in cov_files if dacycle.dasystem['bio.cov.prefix'] in f]
60

61
        logging.debug("Found %d covariances to use for biosphere" % len(cov_files))
62
63
64

        # replace YYYY.MM in the ocean covariance file string

65
        file_ocn_cov = file_ocn_cov.replace('2000.01', date.strftime('%Y.%m'))
66

67
68
69
70
        cov_files.append(file_ocn_cov)

        covariancematrixlist = []
        for file in cov_files:
71
            if not os.path.exists(file):
72
73
                msg = "Cannot find the specified file %s" % file 
                logging.error(msg)
brunner's avatar
brunner committed
74
                raise IOError(msg)
75
            else:
76
                logging.debug("Using covariance file: %s" % file)
77

Peters, Wouter's avatar
Peters, Wouter committed
78
            f = io.ct_read(file, 'read')
79

80
            if 'pco2' in file or 'cov_ocean' in file: 
81
                cov_ocn = f.get_variable('CORMAT')
82
                cov = cov_ocn
83
            else: 
84
                cov = f.get_variable('covariance')
Peters, Wouter's avatar
Peters, Wouter committed
85
                #cov_sf      = 10.0/np.sqrt(cov.diagonal().sum())  # this scaling factor makes the total variance close to the value of a single ecoregion
86
87
                cov_sf = 20.0 / np.sqrt(cov.diagonal().sum())  # this scaling factor makes the total variance close to the value of a single ecoregion
                cov = cov * cov_sf
88

89
            f.close()
90
            covariancematrixlist.append(cov)
91

92
        logging.debug("Succesfully closed files after retrieving prior covariance matrices")
93
94
95

        # Once we have the matrices, we can start to make the full covariance matrix, and then decompose it

96
        return covariancematrixlist
97

98
    def make_new_ensemble(self, lag, covariancematrixlist=[None]):
99
100
101
102
103
104
105
106
107
108
109
110
111
112
        """ 
        :param lag: an integer indicating the time step in the lag order
        :param covariancematrix: a list of matrices specifying the covariance distribution to draw from
        :rtype: None
    
        Make a new ensemble, the attribute lag refers to the position in the state vector. 
        Note that lag=1 means an index of 0 in python, hence the notation lag-1 in the indexing below.
        The argument is thus referring to the lagged state vector as [1,2,3,4,5,..., nlag]

        The covariance list object to be passed holds a list of matrices with a total number of dimensions [nparams, nparams], which is
        used to draw ensemblemembers from. Each draw is done on a matrix from the list, to make the computational burden smaller when
        the StateVector nparams becomes very large.

        """    
113
114
115
116
        try:
            import matplotlib.pyplot as plt
        except:
            pass
117

118
119
        if not isinstance(covariancematrixlist, list):
            logging.error("The covariance matrix or matrices must be passed as a list of array objects, exiting..." )
120
121
122
123
            raise ValueError

        # Check dimensions of covariance matrix list, must add up to nparams

124
125
        dims = 1  # start from 1.0 to account for the last parameter that scales Ice+Non-optimized, we have no covariance matrix for this though

126
127
        for matrix in covariancematrixlist: 
            dims += matrix.shape[0]
128
129

        if dims != self.nparams:
130
            logging.error("The total dimension of the covariance matrices passed (%d) does not add up to the prescribed nparams (%d), exiting..." % (dims, self.nparams))
131
132
133
134
            raise ValueError

        # Loop over list if identity matrices and create a matrix of (nparams,nmembers) with the deviations

135
136
137
138
139
        istart = 0
        istop = 0
        dof = 0.0
        dev_matrix = np.zeros((self.nparams, self.nmembers,), 'float')
        randstate = np.random.get_state()
140

141
        for matrix in covariancematrixlist:
142
            # Make a cholesky decomposition of the covariance matrix
143

karolina's avatar
karolina committed
144
            _, s, _ = np.linalg.svd(matrix)
145
            dof += np.sum(s) ** 2 / sum(s ** 2)
146
            try:
147
                C = np.linalg.cholesky(matrix)
148
            except np.linalg.linalg.LinAlgError as err:
149
150
                logging.error('Cholesky decomposition has failed ')
                logging.error('For a matrix of dimensions: %d' % matrix.shape[0])
151
152
                logging.debug(err)
                raise np.linalg.linalg.LinAlgError
153
154
155
156


            # Draw nmembers instances of this distribution

157
            npoints = matrix.shape[0]
158

159
            istop = istop + npoints
160

161
162
163
164
165
            for member in range(1, self.nmembers):
                rands = np.random.randn(npoints)
                deviations = np.dot(C, rands)
                dev_matrix[istart:istop, member - 1] = deviations
                dev_matrix[istop, member - 1] = 1.e-10 * np.random.randn()
166

167
168
169
            #cov2 = np.dot(dev_matrix[istart:istop,:],np.transpose(dev_matrix[istart:istop,:])) / (self.nmembers-1)
            #print matrix.sum(),cov2.sum(),abs(matrix.diagonal()-cov2.diagonal()).max(), matrix.shape,cov2.shape

170
            istart = istart + npoints
171

172
173
        logging.debug('Successfully constructed a deviation matrix from covariance structure')
        logging.info('Appr. degrees of freedom in full covariance matrix is %s' % (int(dof)))
174

175
        # Now fill the ensemble members with the deviations we have just created
176
177


178
        # Create mean values 
179

karolina's avatar
karolina committed
180
        new_mean = np.ones(self.nparams, float) # standard value for a new time step is 1.0
181

182
        # If this is not the start of the filter, average previous two optimized steps into the mix
183

karolina's avatar
karolina committed
184
        if lag == self.nlag - 1 and self.nlag >= 3:
185
186
            new_mean += self.ensemble_members[lag - 1][0].param_values + \
                                           self.ensemble_members[lag - 2][0].param_values 
karolina's avatar
karolina committed
187
            new_mean = new_mean / 3.0
188

189
        # Create the first ensemble member with a deviation of 0.0 and add to list
190

karolina's avatar
karolina committed
191
        new_member = EnsembleMember(0)
192
193
        new_member.param_values = new_mean.flatten()  # no deviations
        self.ensemble_members[lag].append(new_member)
194

195
        # Create members 1:nmembers and add to ensemble_members list
196

197
        for member in range(1, self.nmembers):
karolina's avatar
karolina committed
198
            new_member = EnsembleMember(member)
199
200
            new_member.param_values = dev_matrix[:, member - 1] + new_mean
            self.ensemble_members[lag].append(new_member)
201

karolina's avatar
karolina committed
202
        logging.debug('%d new ensemble members were added to the state vector # %d' % (self.nmembers, (lag + 1)))
203
204
205
206
207
208
209
210
211





################### End Class CtStateVector ###################


if __name__ == "__main__":
212
    pass