obs.py 13.3 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. 
Users are recommended to contact the developers (wouter.peters@wur.nl) to receive
updates of the code. See also: http://www.carbontracker.eu. 

This program is free software: you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free Software Foundation, 
version 3. This program is distributed in the hope that it will be useful, but 
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 

You should have received a copy of the GNU General Public License along with this 
program. If not, see <http://www.gnu.org/licenses/>."""
13
14
15
16
#!/usr/bin/env python
# obs.py

"""
Peters, Wouter's avatar
Peters, Wouter committed
17
18
.. module:: obs
.. moduleauthor:: Wouter Peters 
19
20
21
22

Revision History:
File created on 28 Jul 2010.

23
.. autoclass:: da.baseclasses.obs.Observations 
24
   :members: setup, Validate, add_observations, add_simulations, add_model_data_mismatch, write_sample_coords  
25
26
27
28

.. autoclass:: da.baseclasses.obs.ObservationList 
   :members: __init__

29
"""
karolina's avatar
karolina committed
30

31
import logging, sys, os
32
from numpy import array, ndarray
33
34
35
import datetime as dt
sys.path.append(os.getcwd())
sys.path.append('../../')
36

37
identifier = 'Observations baseclass'
38
version = '0.0'
39

40
41
import da.tools.io4 as io

42
################### Begin Class Observations ###################
43

44
class Observations(object):
45
    """ 
46
    The baseclass Observations is a generic object that provides a number of methods required for any type of observations used in 
47
48
49
50
51
52
53
54
    a data assimilation system. These methods are called from the CarbonTracker pipeline. 

    .. note:: Most of the actual functionality will need to be provided through a derived Observations class with the methods 
              below overwritten. Writing your own derived class for Observations is one of the first tasks you'll likely 
              perform when extending or modifying the CarbonTracker Data Assimilation Shell.

    Upon initialization of the class, an object is created that holds no actual data, but has a placeholder attribute `self.Data` 
    which is an empty list of type :class:`~da.baseclasses.obs.ObservationList`. An ObservationList object is created when the 
55
    method :meth:`~da.baseclasses.obs.Observations.add_observations` is invoked in the pipeline. 
56
57

    From the list of observations, a file is written  by method 
58
    :meth:`~da.baseclasses.obs.Observations.write_sample_info`
59
60
    with the sample info needed by the 
    :class:`~da.baseclasses.observationoperator.ObservationOperator` object. The values returned after sampling 
61
    are finally added by :meth:`~da.baseclasses.obs.Observations.add_simulations`
62
63

    """ 
64

65
    def __init__(self):
66
67
68
        """
        create an object with an identifier, version, and an empty ObservationList
        """
karolina's avatar
karolina committed
69
70
        self.ID = identifier
        self.version = version
71
        self.datalist = []  # initialize with an empty list of obs
72

73
        # The following code allows the object to be initialized with a dacycle object already present. Otherwise, it can
74
75
        # be added at a later moment.

karolina's avatar
karolina committed
76
        logging.info('Observations object initialized: %s' % self.ID)
77

karolina's avatar
karolina committed
78
79
    def getlength(self):
        return len(self.datalist)
80

81
    def setup(self, dacycle):
82
83
84
85
        """ Perform all steps needed to start working with observational data, this can include moving data, concatenating files,
            selecting datasets, etc.
        """

86
87
88
89
        self.startdate = dacycle['time.sample.start']
        self.enddate = dacycle['time.sample.end']
        self.datalist = []

90
    def add_observations(self):
91
        """ 
92
        Add actual observation data to the Observations object. This is in a form of an 
93
94
95
96
        :class:`~da.baseclasses.obs.ObservationList` that is contained in self.Data. The 
        list has as only requirement that it can return the observed+simulated values 
        through the method :meth:`~da.baseclasses.obs.ObservationList.getvalues`

97
98
        """

99
100
101
102
103
104
105
106
107
        for n in range(10):
            self.datalist.append(MoleFractionSample(n, self.startdate+dt.timedelta(hours=n+1),'testobs' , 400+n, 0.0, 0.0, 0.0, 0.0, 0 , 100.0 , 52.0, 6.0 , '%04d'%n , 'co2', 1, 0.0, 'none.nc'))

            logging.debug("Added %d observations to the Data list" % (1))

        logging.info("Observations list now holds %d values" % len(self.datalist))


    def add_simulations(self, filename, silent=False):
108
        """ Add the simulation data to the Observations object. 
109
110
        """

111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145

        if not os.path.exists(filename):
            msg = "Sample output filename for observations could not be found : %s" % filename 
            logging.error(msg)
            logging.error("Did the sampling step succeed?")
            logging.error("...exiting")
            raise IOError(msg)

        ncf = io.ct_read(filename, method='read')
        ids = ncf.get_variable('obs_num')
        simulated = ncf.get_variable('flask')
        ncf.close()
        logging.info("Successfully read data from model sample file (%s)" % filename)

        obs_ids = self.getvalues('id').tolist()
        ids = list(map(int, ids))

        missing_samples = []

        for idx, val in zip(ids, simulated): 
            if idx in obs_ids:
                index = obs_ids.index(idx)

                self.datalist[index].simulated = val  # in mol/mol
            else:     
                missing_samples.append(idx)

        if not silent and missing_samples != []:
            logging.warning('Model samples were found that did not match any ID in the observation list. Skipping them...')
            #msg = '%s'%missing_samples ; logging.warning(msg)

        logging.debug("Added %d simulated values to the Data list" % (len(ids) - len(missing_samples)))


    def add_model_data_mismatch(self, filename):
146
147
148
        """ 
            Get the model-data mismatch values for this cycle.
        """
149
150
151
152
153
154
155
156
157
158
159
        self.rejection_threshold = 3.0 # 3-sigma cut-off
        self.global_R_scaling = 1.0 # no scaling applied

        for obs in self.datalist:  # first loop over all available data points to set flags correctly

            obs.mdm = 1.0  
            obs.may_localize = True
            obs.may_reject = True
            obs.flag = 0

        logging.debug("Added Model Data Mismatch to all samples ")
160

161
    def write_sample_coords(self,obsinputfile):
162
163
164
165
        """ 
            Write the information needed by the observation operator to a file. Return the filename that was written for later use
        """

166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
        if len(self.datalist) == 0:
            logging.debug("No observations found for this time period, nothing written to obs file")
        else:
            f = io.CT_CDF(obsinputfile, method='create')
            logging.debug('Creating new observations file for ObservationOperator (%s)' % obsinputfile)

            dimid = f.add_dim('obs', len(self.datalist))
            dim200char = f.add_dim('string_of200chars', 200)
            dim10char = f.add_dim('string_of10chars', 10)
            dimcalcomp = f.add_dim('calendar_components', 6)

            data = self.getvalues('id')

            savedict = io.std_savedict.copy() 
            savedict['name'] = "obs_num"
            savedict['dtype'] = "int"
            savedict['long_name'] = "Unique_Dataset_observation_index_number"
            savedict['units'] = ""
            savedict['dims'] = dimid
            savedict['values'] = data.tolist()
            savedict['comment'] = "Unique index number within this dataset ranging from 0 to UNLIMITED."
            f.add_data(savedict)

            data = [[d.year, d.month, d.day, d.hour, d.minute, d.second] for d in self.getvalues('xdate') ]

            savedict = io.std_savedict.copy() 
            savedict['dtype'] = "int"
            savedict['name'] = "date_components"
            savedict['units'] = "integer components of UTC date/time"
            savedict['dims'] = dimid + dimcalcomp
            savedict['values'] = data
            savedict['missing_value'] = -9
            savedict['comment'] = "Calendar date components as integers. Times and dates are UTC." 
            savedict['order'] = "year, month, day, hour, minute, second"
            f.add_data(savedict)

            data = self.getvalues('lat')

            savedict = io.std_savedict.copy() 
            savedict['name'] = "latitude"
            savedict['units'] = "degrees_north"
            savedict['dims'] = dimid
            savedict['values'] = data.tolist()
            savedict['missing_value'] = -999.9
            f.add_data(savedict)

            data = self.getvalues('lon')

            savedict = io.std_savedict.copy() 
            savedict['name'] = "longitude"
            savedict['units'] = "degrees_east"
            savedict['dims'] = dimid
            savedict['values'] = data.tolist()
            savedict['missing_value'] = -999.9
            f.add_data(savedict)

            data = self.getvalues('height')

            savedict = io.std_savedict.copy() 
            savedict['name'] = "altitude"
            savedict['units'] = "meters_above_sea_level"
            savedict['dims'] = dimid
            savedict['values'] = data.tolist()
            savedict['missing_value'] = -999.9
            f.add_data(savedict)

            data = self.getvalues('samplingstrategy')

            savedict = io.std_savedict.copy() 
            savedict['dtype'] = "int"
            savedict['name'] = "sampling_strategy"
            savedict['units'] = "NA"
            savedict['dims'] = dimid
            savedict['values'] = data.tolist()
            savedict['missing_value'] = -9
            f.add_data(savedict)

            data = self.getvalues('obs')

            savedict = io.std_savedict.copy()
            savedict['name'] = "observed"
            savedict['long_name'] = "observedvalues"
            savedict['units'] = "mol mol-1"
            savedict['dims'] = dimid
            savedict['values'] = data.tolist()
            savedict['comment'] = 'Observations used in optimization'
            f.add_data(savedict)
    
            data = self.getvalues('mdm')
    
            savedict = io.std_savedict.copy()
            savedict['name'] = "modeldatamismatch"
            savedict['long_name'] = "modeldatamismatch"
            savedict['units'] = "[mol mol-1]"
            savedict['dims'] = dimid
            savedict['values'] = data.tolist()
            savedict['comment'] = 'Standard deviation of mole fractions resulting from model-data mismatch'
            f.add_data(savedict)
            f.close()

            logging.debug("Successfully wrote data to obs file")
            logging.info("Sample input file for obs operator now in place [%s]" % obsinputfile)

        


272
273
274
    def write_sample_auxiliary(self, auxoutputfile):
        """ 
            Write selected additional information contained in the Observations object to a file for later processing. 
275

276
        """
277

278
    def getvalues(self, name, constructor=array):
279

karolina's avatar
karolina committed
280
        result = constructor([getattr(o, name) for o in self.datalist])
281
        if isinstance(result, ndarray): 
282
283
284
285
            return result.squeeze()
        else:
            return result

karolina's avatar
karolina committed
286
287
288

################### End Class Observations ###################

289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
################### Begin Class MoleFractionSample ###################

class MoleFractionSample(object):
    """ 
        Holds the data that defines a mole fraction Sample in the data assimilation framework. Sor far, this includes all
        attributes listed below in the __init__ method. One can additionally make more types of data, or make new
        objects for specific projects.

    """

    def __init__(self, idx, xdate, code='XXX', obs=0.0, simulated=0.0, resid=0.0, hphr=0.0, mdm=0.0, flag=0, height=0.0, lat= -999., lon= -999., evn='0000', species='co2', samplingstrategy=1, sdev=0.0, fromfile='none.nc'):
        self.code = code.strip()      # dataset identifier, i.e., co2_lef_tower_insitu_1_99
        self.xdate = xdate             # Date of obs
        self.obs = obs               # Value observed
        self.simulated = simulated         # Value simulated by model
        self.resid = resid             # Mole fraction residuals
        self.hphr = hphr              # Mole fraction prior uncertainty from fluxes and (HPH) and model data mismatch (R)
        self.mdm = mdm               # Model data mismatch
        self.may_localize = True           # Whether sample may be localized in optimizer
        self.may_reject = True              # Whether sample may be rejected if outside threshold
        self.flag = flag              # Flag
        self.height = height            # Sample height in masl
        self.lat = lat               # Sample lat
        self.lon = lon               # Sample lon
        self.id = idx               # Obspack ID within distrution (integer), e.g., 82536
        self.evn = evn               # Obspack Number within distrution (string), e.g., obspack_co2_1_PROTOTYPE_v0.9.2_2012-07-26_99_82536
        self.sdev = sdev              # standard deviation of ensemble
        self.masl = True              # Sample is in Meters Above Sea Level
        self.mag = not self.masl     # Sample is in Meters Above Ground
        self.species = species.strip()
        self.samplingstrategy = samplingstrategy
        self.fromfile = fromfile   # netcdf filename inside ObsPack distribution, to write back later

################### End Class MoleFractionSample ###################


325