Coverage for pygeodyn/inout/observations.py: 46% (284 statements)


# -*- coding: utf-8 -*-
""" Contains the functions building observations """
import os
import os.path
import math
import glob
import logging
import numpy as np
import h5py
import pygeodyn
from pygeodyn.inout import reads
from pygeodyn import common, utilities
from pygeodyn.constants import r_core

class Observation:
    """
    Class storing the observation data, operator and errors in members:

    * Observation.X
    * Observation.H
    * Observation.Rxx
    """
    def __init__(self, X, H, Rxx, pos_go_vo=None, H_iono=None):
        """
        The observation is expected to be used as Y = HX, with Y the observation data,
        X the observed data in spectral form and H the observation operator.

        :param X: 2D numpy array (nb_real x nb_obs) storing the observation data Y
        :type X: numpy.ndarray
        :param H: 2D numpy array (nb_obs x nb_coeffs) storing the observation operator
        :type H: numpy.ndarray
        :param Rxx: 2D numpy array (nb_obs x nb_obs) storing the observation errors
        :type Rxx: numpy.ndarray
        :param pos_go_vo: optional positions of the GVOs, saved for a few applications
        :param H_iono: optional ionospheric observation operator (currently unused)
        """
        self.X = X
        self.nb_obs = self.X.shape[1]
        # Check that the first dimension of H equals nb_obs
        if H.shape[0] != self.nb_obs:
            raise ValueError('Observation operator\'s first dimension must be equal to the number of observations ({}). Got shape {} instead.'.format(self.nb_obs, H.shape))
        self.H = H

        # Check that Rxx has shape (nb_obs x nb_obs)
        if Rxx.shape != (self.nb_obs, self.nb_obs):
            raise ValueError('Observation errors\' dimensions must be equal to the number of observations ({}). Got shape {} instead.'.format(self.nb_obs, Rxx.shape))
        self.Rxx = Rxx
        # For GVOs, it is useful in a few applications to save the positions
        self.positions = pos_go_vo

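# Usage sketch (hypothetical shapes, not taken from the module itself):
#
#     >>> X = np.zeros((20, 6))   # 20 realisations of 6 observations
#     >>> H = np.eye(6, 24)       # maps 24 spectral coefficients to 6 observations
#     >>> Rxx = 0.1 * np.eye(6)   # diagonal observation error matrix
#     >>> obs = Observation(X, H, Rxx)
#     >>> obs.nb_obs
#     6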

def find_obs_analysis_match(obs_date, t_analysis, dt_f):
    """
    Returns the index (as a string) of the analysis time matching obs_date
    within +- dt_f/2, or None if no analysis time matches.
    """
    for idx, ana_date in enumerate(t_analysis):
        if abs(obs_date - ana_date) < dt_f/2:
            return str(idx)
    return None

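# Worked example (hypothetical decimal dates): with analysis times
# [2000.0, 2001.0, 2002.0] and dt_f = 0.5, an observation at 2001.2 satisfies
# |2001.2 - 2001.0| < 0.25 and is matched to the index '1', while an
# observation at 2001.4 matches no analysis time and yields None.
#
#     >>> find_obs_analysis_match(2001.2, [2000.0, 2001.0, 2002.0], 0.5)
#     '1'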

def build_go_vo_observations(cfg, nb_realisations, measure_type, seed=None):
    """
    Builds the observations (including direct observation operators H) from direct
    sources: ground observatories (GO) and virtual observatories (VO) built from
    CHAMP, SWARM, OERSTED, CRYOSAT, GRACE or COMPOSITE data.
    The dates where no analysis takes place are discarded.

    :param cfg: configuration of the computation
    :type cfg: inout.config.ComputationConfig
    :param nb_realisations: number of realisations of the computation
    :type nb_realisations: int
    :param measure_type: type of the measure to extract ('MF' or 'SV')
    :type measure_type: str
    :return: dictionary of Observation objects with analysis indices (str) as keys
    :rtype: dict[str, Observation]
    """
    datadir = cfg.obs_dir

    # If adding a new satellite, the first two letters of the cdf file should
    # match the first two letters after '_' in the variable obs_types_to_use.
    # If that is not possible, update the code below that finds the cdf filename.
    possible_ids = np.array(['GROUND', 'CHAMP', 'SWARM', 'OERSTED', 'CRYOSAT', 'GRACE', 'COMPOSITE'])
    cdf_name_shortcuts = np.array(['GO', 'CH', 'SW', 'OR', 'CR', 'GR', 'CO'])

    if cfg.obs_select == 'all':
        obs_types_to_use = {obs: ('MF', 'SV') for obs in possible_ids}
        obs_types_to_use.pop('COMPOSITE')  # remove composite data
    else:
        obs_types_to_use = cfg.obs_select.copy()

    # The dicts will respectively contain positions, data, variances, data masks and satellite names
    complete_obs = ({}, {}, {}, {}, {})
    # Extract data for all valid observatories

    # Initialise one seed for every observatory
    seeds = utilities.get_seeds_for_obs(seed, obs_types_to_use)
    for obs in obs_types_to_use.keys():
        # These conditions filter which observations are taken into account according to cfg.obs_select,
        # e.g. if obs_types_to_use['GROUND'] == ('SV',), the MF of GO is not taken into account.
        if not np.any(obs == possible_ids):
            continue
        elif measure_type not in obs_types_to_use[obs]:
            continue
        # The variance data is extracted at the same time, since everything is
        # located in a single cdf file
        file = None
        for dirpath, _, filenames in os.walk(datadir):
            for f in filenames:
                # Find the filename of the cdf file that corresponds to GO, CHAMP, SWARM,
                # OERSTED, CRYOSAT, GRACE or COMPOSITE.
                # Files that do not start with GO, CH, SW, OR, CR, GR or CO are ignored.
                if np.any(f[:2] == cdf_name_shortcuts[obs == possible_ids]):
                    obs_shortcut = cdf_name_shortcuts[obs == possible_ids]
                    file = os.path.join(dirpath, f)
        if file is None:
            logging.warning('cdf file corresponding to {} has not been found, continuing without it'.format(obs))
            continue
        # all_data consists of (obs_pos, obs_data, var_data, mask, sat_name)
        all_data = reads.read_observatories_cdf_data(file, measure_type, obs_shortcut, discard=cfg.discard_high_lat)
        complete_obs = noise_GVO_data(obs_shortcut, all_data, cfg, nb_realisations, measure_type, complete_obs, seed=seeds[obs, measure_type])
    # End of the loop on obs_types_to_use

    return compute_H_R_GVOs(datadir, complete_obs, cfg, measure_type)

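# Usage sketch (the cfg object and the cdf files under cfg.obs_dir are
# assumptions here; shapes follow the Observation members defined above):
#
#     >>> observations = build_go_vo_observations(cfg, nb_realisations=20,
#     ...                                         measure_type='SV', seed=42)
#     >>> for idx, obs in observations.items():
#     ...     print(idx, obs.X.shape, obs.H.shape, obs.Rxx.shape)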

def noise_GVO_data(obs, data, cfg, nb_realisations, measure_type, complete_obs, seed=None):
    """
    Adds Gaussian noise to the data contained in obs_data, with variances taken
    from var_data, at the positions given by obs_pos.
    """
    complete_obs_positions, complete_obs_data, complete_var_date, complete_mask_date, complete_sat_name = complete_obs

    obs_pos, obs_data, var_data, mask, sat_name = data
    # Check that the length is compatible with a format like [Br1, Bth1, Bph1, Br2, Bth2, Bph2, ..., BrN, BthN, BphN]
    assert len(var_data[list(var_data.keys())[0]]) % 3 == 0

    # Draw a seed for the normal draw if none was given
    if seed is None:
        seed = np.random.randint(0, 50000)
    logging.info("Noising {} observations of {} with seed {}".format(obs, measure_type, seed))
    random_draw = np.random.RandomState(seed).normal


    for date in obs_data:
        # Skip any date that will not be used for analyses
        match_idx = find_obs_analysis_match(utilities.date_to_decimal(date), cfg.t_analyses_full, cfg.dt_f)
        if match_idx is None:
            continue
        nb_observations_at_date = len(obs_data[date])
        assert nb_observations_at_date % 3 == 0
        noised_data_at_date = np.zeros(shape=(nb_realisations, nb_observations_at_date))
        var_at_date = []
        for i_obs in range(nb_observations_at_date // 3):
            # For each component (j=0: r, j=1: th, j=2: ph)
            for j in range(0, 3):
                n = 3 * i_obs + j
                # var_data is ordered as the magnetic field
                # Format: [Br1, Bth1, Bph1, Br2, Bth2, Bph2, ..., BrN, BthN, BphN]
                # The stored value is assumed to be sigma**2
                sigma2 = var_data[date][n]
                var_at_date.append(sigma2)
                # Noise the data with normal noise N(data, sigma)
                noised_data_at_date[:, n] = random_draw(obs_data[date][n], math.sqrt(sigma2), size=nb_realisations)

        # If the date does not exist yet, create the entries in the dicts
        if date not in complete_obs_data:
            complete_obs_positions[date] = obs_pos[date]
            complete_obs_data[date] = noised_data_at_date
            complete_var_date[date] = var_at_date
            complete_mask_date[date] = mask[date]
            complete_sat_name[date] = sat_name[date]
        # Else concatenate the data with the existing ones
        else:
            complete_obs_positions[date].extend(obs_pos[date])
            complete_obs_data[date] = np.concatenate((complete_obs_data[date], noised_data_at_date), axis=1)
            complete_var_date[date].extend(var_at_date)
            complete_mask_date[date] = np.concatenate((complete_mask_date[date], mask[date]))
            complete_sat_name[date].extend(sat_name[date])

    return (complete_obs_positions, complete_obs_data, complete_var_date, complete_mask_date, complete_sat_name)

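# Minimal sketch of the noising step above (standalone, hypothetical numbers):
# each component value is replaced by nb_realisations draws from
# N(value, sqrt(sigma2)).
#
#     >>> rng_draw = np.random.RandomState(42).normal
#     >>> value, sigma2, nb_realisations = 30000.0, 4.0, 5
#     >>> rng_draw(value, math.sqrt(sigma2), size=nb_realisations).shape
#     (5,)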

def compute_H_R_GVOs(data_dir, complete_obs, cfg, measure_type):
    """
    Final part of build_go_vo_observations(). Computes the observation operators
    and error matrices at all relevant dates.

    :param data_dir: directory of the GVO observation data
    :type data_dir: str
    :param complete_obs: tuple containing (positions, data, variances, masks, satellite names)
    :type complete_obs: tuple
    :param cfg: configuration of the computation
    :type cfg: inout.config.ComputationConfig
    :param measure_type: type of the measure to extract ('MF' or 'SV')
    :type measure_type: str
    :return: dictionary of Observation objects with analysis indices (str) as keys
    :rtype: dict[str, Observation]
    """
    complete_obs_positions, complete_obs_data, complete_var_date, complete_mask_date, complete_sat_name = complete_obs
    observations = {}
    logging.info('Computing observation operator H and error matrix R for {}'.format(measure_type))
    # Build the Observation objects
    max_degree = cfg.Lsv if measure_type == 'SV' else cfg.Lb
    for date, obs_data_at_date in complete_obs_data.items():
        # Skip any date that will not be used for analyses
        match_idx = find_obs_analysis_match(utilities.date_to_decimal(date), cfg.t_analyses_full, cfg.dt_f)
        if match_idx is None:
            continue
        H_core = common.compute_direct_obs_operator(complete_obs_positions[date], max_degree)
        # Convert the list of variances into a diagonal matrix R (observation errors)
        R = np.diagflat(complete_var_date[date])

        # If SW_err is "dense", replace the SWARM errors in R by the dense errors
        # contained in R_dense_swarm.txt. ONLY FOR 4-MONTH SWARM DATA!
        if cfg.SW_err == "dense":
            sat_mask = np.isin(np.array(complete_sat_name[date]), ["SW"])
            mask = np.array(complete_mask_date[date], dtype=bool)
            R_swarm = np.loadtxt(os.path.join(data_dir, 'SWARM_err', 'R_dense_swarm.txt'))
            R[sat_mask, sat_mask] = R_swarm[mask, mask]

        # Update the observations dict
        observations[match_idx] = Observation(obs_data_at_date, H_core, R, pos_go_vo=complete_obs_positions[date])

    return observations

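# Sketch of the error-matrix assembly above (hypothetical values): variances go
# on the diagonal, and a boolean mask can overwrite selected diagonal entries,
# as done for the dense SWARM errors.
#
#     >>> R = np.diagflat([1.0, 2.0, 3.0])
#     >>> sat_mask = np.array([False, True, True])
#     >>> R[sat_mask, sat_mask] = [20.0, 30.0]
#     >>> np.diag(R)
#     array([ 1., 20., 30.])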

def build_covobs_observations(cfg, nb_realisations, measure_type, seed=None):
    """
    Builds the observations from COVOBS files (without a var file).
    The dates where no analysis takes place are discarded.

    :param cfg: configuration of the computation
    :type cfg: inout.config.ComputationConfig
    :param nb_realisations: number of realisations of the computation
    :type nb_realisations: int
    :param measure_type: type of the measure to extract ('MF' or 'SV')
    :type measure_type: str
    :return: dictionary of Observation objects with analysis indices (str) as keys
    :rtype: dict[str, Observation]
    """
    datadir = cfg.obs_dir
    if measure_type == 'SV':
        nb_coefs = cfg.Nsv
    else:
        nb_coefs = cfg.Nb

    # Find the files (sorted to have a reproducible order of realisations_files)
    realisations_files = sorted(glob.glob(os.path.join(datadir, 'real*_{}_int_coefs.dat'.format(measure_type.lower()))))
    nb_real_files = len(realisations_files)

    if nb_real_files == 0:
        raise IOError("No COVOBS files were found in {}! Check that it is a valid directory path.".format(datadir))

    if nb_real_files < nb_realisations:
        raise ValueError("There are not enough COVOBS files to handle the {} realisations asked! Please retry with a lower number (max: {}).".format(nb_realisations, nb_real_files))

    # Create a list to store the unformatted data read from the files
    dates = None
    unformatted_data = []

    # Get all realisations (unformatted)
    for file_name in realisations_files:
        unformatted_data.append(np.loadtxt(file_name))
        if dates is None:
            # Get the dates array (the first element of each line in a file is the date)
            dates = unformatted_data[-1][:, 0]

    # Convert the list into a numpy array and remove the date (first member of the last axis)
    unformatted_data = np.array(unformatted_data)[:, :, 1:]
    # Compute the var_data from ALL realisations
    var_data = np.var(unformatted_data, axis=0)

    # Format the data as a dict of Observations with analysis indices as keys
    observations = {}
    for i_d, date in enumerate(dates):
        # Skip any date that will not be used for analyses
        match_idx = find_obs_analysis_match(date, cfg.t_analyses_full, cfg.dt_f)
        if match_idx is None:
            continue
        # Keep only the asked nb_realisations and nb_coefs
        obs_data = unformatted_data[:nb_realisations, i_d, :nb_coefs]
        # Get the number of observed coefficients (here equal to nb_coefs)
        current_No = obs_data.shape[-1]
        # H is identity (spectral data)
        H = np.eye(current_No, nb_coefs)
        # R is a diagonal matrix with the 1D var_data as diagonal
        R = np.diagflat(var_data[i_d, :current_No])
        observations[match_idx] = Observation(obs_data, H, R)

    return observations

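# Sketch of the variance computation across the realisation axis (hypothetical
# array: 3 realisation files, 2 dates, 4 coefficients once the date column has
# been removed):
#
#     >>> stacked = np.random.rand(3, 2, 4)
#     >>> np.var(stacked, axis=0).shape
#     (2, 4)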

def build_covobs_hdf5_observations(cfg, nb_realisations, measure_type, seed=None):
    """
    Builds the observations from a hdf5 COVOBS file.
    The dates where no analysis takes place are discarded.

    :param cfg: configuration of the computation
    :type cfg: inout.config.ComputationConfig
    :param nb_realisations: number of realisations of the computation
    :type nb_realisations: int
    :param measure_type: type of the measure to extract ('MF' or 'SV')
    :type measure_type: str
    :return: dictionary of Observation objects with analysis indices (str) as keys
    :rtype: dict[str, Observation]
    """
    datadir = cfg.obs_dir
    if measure_type == 'SV':
        max_degree = cfg.Lsv
        nb_coefs = cfg.Nsv
        dataset_name = 'dgnm'
    else:
        max_degree = cfg.Lb
        nb_coefs = cfg.Nb
        dataset_name = 'gnm'

    # Find the hdf5 files (sorted to have a reproducible order)
    hdf5_files = sorted(glob.glob(os.path.join(datadir, '*.hdf5')))

    if len(hdf5_files) == 0:
        raise IOError("No hdf5 files were found in {}! Check that it is a valid directory path.".format(datadir))

    logging.debug('Observations are read from {}'.format(hdf5_files[0]))

    with h5py.File(hdf5_files[0], 'r') as f:
        dates = f['times'][:]
        real_data = f[dataset_name][:]

    nb_reals_data, nb_dates_data, nb_coefs_data = real_data.shape
    # N = L(L+2) => L = sqrt(N+1) - 1
    max_degree_data = np.sqrt(nb_coefs_data + 1) - 1

    variance_data = np.var(real_data, axis=0)

    if nb_reals_data < nb_realisations:
        raise ValueError("There are not enough realisations in the COVOBS file to handle the {} realisations asked!"
                         " Please retry with a lower number (max: {}).".format(nb_realisations, nb_reals_data))

    if max_degree_data < max_degree:
        raise ValueError("There are not enough coefficients in the COVOBS file to handle the max degree {} asked for {}!"
                         " Please retry with a lower degree (max: {}).".format(max_degree, measure_type, max_degree_data))

    # Format the data as a dict of Observations with analysis indices as keys
    observations = {}
    for i_d, date in enumerate(dates):
        # Skip any date that will not be used for analyses
        match_idx = find_obs_analysis_match(date, cfg.t_analyses_full, cfg.dt_f)
        if match_idx is None:
            continue
        # Keep only the asked nb_realisations and nb_coefs
        obs_data = real_data[:nb_realisations, i_d, :nb_coefs]
        # Get the number of observed coefficients (here equal to nb_coefs)
        current_No = obs_data.shape[-1]
        # H is identity (spectral data)
        H = np.eye(current_No, nb_coefs)
        # R is a diagonal matrix with the variances as diagonal
        R = np.diagflat(variance_data[i_d, :current_No])
        observations[match_idx] = Observation(obs_data, H, R)

    return observations

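# The degree check above relies on the spherical harmonic coefficient count:
# a field truncated at degree L has N = L*(L+2) coefficients, hence
# L = sqrt(N + 1) - 1. For example:
#
#     >>> L = 13
#     >>> np.sqrt(L * (L + 2) + 1) - 1
#     13.0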

def build_chaos_hdf5_observations(cfg, nb_realisations, measure_type, seed=None):
    """
    Builds the observations from a hdf5 file storing CHAOS' spectral coefficients.
    The realisations are built by perturbing the CHAOS coefficients with the
    deviations of the KALMAG ensemble from its mean.
    The dates where no analysis takes place are discarded.

    :param cfg: configuration of the computation
    :type cfg: inout.config.ComputationConfig
    :param nb_realisations: number of realisations of the computation
    :type nb_realisations: int
    :param measure_type: type of the measure to extract ('MF' or 'SV')
    :type measure_type: str
    :return: dictionary of Observation objects with analysis indices (str) as keys
    :rtype: dict[str, Observation]
    """
    datadir = cfg.obs_dir
    if measure_type == 'SV':
        max_degree = cfg.Lsv
        nb_coefs = cfg.Nsv
        dataset_name = 'dgnm'
    else:
        max_degree = cfg.Lb
        nb_coefs = cfg.Nb
        dataset_name = 'gnm'

    # Find the hdf5 files (sorted to have a reproducible order)
    hdf5_files = sorted(glob.glob(os.path.join(datadir, '*.hdf5')))

    if len(hdf5_files) == 0:
        raise IOError("No hdf5 files were found in {}! Check that it is a valid directory path.".format(datadir))

    logging.debug('Observations are read from {}'.format(hdf5_files[0]))

    with h5py.File(hdf5_files[0], 'r') as f:
        dates = f['times'][:]
        chaos_data = f[dataset_name][:]
        variance_data = f['var_{}'.format(dataset_name)][:]

    nb_coefs_data = chaos_data.shape[1]
    # N = L(L+2) => L = sqrt(N+1) - 1
    max_degree_data = np.sqrt(nb_coefs_data + 1) - 1

    if max_degree_data < max_degree:
        raise ValueError("There are not enough coefficients in the CHAOS file to handle the max degree {} asked for {}!"
                         " Please retry with a lower degree (max: {}).".format(max_degree, measure_type, max_degree_data))


    # Find the KALMAG file
    filename = os.path.join(pygeodyn._package_directory, "data/observations/KALMAG", "KALMAG.hdf5")
    if not os.path.isfile(filename):
        logging.error("{} not found! Check the path".format(filename))
        return

    with h5py.File(filename, 'r') as f:
        dates_kalmag = f['times'][:]
        real_data = f[measure_type][:]

    # Format the data as a dict of Observations with analysis indices as keys
    observations = {}
    for i_d, date in enumerate(dates):
        # Skip any date that will not be used for analyses
        match_idx = find_obs_analysis_match(date, cfg.t_analyses_full, cfg.dt_f)
        if match_idx is None:
            continue
        # Keep only the asked nb_realisations and nb_coefs
        obs_data = np.zeros((nb_realisations, nb_coefs))
        obs_data[:] = chaos_data[i_d, :nb_coefs]

        # Build the full obs_data by adding, to the CHAOS coefficients, the
        # deviation of each KALMAG realisation from the KALMAG ensemble mean
        idx = np.where(dates_kalmag == date)[0][0]
        obs_data += real_data[:nb_realisations, idx, :nb_coefs] - np.mean(real_data[:, idx, :nb_coefs], axis=0)

        # Get the number of observed coefficients (here equal to nb_coefs)
        current_No = obs_data.shape[-1]
        # H is identity (spectral data)
        H = np.eye(current_No, nb_coefs)
        # R is a diagonal matrix with the variances as diagonal
        R = np.diagflat(variance_data[i_d, :current_No])
        observations[match_idx] = Observation(obs_data, H, R)

    return observations

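# Sketch of the ensemble perturbation above (hypothetical array): the CHAOS
# coefficients act as the mean, and each realisation inherits the spread of
# the KALMAG ensemble around its own mean.
#
#     >>> real_data = np.random.rand(50, 1, 8)   # 50 realisations, 1 date, 8 coefficients
#     >>> chaos_coefs = np.ones(8)
#     >>> obs_data = chaos_coefs + (real_data[:20, 0, :]
#     ...                           - np.mean(real_data[:, 0, :], axis=0))
#     >>> obs_data.shape
#     (20, 8)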

def build_kalmag_hdf5_observations(cfg, nb_realisations, measure_type, seed=None):
    """
    Builds the observations from a hdf5 KALMAG file.
    The dates where no analysis takes place are discarded.

    :param cfg: configuration of the computation
    :type cfg: inout.config.ComputationConfig
    :param nb_realisations: number of realisations of the computation
    :type nb_realisations: int
    :param measure_type: type of the measure to extract ('MF' or 'SV')
    :type measure_type: str
    :return: dictionary of Observation objects with analysis indices (str) as keys
    :rtype: dict[str, Observation]
    """
    datadir = cfg.obs_dir
    if measure_type == 'SV':
        max_degree = cfg.Lsv
        nb_coefs = cfg.Nsv
    else:
        max_degree = cfg.Lb
        nb_coefs = cfg.Nb

    # Find the hdf5 files (sorted to have a reproducible order)
    hdf5_files = sorted(glob.glob(os.path.join(datadir, '*.hdf5')))

    if len(hdf5_files) == 0:
        raise IOError("No hdf5 files were found in {}! Check that it is a valid directory path.".format(datadir))

    logging.debug('Observations are read from {}'.format(hdf5_files[0]))

    with h5py.File(hdf5_files[0], 'r') as f:
        dates = f['times'][:]
        real_data = f[measure_type][:]

    nb_reals_data, nb_dates_data, nb_coefs_data = real_data.shape
    # N = L(L+2) => L = sqrt(N+1) - 1
    max_degree_data = np.sqrt(nb_coefs_data + 1) - 1

    variance_data = np.var(real_data, axis=0)

    if nb_reals_data < nb_realisations:
        raise ValueError("There are not enough realisations in the KALMAG file to handle the {} realisations asked!"
                         " Please retry with a lower number (max: {}).".format(nb_realisations, nb_reals_data))

    if max_degree_data < max_degree:
        raise ValueError("There are not enough coefficients in the KALMAG file to handle the max degree {} asked for {}!"
                         " Please retry with a lower degree (max: {}).".format(max_degree, measure_type, max_degree_data))

    # Format the data as a dict of Observations with analysis indices as keys
    observations = {}
    for i_d, date in enumerate(dates):
        # Skip any date that will not be used for analyses
        match_idx = find_obs_analysis_match(date, cfg.t_analyses_full, cfg.dt_f)
        if match_idx is None:
            continue
        # Keep only the asked nb_realisations and nb_coefs
        obs_data = real_data[:nb_realisations, i_d, :nb_coefs]
        # Get the number of observed coefficients (here equal to nb_coefs)
        current_No = obs_data.shape[-1]
        # H is identity (spectral data)
        H = np.eye(current_No, nb_coefs)
        # R is a diagonal matrix with the variances as diagonal
        R = np.diagflat(variance_data[i_d, :current_No])
        observations[match_idx] = Observation(obs_data, H, R)

    return observations

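# For spectral observations such as KALMAG, the operator H built above is the
# identity, so Y = HX returns the coefficients unchanged (hypothetical sizes):
#
#     >>> H = np.eye(4, 4)
#     >>> x = np.arange(4.0)
#     >>> np.allclose(H @ x, x)
#     True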

def build_sola_hdf5_observations(cfg, nb_realisations, measure_type, seed=None):
    """
    Builds the observations from a hdf5 file storing SOLA data (local estimates
    of the secular variation at the core surface, together with their averaging
    kernels). For 'MF', the observations are built from the KALMAG file instead.
    The dates where no analysis takes place are discarded.

    :param cfg: configuration of the computation
    :type cfg: inout.config.ComputationConfig
    :param nb_realisations: number of realisations of the computation
    :type nb_realisations: int
    :param measure_type: type of the measure to extract ('MF' or 'SV')
    :type measure_type: str
    :return: dictionary of Observation objects with analysis indices (str) as keys
    :rtype: dict[str, Observation]
    """

    observations = {}

    random_draw = np.random.RandomState(seed).normal

    if measure_type == 'MF':
        # Find the KALMAG directory
        cfg.obs_dir = os.path.join(pygeodyn._package_directory, "data/observations/KALMAG")
        if not os.path.isdir(cfg.obs_dir):
            logging.error("{} not found! Check the path".format(cfg.obs_dir))
            return
        return build_kalmag_hdf5_observations(cfg, nb_realisations, measure_type, seed=seed)

    if measure_type == 'SV':
        with h5py.File(os.path.join(cfg.obs_dir, "SOLA.hdf5"), 'r') as f:
            for snap in f['SOLA']:

                time = np.array(f['SOLA'][snap]['time'])[0][0]
                match_idx = find_obs_analysis_match(time, cfg.t_analyses_full, cfg.dt_f)
                if match_idx is None:
                    continue

                # Lebedev grid positions (converted from degrees to radians) and weights
                leb_theta = np.squeeze(np.array(f['SOLA'][snap]['theta_leb'])) * np.pi / 180
                leb_phi = np.squeeze(np.array(f['SOLA'][snap]['phi_leb'])) * np.pi / 180
                w_cmb = np.squeeze(np.array(f['SOLA'][snap]['weight_leb']))
                leb_r = np.squeeze(np.array([r_core] * leb_phi.shape[0]))

                # Keep one value out of three (the radial component, following the
                # [Br, Bth, Bph] ordering used elsewhere in this module)
                Bs = np.squeeze(np.array(f['SOLA'][snap]['sv'])[::3])
                sigma2 = np.squeeze(np.array(f['SOLA'][snap]['sigma'])[::3] ** 2)

                # Discard estimates where either the value or the variance is NaN
                mask = np.squeeze(np.logical_not(np.isnan(sigma2) | np.isnan(Bs)))
                Bs = Bs[mask]
                sigma2 = sigma2[mask]
                Avg_kern = np.array(f['SOLA'][snap]['avg_kernel'])[::3]
                Avg_kern = np.multiply(Avg_kern[mask], w_cmb)

                # Noise the data with normal noise N(data, sigma)
                B_reals = np.zeros((nb_realisations, Bs.shape[0]))
                for n in range(Bs.shape[0]):
                    B_reals[:, n] = random_draw(Bs[n], np.sqrt(sigma2[n]), size=nb_realisations)

                R = np.diagflat(sigma2)

                H_core = common.compute_direct_obs_operator_SOLA(leb_r, leb_theta, leb_phi, cfg.Lsv)
                H = Avg_kern @ H_core

                observations[match_idx] = Observation(B_reals, H, R)

    return observations

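# Sketch of the composed SOLA operator above (hypothetical sizes): H_core maps
# nb_coeffs spectral coefficients to nb_leb Lebedev grid points, and the
# weighted averaging kernels map those grid values to nb_obs local estimates,
# so H = Avg_kern @ H_core has shape (nb_obs, nb_coeffs).
#
#     >>> Avg_kern = np.random.rand(5, 100)
#     >>> H_core = np.random.rand(100, 24)
#     >>> (Avg_kern @ H_core).shape
#     (5, 24)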