Coverage for pygeodyn/inout/observations.py: 46%

1#-*- coding: utf-8 -*-

2""" Contains the functions building observations """

3import os, os.path

4import numpy as np

5import h5py

6import math

7import logging

8import glob

9import pygeodyn

10from pygeodyn.inout import reads

11from pygeodyn import common, utilities

12from pygeodyn.constants import r_core

14class Observation:

15 """

16 Class storing the observation data, operator and errors in members:

17 * Observation.data

18 * Observation.H_core

19 * Observation.errors

20 """

21 def __init__(self, X, H, Rxx, pos_go_vo=None, H_iono=None):

22 """

23 The observation is expected to be used as Y = HX with Y the observation data, X the observed data under spectral form and H the observation operator.

26 :param obs_data: 2D numpy array (nb_real x nb_obs) storing the observation data Y

27 :type obs_data: numpy.ndarray

28 :param H: 2D numpy array (nb_obs x nb_coeffs) storing the observation operator

29 :type H: numpy.ndarray

30 :param R: 2D numpy array (nb_obs x nb_obs) storing the observation errors

31 :type R: numpy.ndarray

32 """

34 self.X = X

35 self.nb_obs = self.X.shape[1]

36 # Check that H has first dim nb_obs

37 if H.shape[0] != self.nb_obs:

38 raise ValueError('Observation operator\'s first dimension must be equal to number of observations ({}).Got {} dims instead.'.format(self.nb_obs, H.shape))

40 self.H = H

42 # Check that R has shape (nb_obs x nb_obs)

43 if Rxx.shape != (self.nb_obs, self.nb_obs):

44 raise ValueError('Observation errors\' dimensions must be equal to number of observations ({}).Got {} dims instead.'.format(self.nb_obs, Rxx.shape))

45 self.Rxx = Rxx

46 # for GVOs, it is useful for a few applications to save positions

47 self.positions = pos_go_vo

50def find_obs_analysis_match(obs_date, t_analysis, dt_f):

51 """

52 return index if obs date found in analysis times +-dt_forecast/2

53 """

54 for idx, ana_date in enumerate(t_analysis):

55 if abs(obs_date - ana_date) < dt_f/2:

56 return str(idx)

57 return None

60def build_go_vo_observations(cfg, nb_realisations, measure_type, seed=None):

61 """

62 Builds the observations (including direct observation operators H) from direct sources

63 (Ground observatories (GO), virtual observatories of CHAMP (VO_CHAMP) and SWARM (VO_SWARM).

64 The dates where no analysis takes place are discarded.

66 :param cfg: configuration of the computation

67 :type cfg: inout.config.ComputationConfig

68 :param nb_realisations: Number of realisations of the computation

69 :type nb_realisations: int

70 :param measure_type: Type of the measure to extract ('MF' or 'SV')

71 :type measure_type: str

72 :return: dictionary of Observation objects with dates (np.datetime64) as keys.

73 :rtype: dict[np.datetime64, Observation]

74 """

75 datadir = cfg.obs_dir

77 # if adding a new satellite, the first two letters of the cdf file should

78 # match the first two letters after '_' in the variable obs_types_to_use.

79 # If not possible, update code lines below that find cdf filename.

80 possible_ids = np.array(['GROUND', 'CHAMP', 'SWARM', 'OERSTED', 'CRYOSAT', 'GRACE', 'COMPOSITE'])

81 cdf_name_shortcuts = np.array(['GO', 'CH', 'SW', 'OR', 'CR', 'GR', 'CO'])

82 if cfg.obs_select == all:

83 obs_types_to_use = {obs: ('MF', 'SV') for obs in possible_ids}

84 obs_types_to_use.pop('COMPOSITE') # remove composite data

85 else:

86 obs_types_to_use = cfg.obs_select.copy()

87 complete_obs = ({}, {}, {}, {}, {}) # dicts will resp. contain positions, data and variance and data_mask and sat_name

88 # Extract data for all valid observatories

90 # initialize on seed for every observatory:

91 seeds = utilities.get_seeds_for_obs(seed, obs_types_to_use)

92 for obs in obs_types_to_use.keys():

93 # these conditions filters which observations are taken into account according to config.obs_select

94 # e.g. if obs_read['GROUND'] = (SV,), then MF of GO are not taken into account.

95 if not np.any(obs == possible_ids):

96 continue

97 elif measure_type not in obs_types_to_use[obs]:

98 continue

99 # variance data is already extracted because everything is located

100 # in a single file in cdf files

101 file = None

102 for files in os.walk(datadir):

103 for f in files[2]:

104 # find the filename of the cdf file that correspond to GO, CHAMP, SWARM, OERSTED, CRYO, GRACE or COMPOSITE

105 # files that are not starting with GO, CH, SW, OR, CO or CR will be ignored.

106 if np.any(f[:2] == cdf_name_shortcuts[obs == possible_ids]):

107 obs_shortcut = cdf_name_shortcuts[obs == possible_ids]

108 file = os.path.join(files[0], f)

109 if file is None:

110 logging.warning('cdf file corresponding to {} has not been found, continuing without'.format(obs))

111 continue

112 # all_data consist of (obs_pos, obs_data, var_data)

113 all_data = reads.read_observatories_cdf_data(file, measure_type, obs_shortcut, discard=cfg.discard_high_lat)

114 complete_obs = noise_GVO_data(obs_shortcut, all_data, cfg, nb_realisations, measure_type, complete_obs, seed=seeds[obs, measure_type])

115 # End of loop on obs_type

116

117 return compute_H_R_GVOs(datadir, complete_obs, cfg, measure_type)

118

119

120def noise_GVO_data(obs, data, cfg, nb_realisations, measure_type, complete_obs, seed=None):

121 """

122

123 Noise data contained in obs_data with a gaussian distribution, variance taken from

124 var_data, in the basis of obs_pos.

125 """

126 complete_obs_positions, complete_obs_data, complete_var_date, complete_mask_date, complete_sat_name = complete_obs

127

128 obs_pos, obs_data, var_data, mask, sat_name = data

129 # check that the length is compatible with a format like [Br1, Bth1, Bph1, Br2, Bth2, Bph2, ..., BrN, BthN, BphN]

130 assert len(var_data[list(var_data.keys())[0]]) % 3 == 0

131

132 # Draw a seed for normal draw

133 if seed is None:

134 seed = np.random.randint(0, 50000)

135 logging.info("Noising {} observations of {} with seed {}".format(obs, measure_type, seed))

136 random_draw = np.random.RandomState(seed).normal

137 for date in obs_data:

138 print(utilities.date_to_decimal(date))

139 for date in obs_data:

140 # Skip any date that will not be used for analyses

141 match_idx = find_obs_analysis_match(utilities.date_to_decimal(date), cfg.t_analyses_full, cfg.dt_f)

142 if match_idx is None:

143 continue

144 nb_observations_at_date = len(obs_data[date])

145 assert nb_observations_at_date % 3 == 0

146 noised_data_at_date = np.zeros(shape=(nb_realisations, nb_observations_at_date))

147 var_at_date = []

148 for i_obs in range(0, nb_observations_at_date // 3, 1):

149 # For each component (j=0: r, j=1: th, j=2: ph)

150 for j in range(0, 3):

151 n = 3 * i_obs + j

152 # var_data is now ordered as the magnetic field

153 # Format: [Br1, Bth1, Bph1, Br2, Bth2, Bph2, ..., BrN, BthN, BphN]

154 # The value stored is assumed to be sigma**2

155 sigma2 = var_data[date][n]

156 var_at_date.append(sigma2)

157 # Noise data with normal noise N(data, sigma)

158 noised_data_at_date[:, n] = random_draw(obs_data[date][n], math.sqrt(sigma2), size=nb_realisations)

159

160 # If the date does not exist yet, create the entry in the dict

161 if date not in complete_obs_data:

162 complete_obs_positions[date] = obs_pos[date]

163 complete_obs_data[date] = noised_data_at_date

164 complete_var_date[date] = var_at_date

165 complete_mask_date[date] = mask[date]

166 complete_sat_name[date] = sat_name[date]

167 # Else concatenate the data with the existing one

168 else:

169 complete_obs_positions[date].extend(obs_pos[date])

170 complete_obs_data[date] = np.concatenate((complete_obs_data[date], noised_data_at_date), axis=1)

171 complete_var_date[date].extend(var_at_date)

172 complete_mask_date[date] = np.concatenate((complete_mask_date[date], mask[date]))

173 complete_sat_name[date].extend(sat_name[date])

174

175 return (complete_obs_positions, complete_obs_data, complete_var_date, complete_mask_date, complete_sat_name)

176

177

178def compute_H_R_GVOs(data_dir, complete_obs, cfg, measure_type):

179 """

180 Final part of build_govo_observations(). Compute the observations and error matrices

181 at all relevant dates.

182 :param data_dir: directory of observation GVO_data

183 :type data_dir: str

184 :param complete_obs: tuple containing(position, data, var, mask, sat name)

185 :type complete_obs: tuple

186 :param measure_type: Type of the measure to extract ('MF' or 'SV')

187 :type measure_type: str

188 """

189 complete_obs_positions, complete_obs_data, complete_var_date, complete_mask_date, complete_sat_name = complete_obs

190 observations = {}

191 logging.info('Computing observation operator H and error matrix R for {}'.format(measure_type))

192 # Build Observation objects

193 max_degree = cfg.Lsv if measure_type == 'SV' else cfg.Lb

194 for date, obs_data_at_date in complete_obs_data.items():

195 # Skip any date that will not be used for analyses

196 match_idx = find_obs_analysis_match(utilities.date_to_decimal(date), cfg.t_analyses_full, cfg.dt_f)

197 if match_idx is None:

198 continue

199 H_core = common.compute_direct_obs_operator(complete_obs_positions[date], max_degree)

200 # Convert the list of sigmas as diagonal matrix R (obs errors)

201 R = np.diagflat(complete_var_date[date])

202

203 # if dense SW_err then we replace the SWARM error in R by the dense error contained in R_dense_swarm.txt

204 # ONLY FOR 4MONTHS SWARM!!!

205 if cfg.SW_err == "dense":

206 sat_mask = np.isin(np.array(complete_sat_name[date]),["SW"])

207 mask = np.array(complete_mask_date[date], dtype=bool)

208 R_swarm = np.loadtxt(os.path.join(data_dir, 'SWARM_err','R_dense_swarm.txt'))

209 R[sat_mask,sat_mask] = R_swarm[mask,mask]

210

211 # update observations object

212 observations[match_idx] = Observation(obs_data_at_date, H_core, R, pos_go_vo=complete_obs_positions[date])

213

214 return observations

215

216

217def build_covobs_observations(cfg, nb_realisations, measure_type, seed=None):

218 """

219 Builds the observations from COVOBS files (without a var file).

220 The dates where no analysis takes place are discarded.

221

222 :param cfg: configuration of the computation

223 :type cfg: inout.config.ComputationConfig

224 :param nb_realisations: Number of realisations of the computation

225 :type nb_realisations: int

226 :param measure_type: Type of the measure to extract ('MF' or 'SV')

227 :type measure_type: str

228 :return: dictionary of Observation objects with dates (np.datetime64) as keys.

229 :rtype: dict[np.datetime64, Observation]

230 """

231 datadir = cfg.obs_dir

232 if measure_type == 'SV':

233 nb_coefs = cfg.Nsv

234 else:

235 nb_coefs = cfg.Nb

236

237 # Find files (sort to have reproducible order of realisations_files)

238 realisations_files = sorted(glob.glob(os.path.join(datadir,'real*_{}_int_coefs.dat'.format(measure_type.lower()))))

239 nb_real_files = len(realisations_files)

240

241 if nb_real_files == 0:

242 raise IOError("No COVOBS files were found in {} ! Check that it is the valid directory path.".format(datadir))

243

244 if nb_real_files < nb_realisations:

245 raise ValueError("There are not enough COVOBS files to handle the {} realisations asked ! Please retry with a lower number (max: {}).".format(nb_realisations, nb_real_files))

246

247 # Create a list to store the unformatted data read from the files

248 dates = None

249 unformatted_data = []

250

251 # Get all realisations (unformatted)

252 for file_name in realisations_files:

253 unformatted_data.append(np.loadtxt(file_name))

254 if dates is None:

255 # Get dates array (first element of the lines in file is the date)

256 dates = unformatted_data[-1][:, 0]

257

258 # Convert the list in a numpy array and remove the date (first member of last axis)

259 unformatted_data = np.array(unformatted_data)[:, :, 1:]

260 # Compute the var_data from ALL realisations

261 var_data = np.var(unformatted_data, axis=0)

262

263 # Format the data as a dict of Observations with dates as keys

264 observations = {}

265 for i_d, date in enumerate(dates):

266 # Shift the date by one month (different convention)

267 match_idx = find_obs_analysis_match(date, cfg.t_analyses_full, cfg.dt_f)

268 if match_idx is None:

269 continue

270 # Get nb_realisations and nb_coefs

271 obs_data = unformatted_data[:nb_realisations, i_d, :nb_coefs]

272 # Get the number of observed coefficients (here equal to nb_coefs)

273 current_No = obs_data.shape[-1]

274 # H is identity

275 H = np.eye(current_No, nb_coefs)

276 # R is a diagonal matrix with diagonal from 1D var_data (skip first item that is the date)

277 R = np.diagflat(var_data[i_d, :current_No])

278 observations[match_idx] = Observation(obs_data, H, R)

279

280 return observations

281

282

283def build_covobs_hdf5_observations(cfg, nb_realisations, measure_type, seed=None):

284 """

285 Builds the observations from a hdf5 COVOBS file.

286 The dates where no analysis takes place are discarded.

287

288 :param cfg: configuration of the computation

289 :type cfg: inout.config.ComputationConfig

290 :param nb_realisations: Number of realisations of the computation

291 :type nb_realisations: int

292 :param measure_type: Type of the measure to extract ('MF' or 'SV')

293 :type measure_type: str

294 :return: dictionary of Observation objects with dates (np.datetime64) as keys.

295 :rtype: dict[np.datetime64, Observation]

296 """

297 datadir = cfg.obs_dir

298 if measure_type == 'SV':

299 max_degree = cfg.Lsv

300 nb_coefs = cfg.Nsv

301 dataset_name = 'dgnm'

302 else:

303 max_degree = cfg.Lb

304 nb_coefs = cfg.Nb

305 dataset_name = 'gnm'

306

307 # Find the hdf5 files (sort to have reproducible order of realisations_files)

308 hdf5_files = sorted(glob.glob(os.path.join(datadir, '*.hdf5')))

309

310 if len(hdf5_files) == 0:

311 raise IOError("No hdf5 files were found in {} ! Check that it is the valid directory path.".format(datadir))

312

313 logging.debug('Observations are read from {}'.format(hdf5_files[0]))

314

315 with h5py.File(hdf5_files[0], 'r') as f:

316 dates = f['times'][:]

317 real_data = f[dataset_name][:]

318

319 nb_reals_data, nb_dates_data, nb_coefs_data = real_data.shape

320 # N = L(L+2) => L = sqrt(N+1) - 1

321 max_degree_data = np.sqrt(nb_coefs_data + 1) - 1

322

323 variance_data = np.var(real_data, axis=0)

324

325 if nb_reals_data < nb_realisations:

326 raise ValueError("There are not enough realisations in the COVOBS file to handle the {} realisations asked !"

327 " Please retry with a lower number (max: {}).".format(nb_realisations, nb_reals_data))

328

329 if max_degree_data < max_degree:

330 raise ValueError("There are not enough coefficients in the COVOBS file to handle the max degree {} asked for {} !"

331 " Please retry with a lower degree (max: {}).".format(max_degree, measure_type, max_degree_data))

332

333 # Format the data as a dict of Observations with dates as keys

334 observations = {}

335 for i_d, date in enumerate(dates):

336 # Shift the date by one month (different convention)

337 match_idx = find_obs_analysis_match(date, cfg.t_analyses_full, cfg.dt_f)

338 if match_idx is None:

339 continue

340 # Get asked nb_realisations and nb_coefs

341 obs_data = real_data[:nb_realisations, i_d, :nb_coefs]

342 # Get the number of observed coefficients (here equal to nb_coefs)

343 current_No = obs_data.shape[-1]

344 # H is identity (spectral data)

345 H = np.eye(current_No, nb_coefs)

346 # R is a diagonal matrix with variances as diagonal

347 R = np.diagflat(variance_data[i_d, :current_No])

348 observations[match_idx] = Observation(obs_data, H, R)

349

350 return observations

351

352

353def build_chaos_hdf5_observations(cfg, nb_realisations, measure_type, seed=None):

354 """

355 Builds the observations from a hdf5 file storing CHAOS' spectral coefficients.

356 The dates where no analysis takes place are discarded.

357

358 :param cfg: configuration of the computation

359 :type cfg: inout.config.ComputationConfig

360 :param nb_realisations: Number of realisations of the computation

361 :type nb_realisations: int

362 :param measure_type: Type of the measure to extract ('MF' or 'SV')

363 :type measure_type: str

364 :return: dictionary of Observation objects with dates (np.datetime64) as keys.

365 :rtype: dict[np.datetime64, Observation]

366 """

367 datadir = cfg.obs_dir

368 if measure_type == 'SV':

369 max_degree = cfg.Lsv

370 nb_coefs = cfg.Nsv

371 dataset_name = 'dgnm'

372 else:

373 max_degree = cfg.Lb

374 nb_coefs = cfg.Nb

375 dataset_name = 'gnm'

376

377 # Find the hdf5 files (sort to have reproducible order of realisations_files)

378 hdf5_files = sorted(glob.glob(os.path.join(datadir, '*.hdf5')))

379

380 if len(hdf5_files) == 0:

381 raise IOError("No hdf5 files were found in {} ! Check that it is the valid directory path.".format(datadir))

382

383 logging.debug('Observations are read from {}'.format(hdf5_files[0]))

384

385 with h5py.File(hdf5_files[0], 'r') as f:

386 dates = f['times'][:]

387 chaos_data = f[dataset_name][:]

388 variance_data = f['var_{}'.format(dataset_name)][:]

389

390 nb_coefs_data = chaos_data.shape[1]

391 # N = L(L+2) => L = sqrt(N+1) - 1

392 max_degree_data = np.sqrt(nb_coefs_data + 1) - 1

393

394 if max_degree_data < max_degree:

395 raise ValueError("There are not enough coefficients in the CHAOS file to handle the max degree {} asked for {} !"

396 " Please retry with a lower degree (max: {}).".format(max_degree, measure_type, max_degree_data))

397

398 # find kalmag file

399 try:

400 filename = os.path.join(os.path.join(

401 pygeodyn._package_directory, "data/observations/KALMAG", "KALMAG.hdf5")

402 )

403 except IOError:

404 logging.error("{} not found! Check the path".format(filename))

405 return

406

407 with h5py.File(filename, 'r') as f:

408 dates_kalmag = f['times'][:]

409 real_data = f[measure_type][:]

410

411 # Format the data as a dict of Observations with dates as keys

412 observations = {}

413 for i_d, date in enumerate(dates):

414 # Shift the date by one month (different convention)

415 match_idx = find_obs_analysis_match(date, cfg.t_analyses_full, cfg.dt_f)

416 if match_idx is None:

417 continue

418 # Get asked nb_realisations and nb_coefs

419 obs_data = np.zeros((nb_realisations, nb_coefs))

420 obs_data[:] = chaos_data[i_d, :nb_coefs]

421

422 #Now we build full obs_data = obs_data + N(0,sigma_mods_error) + deviation_from_mean_reals_kalmag

423 idx = np.where(dates_kalmag == date)[0][0]

424 # Add covobs deviation from mean to obs_data

425 obs_data += real_data[:nb_realisations, idx, :nb_coefs] - np.mean(real_data[:, idx, :nb_coefs],axis=0)

426

427 # Get the number of observed coefficients (here equal to nb_coefs)

428 current_No = obs_data.shape[-1]

429 # H is identity (spectral data)

430 H = np.eye(current_No, nb_coefs)

431 # R is a diagonal matrix with variances as diagonal

432 R = np.diagflat(variance_data[i_d, :current_No])

433 observations[match_idx] = Observation(obs_data, H, R)

434

435 return observations

436

437

438def build_kalmag_hdf5_observations(cfg, nb_realisations, measure_type, seed=None):

439 """

440 Builds the observations from a hdf5 COVOBS file.

441 The dates where no analysis takes place are discarded.

442

443 :param cfg: configuration of the computation

444 :type cfg: inout.config.ComputationConfig

445 :param nb_realisations: Number of realisations of the computation

446 :type nb_realisations: int

447 :param measure_type: Type of the measure to extract ('MF' or 'SV')

448 :type measure_type: str

449 :return: dictionary of Observation objects with dates (np.datetime64) as keys.

450 :rtype: dict[np.datetime64, Observation]

451 """

452 datadir = cfg.obs_dir

453 if measure_type == 'SV':

454 max_degree = cfg.Lsv

455 nb_coefs = cfg.Nsv

456 else:

457 max_degree = cfg.Lb

458 nb_coefs = cfg.Nb

459

460 # Find the hdf5 files (sort to have reproducible order of realisations_files)

461 hdf5_files = sorted(glob.glob(os.path.join(datadir, '*.hdf5')))

462

463 if len(hdf5_files) == 0:

464 raise IOError("No hdf5 files were found in {} ! Check that it is the valid directory path.".format(datadir))

465

466 logging.debug('Observations are read from {}'.format(hdf5_files[0]))

467

468 with h5py.File(hdf5_files[0], 'r') as f:

469 dates = f['times'][:]

470 real_data = f[measure_type][:]

471

472 nb_reals_data, nb_dates_data, nb_coefs_data = real_data.shape

473 # N = L(L+2) => L = sqrt(N+1) - 1

474 max_degree_data = np.sqrt(nb_coefs_data + 1) - 1

475

476 variance_data = np.var(real_data, axis=0)

477

478 if nb_reals_data < nb_realisations:

479 raise ValueError("There are not enough realisations in the KALMAG file to handle the {} realisations asked !"

480 " Please retry with a lower number (max: {}).".format(nb_realisations, nb_reals_data))

481

482 if max_degree_data < max_degree:

483 raise ValueError("There are not enough coefficients in the KALMAG file to handle the max degree {} asked for {} !"

484 " Please retry with a lower degree (max: {}).".format(max_degree, measure_type, max_degree_data))

485

486 # Format the data as a dict of Observations with dates as keys

487 observations = {}

488 for i_d, date in enumerate(dates):

489 # Shift the date by one month (different convention)

490 match_idx = find_obs_analysis_match(date, cfg.t_analyses_full, cfg.dt_f)

491 if match_idx is None:

492 continue

493 # Get asked nb_realisations and nb_coefs

494 obs_data = real_data[:nb_realisations, i_d, :nb_coefs]

495 # Get the number of observed coefficients (here equal to nb_coefs)

496 current_No = obs_data.shape[-1]

497 # H is identity (spectral data)

498 H = np.eye(current_No, nb_coefs)

499 # R is a diagonal matrix with variances as diagonal

500 R = np.diagflat(variance_data[i_d, :current_No])

501 observations[match_idx] = Observation(obs_data, H, R)

502

503 return observations

504

505

506def build_sola_hdf5_observations(cfg, nb_realisations, measure_type, seed=None):

507 """

508 Builds the observations (including direct observation operators H) from direct sources

509 (Ground observatories (GO), virtual observatories of CHAMP (VO_CHAMP) and SWARM (VO_SWARM).

510 The dates where no analysis takes place are discarded.

511

512 :param cfg: configuration of the computation

513 :type cfg: inout.config.ComputationConfig

514 :param nb_realisations: Number of realisations of the computation

515 :type nb_realisations: int

516 :param measure_type: Type of the measure to extract ('MF' or 'SV')

517 :type measure_type: strf

518 :return: dictionary of Observation objects with dates (np.datetime64) as keys.

519 :rtype: dict[np.datetime64, Observation]

520 """

521

522 observations = {}

523

524 random_state = np.random.RandomState(seed).normal

525 if measure_type == 'MF':

526 # find kalmag file

527 try:

528 cfg.obs_dir = os.path.join(os.path.join(

529 pygeodyn._package_directory, "data/observations/KALMAG")

530 )

531 except IOError:

532 logging.error("{} not found! Check the path".format(cfg.obs_dir))

533 return

534 return build_kalmag_hdf5_observations(cfg, nb_realisations, measure_type, seed=seed)

535

536 if measure_type == 'SV':

537 SNAPSHOT = list(h5py.File(os.path.join(cfg.obs_dir, "SOLA.hdf5"), 'r')['SOLA'])

538

539 for i in SNAPSHOT:

540 with h5py.File(os.path.join(cfg.obs_dir, "SOLA.hdf5"), 'r') as f:

541

542 time = np.array(f['SOLA'][i]['time'])[0][0]

543 match_idx = find_obs_analysis_match(time, cfg.t_analyses_full, cfg.dt_f)

544 if match_idx is None:

545 continue

546

547 leb_theta = np.squeeze(np.array(f['SOLA'][i]['theta_leb']))* np.pi / 180

548 leb_phi = np.squeeze(np.array(f['SOLA'][i]['phi_leb']))* np.pi / 180

549 w_cmb = np.squeeze(np.array(f['SOLA'][i]['weight_leb']))

550 leb_r = np.squeeze(np.array([r_core]*leb_phi.shape[0]))

551

552 Bs = np.squeeze(np.array(f['SOLA'][i]['sv'])[::3])

553 sigma2 = np.squeeze(np.array(f['SOLA'][i]['sigma'])[::3]**2)

554

555 mask = np.squeeze(np.logical_not(np.isnan(sigma2) | np.isnan(Bs)))

556 Bs = Bs[mask]

557 sigma2 = sigma2[mask]

558 Avg_kern = np.array(f['SOLA'][i]['avg_kernel'])[::3]

559 Avg_kern = np.multiply(Avg_kern[mask],w_cmb)

560

561 B_reals = np.zeros((nb_realisations,Bs.shape[0]))

562 for i in range(Bs.shape[0]):

563 B_reals[:,i] = random_state(Bs[i], np.sqrt(sigma2[i]), size=nb_realisations)

564

565 R = np.diagflat(sigma2)

566

567 H_core = common.compute_direct_obs_operator_SOLA(leb_r, leb_theta, leb_phi, cfg.Lsv)

568 H = Avg_kern @ H_core

569

570 observations[match_idx] = Observation(B_reals, H, R)

571

572 return observations

573