# -*- coding: utf-8 -*-

# spechomo, Spectral homogenization of multispectral satellite data
#
# Copyright (C) 2019-2021
# - Daniel Scheffler (GFZ Potsdam, daniel.scheffler@gfz-potsdam.de)
# - Helmholtz Centre Potsdam - GFZ German Research Centre for Geosciences Potsdam,
#   Germany (https://www.gfz-potsdam.de/)
#
# This software was developed within the context of the GeoMultiSens project funded
# by the German Federal Ministry of Education and Research
# (project grant code: 01 IS 14 010 A-C).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Please note the following exception: `spechomo` depends on tqdm, which is
# distributed under the Mozilla Public Licence (MPL) v2.0 except for the files
# "tqdm/_tqdm.py", "setup.py", "README.rst", "MANIFEST.in" and ".gitignore".
# Details can be found here: https://github.com/tqdm/tqdm/blob/master/LICENCE.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from glob import glob
import os
from zipfile import ZipFile
import dill
from multiprocessing import Pool
from typing import Union, List  # noqa F401  # flake8 issue
import json
import warnings
from urllib.request import urlretrieve, urlopen

import numpy as np  # noqa F401  # flake8 issue
from geoarray import GeoArray  # noqa F401  # flake8 issue
import pandas as pd
from tempfile import TemporaryDirectory
from natsort import natsorted
from tqdm import tqdm

from .options import options


def im2spectra(geoArr):
    # type: (Union[GeoArray, np.ndarray]) -> np.ndarray
    """Convert a 3D image to a 2D array of spectra (rows: samples; columns: spectral information)."""
    return geoArr.reshape((geoArr.shape[0] * geoArr.shape[1], geoArr.shape[2]))


def spectra2im(spectra, tgt_rows, tgt_cols):
    # type: (Union[GeoArray, np.ndarray], int, int) -> np.ndarray
    """Convert a 2D array of spectra (rows: samples; columns: spectral information) to a 3D image.

    :param spectra:  2D array with rows: spectral samples / columns: spectral information (bands)
    :param tgt_rows: number of target image rows
    :param tgt_cols: number of target image columns
    :return:         3D array (rows x columns x spectral bands)
    """
    return spectra.reshape((tgt_rows, tgt_cols, spectra.shape[1]))
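# Usage sketch (illustrative only, not part of the original module): im2spectra and
# spectra2im are inverse reshapes, so an image cube survives the round trip unchanged.
#
#     import numpy as np
#     cube = np.random.rand(10, 20, 6)                   # rows x columns x bands
#     spectra = im2spectra(cube)                         # -> shape (200, 6)
#     assert np.array_equal(spectra2im(spectra, 10, 20), cube)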

_columns_df_trafos = ['method', 'src_sat', 'src_sen', 'src_LBA', 'tgt_sat', 'tgt_sen', 'tgt_LBA', 'n_clusters']


def list_available_transformations(classifier_rootDir=options['classifiers']['rootdir'],
                                   method=None,
                                   src_sat=None, src_sen=None, src_LBA=None,
                                   tgt_sat=None, tgt_sen=None, tgt_LBA=None,
                                   n_clusters=None):
    # type: (str, str, str, str, List[str], str, str, List[str], int) -> pd.DataFrame
    """List all sensor transformations available in the given classifier root directory.

    NOTE: This function can be used to copy/paste possible input parameters for
          spechomo.SpectralHomogenizer.predict_by_machine_learner().

    :param classifier_rootDir:  directory containing classifiers for homogenization, either as .zip archives or
                                as .dill files
    :param method:              filter results by the machine learning approach to be used for spectral bands
                                prediction
    :param src_sat:             filter results by source satellite, e.g., 'Landsat-8'
    :param src_sen:             filter results by source sensor, e.g., 'OLI_TIRS'
    :param src_LBA:             filter results by source bands list
    :param tgt_sat:             filter results by target satellite, e.g., 'Landsat-8'
    :param tgt_sen:             filter results by target sensor, e.g., 'OLI_TIRS'
    :param tgt_LBA:             filter results by target bands list
    :param n_clusters:          filter results by the number of spectral clusters to be used during LR/RR/QR
                                homogenization
    :return:                    pandas.DataFrame listing all available transformations
    """
    clf_zipfiles = glob(os.path.join(classifier_rootDir, '*_classifiers.zip'))

    df = pd.DataFrame(columns=_columns_df_trafos)

    if clf_zipfiles:
        for path_zipfile in clf_zipfiles:

            with TemporaryDirectory() as td, ZipFile(path_zipfile) as zF:
                zF.extractall(td)

                with Pool() as pool:
                    paths_dillfiles = [os.path.join(td, fN) for fN in natsorted(zF.namelist())]
                    dfs = pool.map(explore_classifer_dillfile, paths_dillfiles)

                    pool.close()  # needed to make coverage work in multiprocessing
                    pool.join()

                df = pd.concat([df] + dfs)

    else:
        paths_dillfiles = natsorted(glob(os.path.join(classifier_rootDir, '*.dill')))

        if paths_dillfiles:
            with Pool() as pool:
                dfs = pool.map(explore_classifer_dillfile, paths_dillfiles)
                pool.close()  # needed to make coverage work in multiprocessing
                pool.join()

            df = pd.concat([df] + dfs)

    # apply filters
    if not df.empty:
        df = df.reset_index(drop=True)

        def _force_list(arg):
            return [arg] if not isinstance(arg, list) else arg

        for filterparam in _columns_df_trafos:
            if locals()[filterparam]:
                df = df[df[filterparam].isin(_force_list(locals()[filterparam]))]

    return df
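# Usage sketch (illustrative only; the satellite names used as filters are assumptions):
#
#     # list all Landsat-8 -> Sentinel-2A transformations found below the classifier root directory
#     df_trafos = list_available_transformations(src_sat='Landsat-8', tgt_sat='Sentinel-2A')
#     print(df_trafos[['method', 'src_LBA', 'tgt_LBA', 'n_clusters']])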

def explore_classifer_dillfile(path_dillFile):
    # type: (str) -> pd.DataFrame
    """List all homogenization transformations included in the given .dill file.

    :param path_dillFile:   path of the .dill classifier collection file to explore
    :return:                pandas.DataFrame listing the included transformations
    """
    df = pd.DataFrame(columns=_columns_df_trafos)

    meth_nclust_str, src_sat, src_sen = os.path.splitext(os.path.basename(path_dillFile))[0].split('__')
    method, nclust_str = meth_nclust_str.split('_')
    nclust = int(nclust_str.split('clust')[1])

    with open(path_dillFile, 'rb') as inF:
        content = dill.load(inF)

    for src_LBA, subdict_L1 in content.items():
        for tgt_sat_sen, subdict_L2 in subdict_L1.items():
            tgt_sat, tgt_sen = tgt_sat_sen

            for tgt_LBA, subdict_L3 in subdict_L2.items():
                df.loc[len(df)] = [method,
                                   src_sat, src_sen, src_LBA.split('__'),
                                   tgt_sat, tgt_sen, tgt_LBA.split('__'),
                                   nclust]

    return df
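# Filename convention implied by the parsing above (the example name below is an assumption):
# '<method>_clust<n>__<src_satellite>__<src_sensor>.dill', e.g. 'LR_clust50__Landsat-8__OLI_TIRS.dill'.
#
#     df_clf = explore_classifer_dillfile('/path/to/classifiers/LR_clust50__Landsat-8__OLI_TIRS.dill')
#     print(df_clf.head())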

def export_classifiers_as_JSON(export_rootDir,
                               classifier_rootDir=options['classifiers']['rootdir'],
                               method=None,
                               src_sat=None, src_sen=None, src_LBA=None,
                               tgt_sat=None, tgt_sen=None, tgt_LBA=None,
                               n_clusters=None):
    # type: (str, str, str, str, str, List[str], str, str, List[str], int) -> None
    """Export the spectral harmonization classifiers that match the provided filter criteria as JSON files.

    NOTE: So far, this function only works for LR classifiers.

    :param export_rootDir:      directory where the exported JSON files are saved
    :param classifier_rootDir:  directory containing classifiers for homogenization, either as .zip archives or
                                as .dill files
    :param method:              filter by the machine learning approach to be used for spectral bands prediction
    :param src_sat:             filter by source satellite, e.g., 'Landsat-8'
    :param src_sen:             filter by source sensor, e.g., 'OLI_TIRS'
    :param src_LBA:             filter by source bands list
    :param tgt_sat:             filter by target satellite, e.g., 'Landsat-8'
    :param tgt_sen:             filter by target sensor, e.g., 'OLI_TIRS'
    :param tgt_LBA:             filter by target bands list
    :param n_clusters:          filter by the number of spectral clusters to be used during LR/RR/QR homogenization
    :return:                    None
    """
    from .classifier import Cluster_Learner

    # get matching classifiers
    df_trafos = list_available_transformations(classifier_rootDir=classifier_rootDir,
                                               method=method,
                                               src_sat=src_sat, src_sen=src_sen, src_LBA=src_LBA,
                                               tgt_sat=tgt_sat, tgt_sen=tgt_sen, tgt_LBA=tgt_LBA,
                                               n_clusters=n_clusters)

    if len(df_trafos):
        # export each matching transformation to a JSON file
        for i, trafo in tqdm(df_trafos.iterrows(), total=len(df_trafos)):

            CL = Cluster_Learner.from_disk(
                classifier_rootDir=classifier_rootDir,
                method=trafo.method,
                n_clusters=trafo.n_clusters,
                src_satellite=trafo.src_sat, src_sensor=trafo.src_sen, src_LBA=trafo.src_LBA,
                tgt_satellite=trafo.tgt_sat, tgt_sensor=trafo.tgt_sen, tgt_LBA=trafo.tgt_LBA)

            path_out = os.path.join(export_rootDir,
                                    trafo.method,
                                    "src__%s__%s__%s" % (trafo.src_sat, trafo.src_sen, '_'.join(trafo.src_LBA)),
                                    "to__%s__%s__%s" % (trafo.tgt_sat, trafo.tgt_sen, '_'.join(trafo.tgt_LBA)),
                                    "%s__clust%d.json" % (trafo.method, trafo.n_clusters)
                                    )
            os.makedirs(os.path.dirname(path_out), exist_ok=True)

            with open(path_out, 'w') as outF:
                json.dump(CL.to_jsonable_dict(), outF, indent=4, sort_keys=True)

    else:
        warnings.warn('No classifiers found matching the provided filter criteria. Nothing exported.', RuntimeWarning)
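# Usage sketch (illustrative only; the output directory and filter values are assumptions):
#
#     # export all LR classifiers for Landsat-8/OLI_TIRS as JSON files below /tmp/spechomo_json
#     export_classifiers_as_JSON('/tmp/spechomo_json',
#                                method='LR', src_sat='Landsat-8', src_sen='OLI_TIRS')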

def download_pretrained_classifiers(method, tgt_dir=options['classifiers']['rootdir']):
    remote_filespecs = {
        '100k_conservrsp_SCA_SD100percSA90perc_without_aviris__SCADist90pSAM40p': {
            # 'LR': 'https://nextcloud.gfz-potsdam.de/s/Rzb75kckBreFfNE/download',  # 20201008
            'LR': 'https://nextcloud.gfz-potsdam.de/s/mZEnS5g7AGWyRHB/download',
            # 'QR': 'https://nextcloud.gfz-potsdam.de/s/Kk4zoCXxAEkAFZL/download',  # 20201008
            'QR': 'https://nextcloud.gfz-potsdam.de/s/JcQDbZBtTiw9NYi/download',
        }
    }
    clf_name = options['classifiers']['name']
    try:
        url = remote_filespecs[clf_name][method]
    except KeyError:
        raise RuntimeError("Currently there are no %s classifiers named '%s' available." % (method, clf_name))

    for i in range(3):  # try 3 times
        if i > 0:
            print('Download failed. Restarting...')

        # get filename
        fn = urlopen(url).headers['content-disposition'].split('filename=')[-1].replace('"', '')

        # download
        if not os.path.isdir(tgt_dir):
            os.makedirs(tgt_dir)
        outP, msg = urlretrieve(url, os.path.join(tgt_dir, fn))

        if os.path.getsize(outP) == int(msg.get('content-length')):
            return outP
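# Usage sketch (illustrative only; the target directory is an assumption and the download
# requires network access to the GFZ Nextcloud server):
#
#     path_clf = download_pretrained_classifiers(method='LR', tgt_dir='/tmp/spechomo_classifiers')
#     print(path_clf)  # local path of the downloaded classifier archive on success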