# -*- coding: utf-8 -*-

# spechomo, Spectral homogenization of multispectral satellite data
#
# Copyright (C) 2019-2021
# - Daniel Scheffler (GFZ Potsdam, daniel.scheffler@gfz-potsdam.de)
# - Helmholtz Centre Potsdam - GFZ German Research Centre for Geosciences Potsdam,
#   Germany (https://www.gfz-potsdam.de/)
#
# This software was developed within the context of the GeoMultiSens project funded
# by the German Federal Ministry of Education and Research
# (project grant code: 01 IS 14 010 A-C).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Please note the following exception: `spechomo` depends on tqdm, which is
# distributed under the Mozilla Public Licence (MPL) v2.0 except for the files
# "tqdm/_tqdm.py", "setup.py", "README.rst", "MANIFEST.in" and ".gitignore".
# Details can be found here: https://github.com/tqdm/tqdm/blob/master/LICENCE.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from glob import glob
import os
from zipfile import ZipFile
import dill
from multiprocessing import Pool
from typing import Union, List  # noqa F401  # flake8 issue
import json
import warnings
from urllib.request import urlretrieve, urlopen

import numpy as np  # noqa F401  # flake8 issue
from geoarray import GeoArray  # noqa F401  # flake8 issue
import pandas as pd
from tempfile import TemporaryDirectory
from natsort import natsorted
from tqdm import tqdm

from .options import options
def im2spectra(geoArr):
    # type: (Union[GeoArray, np.ndarray]) -> np.ndarray
    """Reshape a 3D image cube into a 2D array of spectra.

    Each row of the output corresponds to one pixel sample; each column holds the
    spectral information (bands).
    """
    n_rows, n_cols, n_bands = geoArr.shape[0], geoArr.shape[1], geoArr.shape[2]
    return geoArr.reshape((n_rows * n_cols, n_bands))
def spectra2im(spectra, tgt_rows, tgt_cols):
    # type: (Union[GeoArray, np.ndarray], int, int) -> np.ndarray
    """Convert array of spectra samples (rows: samples; cols: spectral information) to a 3D image.

    :param spectra:   2D array with rows: spectral samples / columns: spectral information (bands)
    :param tgt_rows:  number of target image rows
    :param tgt_cols:  number of target image columns
    :return:          3D array (rows x columns x spectral bands)
    """
    # NOTE: tgt_rows * tgt_cols must equal the number of rows in 'spectra',
    #       otherwise numpy raises a ValueError here.
    return spectra.reshape((tgt_rows, tgt_cols, spectra.shape[1]))
# Column names shared by all DataFrames of available transformations in this module
# (also used as the list of valid filter parameter names).
_columns_df_trafos = ['method', 'src_sat', 'src_sen', 'src_LBA', 'tgt_sat', 'tgt_sen', 'tgt_LBA', 'n_clusters']
def list_available_transformations(classifier_rootDir=options['classifiers']['rootdir'],
                                   method=None,
                                   src_sat=None, src_sen=None, src_LBA=None,
                                   tgt_sat=None, tgt_sen=None, tgt_LBA=None,
                                   n_clusters=None):
    # type: (str, str, str, str, List[str], str, str, List[str], int) -> pd.DataFrame
    """List all sensor transformations available according to the given classifier root directory.

    NOTE: This function can be used to copy/paste possible input parameters for
          spechomo.SpectralHomogenizer.predict_by_machine_learner().

    :param classifier_rootDir:  directory containing classifiers for homogenization, either as .zip archives or
                                as .dill files
    :param method:      filter results by the machine learning approach to be used for spectral bands prediction
    :param src_sat:     filter results by source satellite, e.g., 'Landsat-8'
    :param src_sen:     filter results by source sensor, e.g., 'OLI_TIRS'
    :param src_LBA:     filter results by source bands list
    :param tgt_sat:     filter results by target satellite, e.g., 'Landsat-8'
    :param tgt_sen:     filter results by target sensor, e.g., 'OLI_TIRS'
    :param tgt_LBA:     filter results by target bands list
    :param n_clusters:  filter results by the number of spectral clusters to be used during LR/ RR/ QR
                        homogenization
    :return: pandas.DataFrame listing all the available transformations
    """
    clf_zipfiles = glob(os.path.join(classifier_rootDir, '*_classifiers.zip'))

    df = pd.DataFrame(columns=_columns_df_trafos)

    if clf_zipfiles:
        # explore the .dill files contained in each .zip archive (in parallel)
        for path_zipfile in clf_zipfiles:

            with TemporaryDirectory() as td, ZipFile(path_zipfile) as zF:
                zF.extractall(td)

                with Pool() as pool:
                    paths_dillfiles = [os.path.join(td, fN) for fN in natsorted(zF.namelist())]
                    dfs = pool.map(explore_classifer_dillfile, paths_dillfiles)

                    pool.close()  # needed to make coverage work in multiprocessing
                    pool.join()

                df = pd.concat([df] + dfs)

    else:
        # no archives found -> fall back to plain .dill files within the root directory
        paths_dillfiles = natsorted(glob(os.path.join(classifier_rootDir, '*.dill')))

        if paths_dillfiles:
            with Pool() as pool:
                dfs = pool.map(explore_classifer_dillfile, paths_dillfiles)
                pool.close()  # needed to make coverage work in multiprocessing
                pool.join()

            df = pd.concat([df] + dfs)

    # apply filters
    # NOTE: an explicit parameter->value mapping is used here instead of the former
    #       locals()-based lookup, which silently breaks as soon as a local variable
    #       or parameter is renamed
    filterparams = dict(method=method,
                        src_sat=src_sat, src_sen=src_sen, src_LBA=src_LBA,
                        tgt_sat=tgt_sat, tgt_sen=tgt_sen, tgt_LBA=tgt_LBA,
                        n_clusters=n_clusters)

    if not df.empty:
        df = df.reset_index(drop=True)

        def _force_list(arg):
            return [arg] if not isinstance(arg, list) else arg

        for colname, filterval in filterparams.items():
            if filterval:
                df = df[df[colname].isin(_force_list(filterval))]

    return df
def explore_classifer_dillfile(path_dillFile):
    # type: (str) -> pd.DataFrame
    """List all homogenization transformations included in the given .dill file.

    :param path_dillFile:
    :return:
    """
    # the filename encodes method, cluster count and source sensor:
    # '<method>_<n>clust__<src_sat>__<src_sen>.dill'
    basename = os.path.splitext(os.path.basename(path_dillFile))[0]
    meth_nclust_str, src_sat, src_sen = basename.split('__')
    method, nclust_str = meth_nclust_str.split('_')
    nclust = int(nclust_str.split('clust')[1])

    # collect one row per (src_LBA, target sensor, tgt_LBA) combination
    rows = []
    with open(path_dillFile, 'rb') as inF:
        content = dill.load(inF)

        for src_LBA, subdict_L1 in content.items():
            for (tgt_sat, tgt_sen), subdict_L2 in subdict_L1.items():
                for tgt_LBA in subdict_L2:
                    rows.append([method,
                                 src_sat, src_sen, src_LBA.split('__'),
                                 tgt_sat, tgt_sen, tgt_LBA.split('__'),
                                 nclust])

    df = pd.DataFrame(columns=_columns_df_trafos)
    for row in rows:
        df.loc[len(df)] = row

    return df
def export_classifiers_as_JSON(export_rootDir,
                               classifier_rootDir=options['classifiers']['rootdir'],
                               method=None,
                               src_sat=None, src_sen=None, src_LBA=None,
                               tgt_sat=None, tgt_sen=None, tgt_LBA=None,
                               n_clusters=None):
    # type: (str, str, str, str, str, List[str], str, str, List[str], int) -> None
    """Export spectral harmonization classifiers as JSON files that match the provided filtering criteria.

    NOTE: So far, this function will only work for LR classifiers.

    :param export_rootDir:      directory where to save the exported JSON files
    :param classifier_rootDir:  directory containing classifiers for homogenization, either as .zip archives or
                                as .dill files
    :param method:      filter by the machine learning approach to be used for spectral bands prediction
    :param src_sat:     filter by source satellite, e.g., 'Landsat-8'
    :param src_sen:     filter by source sensor, e.g., 'OLI_TIRS'
    :param src_LBA:     filter by source bands list
    :param tgt_sat:     filter by target satellite, e.g., 'Landsat-8'
    :param tgt_sen:     filter by target sensor, e.g., 'OLI_TIRS'
    :param tgt_LBA:     filter by target bands list
    :param n_clusters:  filter by the number of spectral clusters to be used during LR/ RR/ QR
                        homogenization
    :return:
    """
    from .classifier import Cluster_Learner

    # find all transformations matching the given filter criteria
    df_trafos = list_available_transformations(classifier_rootDir=classifier_rootDir,
                                               method=method,
                                               src_sat=src_sat, src_sen=src_sen, src_LBA=src_LBA,
                                               tgt_sat=tgt_sat, tgt_sen=tgt_sen, tgt_LBA=tgt_LBA,
                                               n_clusters=n_clusters)

    if not len(df_trafos):
        warnings.warn('No classifiers found matching the provided filter criteria. Nothing exported.', RuntimeWarning)
        return

    # export each matching transformation to a JSON file
    for _, trafo in tqdm(df_trafos.iterrows(), total=len(df_trafos)):

        clf = Cluster_Learner.from_disk(
            classifier_rootDir=classifier_rootDir,
            method=trafo.method,
            n_clusters=trafo.n_clusters,
            src_satellite=trafo.src_sat, src_sensor=trafo.src_sen, src_LBA=trafo.src_LBA,
            tgt_satellite=trafo.tgt_sat, tgt_sensor=trafo.tgt_sen, tgt_LBA=trafo.tgt_LBA)

        # output path encodes method, source/target sensor and bands
        subpaths = [trafo.method,
                    "src__%s__%s__%s" % (trafo.src_sat, trafo.src_sen, '_'.join(trafo.src_LBA)),
                    "to__%s__%s__%s" % (trafo.tgt_sat, trafo.tgt_sen, '_'.join(trafo.tgt_LBA)),
                    "%s__clust%d.json" % (trafo.method, trafo.n_clusters)]
        path_out = os.path.join(export_rootDir, *subpaths)
        os.makedirs(os.path.dirname(path_out), exist_ok=True)

        with open(path_out, 'w') as outF:
            json.dump(clf.to_jsonable_dict(), outF, indent=4, sort_keys=True)
def download_pretrained_classifiers(method, tgt_dir=options['classifiers']['rootdir']):
    """Download the pretrained classifiers for the given prediction method.

    :param method:   machine learning approach for which classifiers are downloaded (key in remote_filespecs,
                     e.g., 'LR' or 'QR')
    :param tgt_dir:  directory where to store the downloaded file
    :return:         path of the downloaded file (returned as soon as a download attempt succeeds)
    """
    # download URLs, keyed by classifier name and prediction method
    remote_filespecs = {
        '100k_conservrsp_SCA_SD100percSA90perc_without_aviris__SCADist90pSAM40p': {
            # 'LR': 'https://nextcloud.gfz-potsdam.de/s/Rzb75kckBreFfNE/download',  # 20201008
            'LR': 'https://nextcloud.gfz-potsdam.de/s/mZEnS5g7AGWyRHB/download',
            # 'QR': 'https://nextcloud.gfz-potsdam.de/s/Kk4zoCXxAEkAFZL/download',  # 20201008
            'QR': 'https://nextcloud.gfz-potsdam.de/s/JcQDbZBtTiw9NYi/download',
        }
    }
    clf_name = options['classifiers']['name']
    try:
        url = remote_filespecs[clf_name][method]
    except KeyError:
        raise RuntimeError("Currently there are no %s classifiers named '%s' available." % (method, clf_name))

    for i in range(3):  # try 3 times
        if i > 0:
            print('Download failed. Restarting...')

        # get filename from the 'content-disposition' response header
        fn = urlopen(url).headers['content-disposition'].split('filename=')[-1].replace('"', '')

        # download
        if not os.path.isdir(tgt_dir):
            os.makedirs(tgt_dir)
        outP, msg = urlretrieve(url, os.path.join(tgt_dir, fn))

        # only accept the download if the file size matches the announced content-length;
        # otherwise another attempt is started
        # NOTE(review): presumably the function signals failure after the third unsuccessful
        #               attempt — the handling after this loop is not visible here; confirm upstream
        if os.path.getsize(outP) == int(msg.get('content-length')):
            return outP