# Source code for gms_preprocessing.options.options_schema

# -*- coding: utf-8 -*-

# gms_preprocessing, spatial and spectral homogenization of satellite remote sensing data
#
# Copyright (C) 2020  Daniel Scheffler (GFZ Potsdam, daniel.scheffler@gfz-potsdam.de)
#
# This software was developed within the context of the GeoMultiSens project funded
# by the German Federal Ministry of Education and Research
# (project grant code: 01 IS 14 010 A-C).
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later version.
# Please note the following exception: `gms_preprocessing` depends on tqdm, which
# is distributed under the Mozilla Public Licence (MPL) v2.0 except for the files
# "tqdm/_tqdm.py", "setup.py", "README.rst", "MANIFEST.in" and ".gitignore".
# Details can be found here: https://github.com/tqdm/tqdm/blob/master/LICENCE.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License along
# with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Definition of gms options schema (as used by cerberus library)."""


# Cerberus schema describing the structure of the options accepted as input.
# Every entry is declared with required=False, i.e. any option may be omitted here.
# The nesting (section -> [sub-section ->] option) is the one that the key tuples in
# 'parameter_mapping' refer to.
gms_schema_input = dict(
    # global opts
    global_opts=dict(
        type='dict', required=False,
        schema=dict(
            inmem_serialization=dict(type='boolean', required=False),
            parallelization_level=dict(type='string', required=False, allowed=['scenes', 'tiles']),
            spatial_index_server_host=dict(type='string', required=False),
            spatial_index_server_port=dict(type='integer', required=False),
            CPUs=dict(type='integer', required=False, nullable=True),
            CPUs_all_jobs=dict(type='integer', required=False, nullable=True),
            max_mem_usage=dict(type='integer', required=False, min=0, max=100),
            critical_mem_usage=dict(type='integer', required=False, min=0, max=100),
            max_parallel_reads_writes=dict(type='integer', required=False, min=0),
            allow_subMultiprocessing=dict(type='boolean', required=False),
            delete_old_output=dict(type='boolean', required=False),
            disable_exception_handler=dict(type='boolean', required=False),
            disable_IO_locks=dict(type='boolean', required=False),
            disable_CPU_locks=dict(type='boolean', required=False),
            disable_DB_locks=dict(type='boolean', required=False),
            disable_memory_locks=dict(type='boolean', required=False),
            min_version_mem_usage_stats=dict(type='string', required=False),
            log_level=dict(type='string', required=False, allowed=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']),
            # list of exactly two integers (minlength == maxlength == 2)
            tiling_block_size_XY=dict(type='list', required=False, schema=dict(type="integer"), minlength=2,
                                      maxlength=2),
            is_test=dict(type='boolean', required=False),
            profiling=dict(type='boolean', required=False),
            benchmark_global=dict(type='boolean', required=False),
        )),
    # paths (only the two entries declared nullable=True may be None)
    paths=dict(
        type='dict', required=False,
        schema=dict(
            path_fileserver=dict(type='string', required=False),
            path_archive=dict(type='string', required=False),
            path_procdata_scenes=dict(type='string', required=False),
            path_procdata_MGRS=dict(type='string', required=False),
            path_tempdir=dict(type='string', required=False),
            path_benchmarks=dict(type='string', required=False),
            path_job_logs=dict(type='string', required=False),
            path_spatIdxSrv=dict(type='string', required=False),
            path_SNR_models=dict(type='string', required=False),
            path_dem_proc_srtm_90m=dict(type='string', required=False),
            path_earthSunDist=dict(type='string', required=False),
            path_solar_irr=dict(type='string', required=False),
            path_cloud_classif=dict(type='string', required=False),
            path_custom_sicor_options=dict(type='string', required=False, nullable=True),
            path_ECMWF_db=dict(type='string', required=False),
            path_spechomo_classif=dict(type='string', required=False, nullable=True),
        )),
    # processors: one sub-section with general options plus one sub-section per processing level;
    # each level shares the boolean triple run_processor / write_output / delete_output
    processors=dict(
        type='dict', required=False,
        schema=dict(
            # processors > general opts
            general_opts=dict(type='dict', required=False, schema=dict(
                skip_thermal=dict(type='boolean', required=False),
                skip_pan=dict(type='boolean', required=False),
                sort_bands_by_cwl=dict(type='boolean', required=False),
                target_radunit_optical=dict(type='string', required=False, allowed=['Rad', 'TOA_Ref', 'BOA_Ref']),
                target_radunit_thermal=dict(type='string', required=False, allowed=['Rad', 'Temp']),
                scale_factor_TOARef=dict(type='integer', required=False),
                scale_factor_BOARef=dict(type='integer', required=False),
                mgrs_pixel_buffer=dict(type='integer', required=False),
                output_data_compression=dict(type='boolean', required=False),
                write_ENVIclassif_cloudmask=dict(type='boolean', required=False),
                )),
            # processors > L1A
            L1A=dict(type='dict', required=False, schema=dict(
                run_processor=dict(type='boolean', required=False),
                write_output=dict(type='boolean', required=False),
                delete_output=dict(type='boolean', required=False),
                # NOTE: the key is spelled 'accurracy' (sic); 'parameter_mapping' uses the same spelling
                SZA_SAA_calculation_accurracy=dict(type='string', required=False, allowed=['coarse', 'fine']),
                export_VZA_SZA_SAA_RAA_stats=dict(type='boolean', required=False),
                )),
            # processors > L1B
            L1B=dict(type='dict', required=False, schema=dict(
                run_processor=dict(type='boolean', required=False),
                write_output=dict(type='boolean', required=False),
                delete_output=dict(type='boolean', required=False),
                skip_coreg=dict(type='boolean', required=False),
                spatial_ref_min_overlap=dict(type='float', required=False, min=0, max=100),
                spatial_ref_min_cloudcov=dict(type='float', required=False, min=0, max=100),
                spatial_ref_max_cloudcov=dict(type='float', required=False, min=0, max=100),
                spatial_ref_plusminus_days=dict(type='integer', required=False),
                spatial_ref_plusminus_years=dict(type='integer', required=False),
                coreg_band_wavelength_for_matching=dict(type='integer', required=False, min=350, max=2500),
                coreg_max_shift_allowed=dict(type='float', required=False, min=0),
                # list of at most two integers, each >= 8; an empty list is accepted (minlength=0)
                coreg_window_size=dict(type='list', required=False, minlength=0, maxlength=2,
                                       schema=dict(type='integer', required=False, min=8)),
                )),
            # processors > L1C
            L1C=dict(type='dict', required=False, schema=dict(
                run_processor=dict(type='boolean', required=False),
                write_output=dict(type='boolean', required=False),
                delete_output=dict(type='boolean', required=False),
                # the sub-keys contain hyphens and therefore cannot be passed as keyword arguments to dict(),
                # hence the dict literal here
                cloud_masking_algorithm=dict(type='dict', required=False, schema={
                    'Landsat-4': dict(type='string', required=False, allowed=['FMASK', 'Classical Bayesian', 'SICOR']),
                    'Landsat-5': dict(type='string', required=False, allowed=['FMASK', 'Classical Bayesian', 'SICOR']),
                    'Landsat-7': dict(type='string', required=False, allowed=['FMASK', 'Classical Bayesian', 'SICOR']),
                    'Landsat-8': dict(type='string', required=False, allowed=['FMASK', 'Classical Bayesian', 'SICOR']),
                    'Sentinel-2A': dict(type='string', required=False, allowed=['FMASK', 'Classical Bayesian',
                                                                                'SICOR']),
                    'Sentinel-2B': dict(type='string', required=False, allowed=['FMASK', 'Classical Bayesian',
                                                                                'SICOR']),
                    }),
                export_L1C_obj_dumps=dict(type='boolean', required=False),
                auto_download_ecmwf=dict(type='boolean', required=False),
                ac_fillnonclear_areas=dict(type='boolean', required=False),
                ac_clear_area_labels=dict(type='list', required=False, schema=dict(type='string', allowed=[
                    "Clear", "Snow", "Water", "Shadow", "Cirrus", "Cloud"])),
                ac_scale_factor_errors=dict(type='integer', required=False),
                ac_max_ram_gb=dict(type='integer', required=False),
                ac_estimate_accuracy=dict(type='boolean', required=False),
                ac_bandwise_accuracy=dict(type='boolean', required=False),
                )),
            # processors > L2A
            L2A=dict(type='dict', required=False, schema=dict(
                run_processor=dict(type='boolean', required=False),
                write_output=dict(type='boolean', required=False),
                delete_output=dict(type='boolean', required=False),
                align_coord_grids=dict(type='boolean', required=False),
                match_gsd=dict(type='boolean', required=False),
                spatial_resamp_alg=dict(type='string', required=False,
                                        allowed=['nearest', 'bilinear', 'cubic', 'cubic_spline', 'lanczos', 'average',
                                                 'mode', 'max', 'min', 'med', 'q1', 'q3']),
                clip_to_extent=dict(type='boolean', required=False),
                spathomo_estimate_accuracy=dict(type='boolean', required=False),
                )),
            # processors > L2B
            L2B=dict(type='dict', required=False, schema=dict(
                run_processor=dict(type='boolean', required=False),
                write_output=dict(type='boolean', required=False),
                delete_output=dict(type='boolean', required=False),
                spechomo_method=dict(type='string', required=False, allowed=['LI', 'LR', 'QR', 'RFR']),
                spechomo_n_clusters=dict(type='integer', required=False, allowed=[1, 5, 10, 15, 20, 30, 40, 50]),
                spechomo_classif_alg=dict(type='string', required=False, allowed=['MinDist', 'kNN', 'SAM', 'SID']),
                spechomo_kNN_n_neighbors=dict(type='integer', required=False, min=0),
                spechomo_estimate_accuracy=dict(type='boolean', required=False),
                spechomo_bandwise_accuracy=dict(type='boolean', required=False),
                )),
            # processors > L2C
            L2C=dict(type='dict', required=False, schema=dict(
                run_processor=dict(type='boolean', required=False),
                write_output=dict(type='boolean', required=False),
                delete_output=dict(type='boolean', required=False),
                )),
        )),
    # usecase
    # NOTE(review): 'parameter_mapping' also lists ('usecase', 'virtual_sensor_name'), which has no entry
    #               in this section - confirm whether that is intended.
    usecase=dict(
        type='dict', required=False, schema=dict(
            virtual_sensor_id=dict(type='integer', required=False),  # TODO add possible values
            datasetid_spatial_ref=dict(type='integer', required=False, nullable=True),
            datasetid_spectral_ref=dict(type='integer', required=False, nullable=True),
            target_CWL=dict(type='list', required=False, schema=dict(type='float')),
            target_FWHM=dict(type='list', required=False, schema=dict(type='float')),
            target_gsd=dict(type='list', required=False, schema=dict(type='float'),  maxlength=2),
            target_epsg_code=dict(type='integer', required=False, nullable=True),
            spatial_ref_gridx=dict(type='list', required=False, schema=dict(type='float'), maxlength=2),
            spatial_ref_gridy=dict(type='list', required=False, schema=dict(type='float'), maxlength=2),
        )),
)


def get_updated_schema(source_schema, key2update, new_value):
    """Return a deep copy of a schema in which every occurrence of a key is set to a new value.

    The input schema is left untouched. The nested dictionaries are walked recursively; wherever
    a key equal to 'key2update' is found, its value is replaced by 'new_value' (the replaced value
    itself is not descended into). Only values of type dict are traversed, i.e., dictionaries
    contained in lists are not visited.

    :param source_schema:   (nested) schema dictionary to derive the updated schema from
    :param key2update:      the key whose value is to be replaced at every nesting level
    :param new_value:       the value to assign to each matching key (the same object is used
                            for all matches)
    :return:                the updated deep copy of 'source_schema'
    """
    # imported locally, as in the original implementation
    from copy import deepcopy

    def deep_update(schema):
        """Update the given dictionary in place and return it."""
        # assigning to an already existing key does not change the dictionary size,
        # so it is safe to do this while iterating
        for key, value in schema.items():
            if key == key2update:
                schema[key] = new_value
            elif isinstance(value, dict):
                deep_update(value)

        return schema

    return deep_update(deepcopy(source_schema))
# Same structure as gms_schema_input, but with every 'required' entry set to True.
gms_schema_config_output = get_updated_schema(gms_schema_input, key2update='required', new_value=True)


# Maps each parameter name to the sequence of keys under which its value is found in the nested
# configuration dictionary (see get_param_from_json_config). If the last element is a list, the
# parameter is composed of the values of all listed keys, in that order.
parameter_mapping = dict(
    # global opts
    inmem_serialization=('global_opts', 'inmem_serialization'),
    parallelization_level=('global_opts', 'parallelization_level'),
    spatial_index_server_host=('global_opts', 'spatial_index_server_host'),
    spatial_index_server_port=('global_opts', 'spatial_index_server_port'),
    CPUs=('global_opts', 'CPUs'),
    CPUs_all_jobs=('global_opts', 'CPUs_all_jobs'),
    max_mem_usage=('global_opts', 'max_mem_usage'),
    critical_mem_usage=('global_opts', 'critical_mem_usage'),
    max_parallel_reads_writes=('global_opts', 'max_parallel_reads_writes'),
    allow_subMultiprocessing=('global_opts', 'allow_subMultiprocessing'),
    delete_old_output=('global_opts', 'delete_old_output'),
    disable_exception_handler=('global_opts', 'disable_exception_handler'),
    disable_IO_locks=('global_opts', 'disable_IO_locks'),
    disable_CPU_locks=('global_opts', 'disable_CPU_locks'),
    disable_DB_locks=('global_opts', 'disable_DB_locks'),
    disable_memory_locks=('global_opts', 'disable_memory_locks'),
    min_version_mem_usage_stats=('global_opts', 'min_version_mem_usage_stats'),
    log_level=('global_opts', 'log_level'),
    tiling_block_size_XY=('global_opts', 'tiling_block_size_XY'),
    is_test=('global_opts', 'is_test'),
    profiling=('global_opts', 'profiling'),
    benchmark_global=('global_opts', 'benchmark_global'),

    # paths
    path_fileserver=('paths', 'path_fileserver'),
    path_archive=('paths', 'path_archive'),
    path_procdata_scenes=('paths', 'path_procdata_scenes'),
    path_procdata_MGRS=('paths', 'path_procdata_MGRS'),
    path_tempdir=('paths', 'path_tempdir'),
    path_benchmarks=('paths', 'path_benchmarks'),
    path_job_logs=('paths', 'path_job_logs'),
    path_spatIdxSrv=('paths', 'path_spatIdxSrv'),
    path_SNR_models=('paths', 'path_SNR_models'),
    path_dem_proc_srtm_90m=('paths', 'path_dem_proc_srtm_90m'),
    path_earthSunDist=('paths', 'path_earthSunDist'),
    path_solar_irr=('paths', 'path_solar_irr'),
    path_cloud_classif=('paths', 'path_cloud_classif'),
    path_custom_sicor_options=('paths', 'path_custom_sicor_options'),
    path_ECMWF_db=('paths', 'path_ECMWF_db'),
    path_spechomo_classif=('paths', 'path_spechomo_classif'),

    # processors > general opts
    skip_thermal=('processors', 'general_opts', 'skip_thermal'),
    skip_pan=('processors', 'general_opts', 'skip_pan'),
    sort_bands_by_cwl=('processors', 'general_opts', 'sort_bands_by_cwl'),
    target_radunit_optical=('processors', 'general_opts', 'target_radunit_optical'),
    target_radunit_thermal=('processors', 'general_opts', 'target_radunit_thermal'),
    scale_factor_TOARef=('processors', 'general_opts', 'scale_factor_TOARef'),
    scale_factor_BOARef=('processors', 'general_opts', 'scale_factor_BOARef'),
    mgrs_pixel_buffer=('processors', 'general_opts', 'mgrs_pixel_buffer'),
    output_data_compression=('processors', 'general_opts', 'output_data_compression'),
    write_ENVIclassif_cloudmask=('processors', 'general_opts', 'write_ENVIclassif_cloudmask'),

    # processors > L1A
    exec_L1AP=('processors', 'L1A', ['run_processor', 'write_output', 'delete_output']),
    SZA_SAA_calculation_accurracy=('processors', 'L1A', 'SZA_SAA_calculation_accurracy'),
    export_VZA_SZA_SAA_RAA_stats=('processors', 'L1A', 'export_VZA_SZA_SAA_RAA_stats'),

    # processors > L1B
    exec_L1BP=('processors', 'L1B', ['run_processor', 'write_output', 'delete_output']),
    skip_coreg=('processors', 'L1B', 'skip_coreg'),
    spatial_ref_min_overlap=('processors', 'L1B', 'spatial_ref_min_overlap'),
    spatial_ref_min_cloudcov=('processors', 'L1B', 'spatial_ref_min_cloudcov'),
    spatial_ref_max_cloudcov=('processors', 'L1B', 'spatial_ref_max_cloudcov'),
    spatial_ref_plusminus_days=('processors', 'L1B', 'spatial_ref_plusminus_days'),
    spatial_ref_plusminus_years=('processors', 'L1B', 'spatial_ref_plusminus_years'),
    coreg_band_wavelength_for_matching=('processors', 'L1B', 'coreg_band_wavelength_for_matching'),
    coreg_max_shift_allowed=('processors', 'L1B', 'coreg_max_shift_allowed'),
    coreg_window_size=('processors', 'L1B', 'coreg_window_size'),

    # processors > L1C
    exec_L1CP=('processors', 'L1C', ['run_processor', 'write_output', 'delete_output']),
    cloud_masking_algorithm=('processors', 'L1C', 'cloud_masking_algorithm'),
    export_L1C_obj_dumps=('processors', 'L1C', 'export_L1C_obj_dumps'),
    auto_download_ecmwf=('processors', 'L1C', 'auto_download_ecmwf'),
    ac_fillnonclear_areas=('processors', 'L1C', 'ac_fillnonclear_areas'),
    ac_clear_area_labels=('processors', 'L1C', 'ac_clear_area_labels'),
    ac_scale_factor_errors=('processors', 'L1C', 'ac_scale_factor_errors'),
    ac_max_ram_gb=('processors', 'L1C', 'ac_max_ram_gb'),
    ac_estimate_accuracy=('processors', 'L1C', 'ac_estimate_accuracy'),
    ac_bandwise_accuracy=('processors', 'L1C', 'ac_bandwise_accuracy'),

    # processors > L2A
    exec_L2AP=('processors', 'L2A', ['run_processor', 'write_output', 'delete_output']),
    align_coord_grids=('processors', 'L2A', 'align_coord_grids'),
    match_gsd=('processors', 'L2A', 'match_gsd'),
    spatial_resamp_alg=('processors', 'L2A', 'spatial_resamp_alg'),
    clip_to_extent=('processors', 'L2A', 'clip_to_extent'),
    spathomo_estimate_accuracy=('processors', 'L2A', 'spathomo_estimate_accuracy'),

    # processors > L2B
    exec_L2BP=('processors', 'L2B', ['run_processor', 'write_output', 'delete_output']),
    spechomo_method=('processors', 'L2B', 'spechomo_method'),
    spechomo_n_clusters=('processors', 'L2B', 'spechomo_n_clusters'),
    spechomo_classif_alg=('processors', 'L2B', 'spechomo_classif_alg'),
    spechomo_kNN_n_neighbors=('processors', 'L2B', 'spechomo_kNN_n_neighbors'),
    spechomo_estimate_accuracy=('processors', 'L2B', 'spechomo_estimate_accuracy'),
    spechomo_bandwise_accuracy=('processors', 'L2B', 'spechomo_bandwise_accuracy'),

    # processors > L2C
    exec_L2CP=('processors', 'L2C', ['run_processor', 'write_output', 'delete_output']),

    # usecase
    virtual_sensor_id=('usecase', 'virtual_sensor_id'),
    datasetid_spatial_ref=('usecase', 'datasetid_spatial_ref'),
    # NOTE(review): gms_schema_input['usecase'] declares no 'virtual_sensor_name' entry - confirm
    #               where this value is expected to come from.
    virtual_sensor_name=('usecase', 'virtual_sensor_name'),
    datasetid_spectral_ref=('usecase', 'datasetid_spectral_ref'),
    target_CWL=('usecase', 'target_CWL'),
    target_FWHM=('usecase', 'target_FWHM'),
    target_gsd=('usecase', 'target_gsd'),
    target_epsg_code=('usecase', 'target_epsg_code'),
    spatial_ref_gridx=('usecase', 'spatial_ref_gridx'),
    spatial_ref_gridy=('usecase', 'spatial_ref_gridy'),
)
def get_param_from_json_config(paramname, json_config):
    """Look up the value of a parameter within a nested configuration dictionary.

    The location of the parameter is taken from 'parameter_mapping': all but the last element of
    the mapped key sequence name the nested sub-dictionaries to descend into, the last element
    names the entry to read.

    :param paramname:    name of the parameter (a key of 'parameter_mapping')
    :param json_config:  nested configuration dictionary to read the value from
    :return:             the value stored under the mapped key; if the last element of the mapping
                         is a list of keys, a list with the value of each of these keys
    :raises KeyError:    if 'paramname' is not contained in 'parameter_mapping' or a mapped key
                         is missing in 'json_config'
    """
    *parent_keys, last_key = parameter_mapping[paramname]

    # every key except the last one points to a sub-dictionary
    sub_config = json_config
    for key in parent_keys:
        sub_config = sub_config[key]

    if isinstance(last_key, list):
        # the parameter is composed of several entries of the same sub-dictionary
        return [sub_config[sub_key] for sub_key in last_key]

    return sub_config[last_key]