#
#
#
import geopandas
import pandas
import os
import datetime
import functools
import traceback
import concurrent.futures
import logging

from . import cleans1timeseries
from . import cleans2timeseries



#
#    threading in data collection
#
_S2_IPROCESSESINPOOL          = 3      # number of threads querying cleans2timeseries.S2TimeSeries
_S2_MAX_ATTEMPTS              = 5      # maximum number of retries in case of failure
_S2_SLEEP_SECONDS             = 600    # sleep time between attempts (progressively incremented: attempt * _S2_SLEEP_SECONDS)
_S2_SC_MULTI_GEOMETRIES_QUERY = True   # selection between one get_histogram_n_features or multiple get_histogram dataclient calls

_S1_IPROCESSESINPOOL          = 1      # number of threads querying cleans1timeseries.S1TimeSeries
_S1_MULTI_BAND_QUERY          = True  # NOTE(review): presumably selects a single multi-band query over separate per-band queries - TODO confirm against its users
_S1_FIELDSPERQUERY            = 1      # number of fields per query
_S1_MIN_FIELDS_FOR_BULK_QUERY = 10000  # threshold selection between get_timeseries_n_features or get_timeseries dataclient calls
_S1_MAX_ATTEMPTS              = 5      # maximum number of retries in case of failure
_S1_SLEEP_SECONDS             = 600    # sleep time between attempts (progressively incremented: attempt * _S1_SLEEP_SECONDS)

#
#    candidate data root directories; exactly one of them is selected as SZROOTDIR below
#    (NOTE(review): the prefixes presumably stand for local disk, Windows network drive and MEP platform - confirm)
#
_LOC_SZROOTDIR = r"D:\data"
_WIN_SZROOTDIR = r"O:\data"
_MEP_SZROOTDIR = r"/data/CropSAR/data"

#
#    active configuration
#    - SZROOTDIR       : the data root directory in use
#    - SZVERSION       : version subdirectory name used by the merge functions when calling getoutputdir
#    - _ALLOWOVERWRITE : default of the 'overwrite' parameter of the get...outputfile functions
#
SZROOTDIR = _LOC_SZROOTDIR # alternatives: _WIN_SZROOTDIR, _MEP_SZROOTDIR
SZVERSION = r"VTMP"
_ALLOWOVERWRITE = False

#
#    Q&D file and directory naming conventions
#
SZBASEDIR = os.path.join(SZROOTDIR, "training") # base directory of all training data output, passed to getoutputdir

def getoutputdir(szbasedir, szversion, lstcroptypeids = None):
    '''
    create or retrieve the training data output directory according to some base/version/croptype(s) convention.
    :param szbasedir must be existing directory. subdirectories will be created if necessary
    :param szversion must be specified and may not be whitespace (surrounding whitespace is stripped)
    :param lstcroptypeids list of crop type id's considered in the files in this directory
    :return full path of the (existing) output directory
    :raises Exception if szbasedir is no directory, szversion is empty, or a subdirectory cannot be created
    e.g.
    szbasedir/szversion/Croptype_TMP   - in case no croptypes are specified
    szbasedir/szversion/Croptype_901   - single croptype (lstcroptypeids = ['901']) specified
    szbasedir/szversion/Croptype_71_91 - two croptypes (lstcroptypeids = ['71', '91']) specified
    szbasedir/szversion/Croptype_ALL   - in case more than 8 entries in the list.

    newly created directories get mode 0o777; directories that already existed are left untouched.
    '''
    if not os.path.isdir(szbasedir):          raise Exception("Base directory does not exist '%s'"%(str(szbasedir), ))
    if not (szversion and szversion.strip()): raise Exception("szversion must be non empty string (is '%s')"%(str(szversion), ))
    if lstcroptypeids is None:  lstcroptypeids = ['TMP']
    if len(lstcroptypeids) > 8: lstcroptypeids = ['ALL'] # assuming lst_all_relevant_crops  = ['201', '202', '901', '904', '311', '321', '71', '91', '60']
    szcroptypeidssubdir = "_".join(['Croptype'] + [str(croptypeId) for croptypeId in lstcroptypeids])
    #
    #    create base/version and base/version/Croptype_xxx one level at a time
    #
    szsubdir = szbasedir
    for szpart in (szversion.strip(), szcroptypeidssubdir):
        szsubdir = os.path.join(szsubdir, szpart)
        if not os.path.isdir(szsubdir):
            logging.info ("Creating output directory '%s'" % (szsubdir,) )
            try:
                os.makedirs(szsubdir)
            except FileExistsError:
                #
                #    either a concurrent worker created the directory between our check and the makedirs call,
                #    or something which is not a directory occupies the path. the isdir check below tells which.
                #
                pass
            else:
                os.chmod(szsubdir, 0o777)
        if not os.path.isdir(szsubdir):
            raise Exception("Output subdirectory could not be created. (in Base directory '%s')"%(str(szbasedir), ))
    return szsubdir

def _getoutputfilebasename(szlayername, szyyyycropyear, szyyyymmddfirst, szyyyymmddlast, iminimumfieldareainsquaremeters, inarrowfieldbordersinmeter):
    '''
    build the part of the training data filenames shared by all output files:
        <layer>_<cropyear>_<first>_<last>_<minimum area>_<narrowing>m
    - a missing or blank layer name is replaced by "TMP"
    - the crop year is optional; when blank it is simply left out
    - the minimum area is written in hectare ("2ha") when it is a multiple of 10000 m2, otherwise in m2 ("5000m2")
    these parameters were considered important at the start of the CropSAR project; the convention may be revised later.
    '''
    #
    #    prefix: layer name and optional crop year, each terminated by exactly one underscore
    #
    szlayer  = szlayername.strip() if (szlayername and szlayername.strip()) else "TMP"
    szprefix = szlayer if szlayer.endswith('_') else szlayer + "_"
    szcropyear = szyyyycropyear.strip() if szyyyycropyear else ""
    if szcropyear:
        szprefix += szcropyear
        if not szprefix.endswith('_'): szprefix += "_"
    #
    #    minimum field area: whole hectares when possible, square meters otherwise
    #
    if iminimumfieldareainsquaremeters % 10000 == 0:
        szarea = str(int(iminimumfieldareainsquaremeters/10000)) + "ha"
    else:
        szarea = str(iminimumfieldareainsquaremeters) + "m2"

    return "_".join([szprefix + str(szyyyymmddfirst), str(szyyyymmddlast), szarea, str(inarrowfieldbordersinmeter) + "m"])

def getcleandataoutputfile(szoutdir, szlayername, szyyyycropyear, szyyyymmddfirst, szyyyymmddlast, iminimumfieldareainsquaremeters, inarrowfieldbordersinmeter, overwrite = _ALLOWOVERWRITE):
    '''
    full path of the cleaned data csv file ('<basename>.csv') in szoutdir.
    :raises Exception when the file already exists and overwrite is not set
    '''
    szbasename = _getoutputfilebasename(szlayername, szyyyycropyear, szyyyymmddfirst, szyyyymmddlast, iminimumfieldareainsquaremeters, inarrowfieldbordersinmeter)
    szfile = os.path.join(szoutdir, szbasename + ".csv")
    if not overwrite and os.path.exists(szfile):
        raise Exception("Output file already exists '%s'"%(str(szfile), ))
    return szfile

def getorigndataoutputfile(szoutdir, szlayername, szyyyycropyear, szyyyymmddfirst, szyyyymmddlast, iminimumfieldareainsquaremeters, inarrowfieldbordersinmeter, overwrite = _ALLOWOVERWRITE):
    '''
    full path of the original (raw) data csv file ('<basename>_RAW.csv') in szoutdir.
    :raises Exception when the file already exists and overwrite is not set
    '''
    szbasename = _getoutputfilebasename(szlayername, szyyyycropyear, szyyyymmddfirst, szyyyymmddlast, iminimumfieldareainsquaremeters, inarrowfieldbordersinmeter)
    szfile = os.path.join(szoutdir, szbasename + "_RAW.csv")
    if not overwrite and os.path.exists(szfile):
        raise Exception("Output file already exists '%s'"%(str(szfile), ))
    return szfile

def getflagsdataoutputfile(szoutdir, szlayername, szyyyycropyear, szyyyymmddfirst, szyyyymmddlast, iminimumfieldareainsquaremeters, inarrowfieldbordersinmeter, overwrite = _ALLOWOVERWRITE):
    '''
    full path of the flags csv file ('<basename>_FLAGS.csv') in szoutdir.
    :raises Exception when the file already exists and overwrite is not set
    '''
    szbasename = _getoutputfilebasename(szlayername, szyyyycropyear, szyyyymmddfirst, szyyyymmddlast, iminimumfieldareainsquaremeters, inarrowfieldbordersinmeter)
    szfile = os.path.join(szoutdir, szbasename + "_FLAGS.csv")
    if not overwrite and os.path.exists(szfile):
        raise Exception("Output file already exists '%s'"%(str(szfile), ))
    return szfile

def getprnttologoutputfile(szoutdir, szlayername, szyyyycropyear, szyyyymmddfirst, szyyyymmddlast, iminimumfieldareainsquaremeters, inarrowfieldbordersinmeter, overwrite = _ALLOWOVERWRITE):
    '''
    full path of the log file ('<basename>_LOG.txt') in szoutdir.
    :raises Exception when the file already exists and overwrite is not set
    '''
    szbasename = _getoutputfilebasename(szlayername, szyyyycropyear, szyyyymmddfirst, szyyyymmddlast, iminimumfieldareainsquaremeters, inarrowfieldbordersinmeter)
    szfile = os.path.join(szoutdir, szbasename + "_LOG.txt")
    if not overwrite and os.path.exists(szfile):
        raise Exception("Output file already exists '%s'"%(str(szfile), ))
    return szfile

#
#    Q&D merging of output files 'per croptype' to 'ALL' - according to file and directory naming conventions above
#
def mergeS2trainingdata(szcroplayer, szyyyycropyear, szshapefile, szyyyymmddfirst, szyyyymmddlast, iminimumfieldareainsquaremeters, inarrowfieldbordersinmeter, lstcroptypeids, verbose=True):
    """
    merge csv files for different croptypes into one for the fields as found in the shape file

    for each of the three file kinds (cleaned, RAW, FLAGS) the per-croptype csv files are read from their
    'Croptype_<id>' directories, and the columns of the fields selected from the shape file are written,
    restricted to the requested date range, into one csv file in the directory of the complete croptype list.
    a file kind for which no data at all is found is skipped with a warning; its output file is not written.
    log records emitted during the merge are also written to a '_LOG.txt' file next to the output files.

    :param szcroplayer : "S2_FAPAR", "S2_NDVI",... data layer name as specified by the dataclient 
    :param szyyyycropyear : "2016", "2017", "2018",... indication will be used only as prefix =  szcroplayer_szyyyycropyear of the input and output csv files
    :param szshapefile: shape file containing the parcels (assumes attributes 'fieldID','croptype' and 'area') - used to select the fields in the merge
    :param szyyyymmddfirst: first date of the range to merge
    :param szyyyymmddlast: last date of the range to merge
    :param iminimumfieldareainsquaremeters: minimum parcel area; used both as parcel filter and in the file names
    :param inarrowfieldbordersinmeter: used in the file names only
    :param lstcroptypeids: croptypes to be merged
    :param verbose: log progress at info level
    :raises Exception: when one of the output files (csv or log) already exists

    TODO: all parameters are used to determine the filenames of input and output files. this should be separated.
    TODO: at the moment this only makes sense to create the "Croptype_ALL" files in the same directory as a set of "Croptype_XXX" files

    """
    datetime_tick_training_data  = datetime.datetime.now()
    #
    #    start by finding the output file names, avoid last minute crashes 
    #
    szoutdir            = getoutputdir(SZBASEDIR, SZVERSION, lstcroptypeids)
    szcleandatafilename = getcleandataoutputfile(szoutdir, szcroplayer, szyyyycropyear, szyyyymmddfirst, szyyyymmddlast, iminimumfieldareainsquaremeters, inarrowfieldbordersinmeter, overwrite = False)
    szorigndatafilename = getorigndataoutputfile(szoutdir, szcroplayer, szyyyycropyear, szyyyymmddfirst, szyyyymmddlast, iminimumfieldareainsquaremeters, inarrowfieldbordersinmeter, overwrite = False)
    szflagsdatafilename = getflagsdataoutputfile(szoutdir, szcroplayer, szyyyycropyear, szyyyymmddfirst, szyyyymmddlast, iminimumfieldareainsquaremeters, inarrowfieldbordersinmeter, overwrite = False)
    szprnttologfilename = getprnttologoutputfile(szoutdir, szcroplayer, szyyyycropyear, szyyyymmddfirst, szyyyymmddlast, iminimumfieldareainsquaremeters, inarrowfieldbordersinmeter, overwrite = False)
    #
    #    copy all log records to the log file for the duration of this function
    #
    logfilehandler = logging.FileHandler(szprnttologfilename)
    logfilehandler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s', datefmt='%Y-%m-%d %H:%M:%S'))
    logging.getLogger().addHandler(logfilehandler)
    try:
        if verbose: logging.info ("mergeS2trainingdata starts (%s)" % (datetime_tick_training_data.strftime("%Y-%m-%d %H:%M:%S"), ) )
        #
        #    read the shape file into geopandas.geodataframe.GeoDataFrame, restrict to specified crop types and minimum area
        #
        parcelsgeodataframe = CropSARParcels.cropsar_shptopandas(szshapefile, lstcroptypeids=lstcroptypeids, iminimumfieldareainsquaremeters=iminimumfieldareainsquaremeters)
        #
        #    use the 'fieldID' as index
        #
        parcelsgeodataframe.set_index( 'fieldID', inplace=True, verify_integrity=True)
        #
        #    index for all resulting data frames
        #
        timeslotsindex = pandas.date_range(start=szyyyymmddfirst, end=szyyyymmddlast)

        def getdataframe(lst_crop_dataframes, fieldId):
            # column of the first per-croptype frame containing this field, restricted to the date range (None if absent)
            for crop_dataframe in lst_crop_dataframes:
                if fieldId in crop_dataframe.columns :
                    if verbose: logging.info("Field(%s) - found." % (fieldId, ) )
                    return crop_dataframe.loc[timeslotsindex.intersection(crop_dataframe.index), fieldId]
            return None

        def mergecroptypefiles(getcropdatafilename, szmissingfileformat):
            # read the per-croptype csv files of one kind and merge the selected fields; None when there is nothing to merge
            lst_crop_dataframes = []
            for croptype in lstcroptypeids:
                crop_szoutdir       = getoutputdir(SZBASEDIR, SZVERSION, [croptype])
                # overwrite = True: these are input files, they are supposed to exist
                crop_szdatafilename = getcropdatafilename(crop_szoutdir, szcroplayer, szyyyycropyear, szyyyymmddfirst, szyyyymmddlast, iminimumfieldareainsquaremeters, inarrowfieldbordersinmeter, overwrite = True)
                if not (os.path.exists(crop_szdatafilename)) :
                    if verbose: logging.info(szmissingfileformat % (croptype,) )
                    continue
                crop_dataframe = pandas.read_csv(crop_szdatafilename, index_col=0, parse_dates=True, float_precision='round_trip')
                if verbose: logging.info("Croptype(%4s) - containing %7s fields" % (croptype, len(crop_dataframe.columns)) )
                lst_crop_dataframes.append(crop_dataframe)
            lst_field_series = [getdataframe(lst_crop_dataframes, fieldId) for fieldId in parcelsgeodataframe.index]
            # pandas.concat raises ValueError on an empty list or when every entry is None
            if all(field_series is None for field_series in lst_field_series):
                return None
            return pandas.concat( lst_field_series, axis=1, verify_integrity=True, sort=False, copy=False)

        #
        #    cleaned data
        #
        if verbose: logging.info("Merging Cleaned Data")
        mergeddataframe = mergecroptypefiles(getcleandataoutputfile, "Croptype(%4s) - no csv files available")
        if mergeddataframe is None:
            logging.warning("no Cleaned Data available - '%s' not written" % (szcleandatafilename,) )
        else:
            mergeddataframe.to_csv(szcleandatafilename)
        #
        #    raw data
        #
        if verbose: logging.info("Merging Raw Data")
        mergeddataframe = mergecroptypefiles(getorigndataoutputfile, "Croptype(%4s) - no RAW csv files available")
        if mergeddataframe is None:
            logging.warning("no Raw Data available - '%s' not written" % (szorigndatafilename,) )
        else:
            mergeddataframe.to_csv(szorigndatafilename)
        #
        #    flags data
        #
        if verbose: logging.info("Merging Flags Data")
        mergeddataframe = mergecroptypefiles(getflagsdataoutputfile, "Croptype(%4s) - no FLAGS csv files available")
        if mergeddataframe is None:
            logging.warning("no Flags Data available - '%s' not written" % (szflagsdatafilename,) )
        else:
            # cast to float and format without decimals, so the flags are written as integers and missing values as empty cells
            mergeddataframe.astype(float).to_csv(szflagsdatafilename, float_format='%.0f')

        datetime_tock_training_data  = datetime.datetime.now()
        if verbose: logging.info ("mergeS2trainingdata done    start %s end %s)" % (datetime_tick_training_data.strftime("%Y-%m-%d %H:%M:%S"), datetime_tock_training_data.strftime("%Y-%m-%d %H:%M:%S")) )

    finally:
        #
        #    remove handler we added at function start, and close it to release the log file
        #
        logging.getLogger().removeHandler(logfilehandler)
        logfilehandler.close()

#
#
#
def mergeS1trainingdata(szoutputfilesprefix, szyyyycropyear, szshapefile, szyyyymmddfirst, szyyyymmddlast, iminimumfieldareainsquaremeters, inarrowfieldbordersinmeter, lstcroptypeids, verbose=True):
    """
    merge csv files for different croptypes into one for the fields as found in the shape file

    for each S1 series ('_ASC_VV', '_ASC_VH', '_ASC_IA', '_DSC_VV', '_DSC_VH', '_DSC_IA' as '_RAW' files, and the
    VV/VH series as processed files) the per-croptype csv files are read from their 'Croptype_<id>' directories,
    and the columns of the fields selected from the shape file are written, restricted to the requested date
    range, into one csv file in the directory of the complete croptype list. series without any data are skipped.
    log records emitted during the merge are also written to a '_LOG.txt' file next to the output files.

    :param szoutputfilesprefix : prefix for output filenames (e.g. S1_SIGMA0). No direct relation with dataclient layer names: for S1 we need multiple layers from wacky interfaces. 
    :param szyyyycropyear : "2016", "2017", "2018",... indication will be used only as prefix =  szoutputfilesprefix_szyyyycropyear of all output files generated. (by default files are not overwritten.)
    :param szshapefile: shape file containing the parcels (assumes attributes 'fieldID','croptype' and 'area') - used to select the fields in the merge
    :param szyyyymmddfirst: first date of the range to merge
    :param szyyyymmddlast: last date of the range to merge
    :param iminimumfieldareainsquaremeters: minimum parcel area; used both as parcel filter and in the file names
    :param inarrowfieldbordersinmeter: used in the file names only
    :param lstcroptypeids: croptypes to be merged
    :param verbose: log progress at info level
    :raises Exception: when one of the output files (csv or log) already exists

    TODO: all parameters are used to determine the filenames of input and output files. this should be separated.
    TODO: at the moment this only makes sense to create the "Croptype_ALL" files in the same directory as a set of "Croptype_XXX" files

    """
    datetime_tick_training_data  = datetime.datetime.now()

    lszorignprefixes = ["_ASC_VV", "_ASC_VH", "_ASC_IA", "_DSC_VV", "_DSC_VH", "_DSC_IA"] # s1 training files with '_RAW' suffix
    lszcleanprefixes = ["_ASC_VV", "_ASC_VH",            "_DSC_VV", "_DSC_VH"]            # s1 training files with no suffix
    #
    #    start by finding ALL output file names, avoid last minute crashes.
    #    this must happen before the log file is created: a check failing afterwards would leave
    #    a stray log file behind (blocking the next run) and the handler attached to the root logger.
    #
    szoutdir             = getoutputdir(SZBASEDIR, SZVERSION, lstcroptypeids)
    szprnttologfilename  = getprnttologoutputfile(szoutdir, szoutputfilesprefix, szyyyycropyear, szyyyymmddfirst, szyyyymmddlast, iminimumfieldareainsquaremeters, inarrowfieldbordersinmeter, overwrite = False)
    orignoutputfilenames = dict()
    for szprefix in lszorignprefixes:
        orignoutputfilenames[szprefix] = getorigndataoutputfile(szoutdir, szoutputfilesprefix + szprefix, szyyyycropyear, szyyyymmddfirst, szyyyymmddlast, iminimumfieldareainsquaremeters, inarrowfieldbordersinmeter, overwrite = False)
    cleanoutputfilenames = dict()
    for szprefix in lszcleanprefixes:
        cleanoutputfilenames[szprefix] = getcleandataoutputfile(szoutdir, szoutputfilesprefix + szprefix, szyyyycropyear, szyyyymmddfirst, szyyyymmddlast, iminimumfieldareainsquaremeters, inarrowfieldbordersinmeter, overwrite = False)
    #
    #    copy all log records to the log file for the duration of this function
    #
    logfilehandler = logging.FileHandler(szprnttologfilename)
    logfilehandler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s', datefmt='%Y-%m-%d %H:%M:%S'))
    logging.getLogger().addHandler(logfilehandler)
    try:
        if verbose: logging.info ("mergeS1trainingdata starts (%s)" % (datetime_tick_training_data.strftime("%Y-%m-%d %H:%M:%S"), ) )
        #
        #    read the shape file into geopandas.geodataframe.GeoDataFrame, restrict to specified crop types and minimum area
        #
        parcelsgeodataframe = CropSARParcels.cropsar_shptopandas(szshapefile, lstcroptypeids=lstcroptypeids, iminimumfieldareainsquaremeters=iminimumfieldareainsquaremeters)
        #
        #    use the 'fieldID' as index
        #
        parcelsgeodataframe.set_index( 'fieldID', inplace=True, verify_integrity=True)
        #
        #    index for all resulting data frames
        #
        timeslotsindex = pandas.date_range(start=szyyyymmddfirst, end=szyyyymmddlast)

        def getdataframe(lst_crop_dataframes, fieldId):
            # column of the first per-croptype frame containing this field, restricted to the date range (None if absent)
            for crop_dataframe in lst_crop_dataframes:
                if fieldId in crop_dataframe.columns :
                    if verbose: logging.info("Field(%s) - found." % (fieldId, ) )
                    return crop_dataframe.loc[timeslotsindex.intersection(crop_dataframe.index), fieldId]
            return None

        def mergecroptypefiles(getcropdatafilename, szprefix, szkind, szoutputfilename):
            # merge the per-croptype csv files of one series (szprefix) and one kind ('original' or 'processed')
            if verbose: logging.info("Merging %s '%s' data" % (szkind, szprefix))
            lst_crop_dataframes = []
            for croptype in lstcroptypeids:
                crop_szoutdir       = getoutputdir(SZBASEDIR, SZVERSION, [croptype])
                # overwrite = True: these are input files, they are supposed to exist
                crop_szdatafilename = getcropdatafilename(crop_szoutdir, szoutputfilesprefix + szprefix, szyyyycropyear, szyyyymmddfirst, szyyyymmddlast, iminimumfieldareainsquaremeters, inarrowfieldbordersinmeter, overwrite = True)
                if not (os.path.exists(crop_szdatafilename)) :
                    if verbose: logging.info("Croptype(%4s) - no %s '%s' csv files available" % (croptype, szkind, szprefix) )
                    continue
                crop_dataframe = pandas.read_csv(crop_szdatafilename, index_col=0, parse_dates=True, float_precision='round_trip')
                if verbose: logging.info("Croptype(%4s) - '%s' containing %7s fields" % (croptype, szprefix, len(crop_dataframe.columns)) )
                lst_crop_dataframes.append(crop_dataframe)
            lst_field_series = [getdataframe(lst_crop_dataframes, fieldId) for fieldId in parcelsgeodataframe.index]
            # pandas.concat raises ValueError on an empty list or when every entry is None
            if all(field_series is None for field_series in lst_field_series):
                if verbose: logging.info("no %s '%s' data available" % (szkind, szprefix))
                return
            mergeddataframe = pandas.concat( lst_field_series, axis=1, verify_integrity=True, sort=False, copy=False)
            mergeddataframe.to_csv(szoutputfilename)

        #
        #    'orign' s1 training files
        #
        for szprefix in lszorignprefixes:
            mergecroptypefiles(getorigndataoutputfile, szprefix, "original", orignoutputfilenames[szprefix])
        #
        #    'clean' s1 training files
        #
        for szprefix in lszcleanprefixes:
            mergecroptypefiles(getcleandataoutputfile, szprefix, "processed", cleanoutputfilenames[szprefix])

        datetime_tock_training_data  = datetime.datetime.now()
        if verbose: logging.info ("mergeS1trainingdata done    start %s end %s)" % (datetime_tick_training_data.strftime("%Y-%m-%d %H:%M:%S"), datetime_tock_training_data.strftime("%Y-%m-%d %H:%M:%S")) )

    finally:
        #
        #    remove handler we added at function start, and close it to release the log file
        #
        logging.getLogger().removeHandler(logfilehandler)
        logfilehandler.close()

#
#
#
class CropSARParcels:
    """
    helpers to load, filter and export the parcel shape files used in CropSAR

    the (source) shape files are expected to carry these attributes:
    - 'fieldID'   : some unique parcel identifier
    - 'croptype'  : crop type id (string) representing the crop in the parcel. eg. '901' for Potato ('niet-vroeg') ...
    - 'area'      : area of the parcels in square meter

    gotcha's: 
    - GeoJSON & fiona have been fighting flame wars related to overwriting of existing files
    - fiona (gdal) needs to be setup properly (environment variables), otherwise crs's disappear silently

    """

    @staticmethod
    def cropsar_shptopandas(szshapefile, lstfieldIDs=None, lstcroptypeids=None, iminimumfieldareainsquaremeters=None, imaximumfieldareainsquaremeters=None, verbose=True):
        """
        Load a shape file (a geojson might work too) as geopandas.GeoDataFrame and apply the optional filters.

        :param szshapefile: input shape file (containing attributes 'fieldID', 'croptype', 'area') 
        :param lstfieldIDs: optional (filter) list of parcel identifiers e.g. ['0000280600C79D76', '0000280600C79D79']
        :param lstcroptypeids: optional (filter) list of crops e.g. ['201', '202', '901']; entries are converted to strings
        :param iminimumfieldareainsquaremeters: optional (filter) minimum area (in square meters) of the parcels; ignored unless > 0
        :param imaximumfieldareainsquaremeters: optional (filter) maximum area (in square meters) of the parcels; ignored unless > 0
        :param verbose: log the number of parcels remaining after each filter
        :return: the filtered geopandas.GeoDataFrame
        """
        # attribute table of the shape file, plus a 'geometry' column holding the polygons
        gdf = geopandas.read_file(szshapefile)
        if verbose: 
            logging.info("")
            logging.info("CropSARParcels: initial number of fields   : %10s ( file  : %s )" % (len(gdf.index), os.path.basename(szshapefile)))

        # keep the requested parcels only
        if lstfieldIDs is not None:
            isselectedfield = gdf.loc[:, 'fieldID'].isin(lstfieldIDs)
            gdf = gdf.loc[isselectedfield, :]
            if verbose: logging.info("- remaining with specified fieldID(s)      : %10s ( selected fieldID(s) : %s )" % (len(gdf.index), lstfieldIDs))

        # keep the requested crop types only
        if lstcroptypeids is not None:
            lstcroptypeids = list(map(str, lstcroptypeids)) # the 'croptype' attribute holds strings; be forgiving for callers passing numbers
            isselectedcrop = gdf.loc[:, 'croptype'].isin(lstcroptypeids)
            gdf = gdf.loc[isselectedcrop, :]
            if verbose: logging.info("- remaining with specified croptype(s)     : %10s ( selected croptype(s) : %s )" % (len(gdf.index), lstcroptypeids))

        # drop parcels which are too small
        if iminimumfieldareainsquaremeters is not None and iminimumfieldareainsquaremeters > 0 :
            islargeenough = gdf.loc[:, 'area'] >= iminimumfieldareainsquaremeters
            gdf = gdf.loc[islargeenough, :]
            if verbose: logging.info("- remaining above minimum 'area'           : %10s ( area : %s )" % (len(gdf.index), iminimumfieldareainsquaremeters))

        # drop parcels which are too large
        if imaximumfieldareainsquaremeters is not None and imaximumfieldareainsquaremeters > 0 :
            issmallenough = gdf.loc[:, 'area'] <= imaximumfieldareainsquaremeters
            gdf = gdf.loc[issmallenough, :]
            if verbose: logging.info("- remaining below maximum 'area'           : %10s ( area : %s )" % (len(gdf.index), imaximumfieldareainsquaremeters))

        if verbose:
            logging.info("- final number of fields                   : %10s" % (len(gdf.index), ))
            logging.info("")
        return gdf

    @staticmethod
    def cropsar_shptoshp(szsrcshapefile, szdstshapefile, lstfieldIDs=None, lstcroptypeids=None, iminimumfieldareainsquaremeters=None, imaximumfieldareainsquaremeters=None, verbose=True):
        """
        Filter a shape file (see cropsar_shptopandas) and save the result as shape file. Returns the filtered GeoDataFrame.
        """
        gdf = CropSARParcels.cropsar_shptopandas(
            szsrcshapefile,
            lstfieldIDs=lstfieldIDs,
            lstcroptypeids=lstcroptypeids,
            iminimumfieldareainsquaremeters=iminimumfieldareainsquaremeters,
            imaximumfieldareainsquaremeters=imaximumfieldareainsquaremeters,
            verbose=verbose)
        gdf.to_file(szdstshapefile, driver="ESRI Shapefile")
        if verbose:
            logging.info("output shape file                        : %s ( full : %s )" % (os.path.basename(szdstshapefile), szdstshapefile))
        return gdf

    @staticmethod
    def cropsar_shptojson(szshapefile, szjsonfile, lstfieldIDs=None, lstcroptypeids=None, iminimumfieldareainsquaremeters=None, imaximumfieldareainsquaremeters=None, verbose=True):
        """
        Filter a shape file (see cropsar_shptopandas) and save the result as geojson file. Returns the filtered GeoDataFrame.
        """
        gdf = CropSARParcels.cropsar_shptopandas(
            szshapefile,
            lstfieldIDs=lstfieldIDs,
            lstcroptypeids=lstcroptypeids,
            iminimumfieldareainsquaremeters=iminimumfieldareainsquaremeters,
            imaximumfieldareainsquaremeters=imaximumfieldareainsquaremeters,
            verbose=verbose)
        # beware: "GeoJSON driver does not overwrite existing files." problems on and off since 2016
        gdf.to_file(szjsonfile, driver="GeoJSON")
        if verbose:
            logging.info("output json file                         : %s ( full : %s )" % (os.path.basename(szjsonfile), szjsonfile))
        return gdf

    @staticmethod
    def shptojson(szshapefile, szgeojsonfile):
        """
        read a shape file and save it, unfiltered, as geojson
        """
        gdf = geopandas.read_file(szshapefile)
        # beware: "GeoJSON driver does not overwrite existing files." problems on and off since 2016
        gdf.to_file(szgeojsonfile, driver="GeoJSON")

    @staticmethod
    def jsontoshp(szgeojsonfile, szshapefile):
        """
        read a geojson file and save it, unfiltered, as shape file
        """
        gdf = geopandas.read_file(szgeojsonfile)
        gdf.to_file(szshapefile, driver="ESRI Shapefile")

    @staticmethod
    def pandastoshp(geodataframe, szshapefile):
        """
        save a geopandas.GeoDataFrame as shape file
        """
        geodataframe.to_file(szshapefile, driver="ESRI Shapefile")

    @staticmethod
    def pandastojson(geodataframe, szgeojsonfile):
        """
        save a geopandas.GeoDataFrame as geojson file
        """
        geodataframe.to_file(szgeojsonfile, driver="GeoJSON")

#
#    needed for our cunning plan to query the series data via a thread pool 
#
class _S2TimeSeriesConstructionArgs:
    """
    plain bundle of the arguments of one cleans2timeseries.S2TimeSeries construction,
    so that a single object can be handed to _makeS2TimeSeries.
    """
    def __init__(self, szlayername, fieldId, fieldsgeodataframe, szyyyymmddfirst, szyyyymmddlast, inarrowfieldbordersinmeter, lstiextendfieldbordersinmeter, imaxretries, isleepseconds, verbose):
        if verbose:
            logging.debug("_S2TimeSeriesConstructionArgs(fieldId: %s))"%(fieldId,))
        # what to query: layer, field and the frame holding the field geometries
        self._szlayername        = szlayername
        self._fieldId            = fieldId
        self._fieldsgeodataframe = fieldsgeodataframe
        # date range
        self._szyyyymmddfirst = szyyyymmddfirst
        self._szyyyymmddlast  = szyyyymmddlast
        # field border narrowing / extensions
        self._inarrowfieldbordersinmeter    = inarrowfieldbordersinmeter
        self._lstiextendfieldbordersinmeter = lstiextendfieldbordersinmeter
        # retry behaviour and logging
        self._imaxretries   = imaxretries
        self._isleepseconds = isleepseconds
        self._verbose       = verbose

def _makeS2TimeSeries(s2timeseriesconstructionargs):
    """
    construct a cleans2timeseries.S2TimeSeries from a _S2TimeSeriesConstructionArgs bundle.
    takes one single argument, so it can be mapped over a list of bundles.
    the scene classification query mode is taken from the module setting _S2_SC_MULTI_GEOMETRIES_QUERY.
    """
    args = s2timeseriesconstructionargs
    if args._verbose:
        logging.debug("_makeS2TimeSeries(fieldId: %s))"%(args._fieldId,))
    positionalargs = (
        args._szlayername,
        args._fieldId,
        args._fieldsgeodataframe,
        args._szyyyymmddfirst,
        args._szyyyymmddlast,
    )
    keywordargs = dict(
        inarrowfieldbordersinmeter        = args._inarrowfieldbordersinmeter,
        lstiextendfieldbordersinmeter     = args._lstiextendfieldbordersinmeter,
        imaxretries                       = args._imaxretries,
        isleepseconds                     = args._isleepseconds,
        sceneclassificationsbysinglequery = _S2_SC_MULTI_GEOMETRIES_QUERY,
        verbose                           = args._verbose,
    )
    return cleans2timeseries.S2TimeSeries(*positionalargs, **keywordargs)

#
#
#
def makeS2TrainingData(szcroplayer, szyyyycropyear, szshapefile, szyyyymmddfirst, szyyyymmddlast, 
                       inarrowfieldbordersinmeter, lstnarrowedfieldvalidscenevalues, inarrowedfieldminpctareavalid, 
                       lstiextendfieldbordersinmeter, lstlstextendedfieldvalidscenevalues, lstiextendedfieldminpctareavalid,
                       localminimamaxdip = None, localminimamaxdif = None, localminimamaxgap = None, localminimamaxpas = 999,
                       lstfieldIDs = None, lstcroptypeids = None, iminimumfieldareainsquaremeters = None, imaximumfieldareainsquaremeters = None, verbose=True):
    """
    create S2 training data files (.csv): cleaned data, original (raw) data and flags.
    each file has one row per day in [szyyyymmddfirst, szyyyymmddlast] and one column per field.

    while the function runs, all root logger output is additionally written to a dedicated log file.
    fields for which the processing fails are logged, counted, and left out of the csv files.
    in case no parcels remain after filtering, a warning is logged and no csv files are written.

    :param szcroplayer : "S2_FAPAR", "S2_NDVI",... data layer name as specified by the dataclient 
    :param szyyyycropyear : "2016", "2017", "2018",... indication will be used only as prefix =  szcroplayer_szyyyycropyear of all output files generated. (by default files are not overwritten.)
    :param szshapefile: shape file containing the parcels (assumes attributes 'fieldID','croptype' and 'area')
    :param szyyyymmddfirst: first date in series
    :param szyyyymmddlast: last date in series
    :param inarrowfieldbordersinmeter: data is queried over the parcels polygon, narrowed (shrunken) by inarrowfieldbordersinmeter meter
    :param lstnarrowedfieldvalidscenevalues: list of scene classification values considered valid in this narrowed area
    :param inarrowedfieldminpctareavalid: minimum percentage of this narrowed area which must be valid, to accept the registration
    :param lstiextendfieldbordersinmeter: additional scene classifications are queried over the parcels polygon, extended by a border extension in meter
    :param lstlstextendedfieldvalidscenevalues: list of lists of scene classification values considered valid in these extended areas 
    :param lstiextendedfieldminpctareavalid: minimum percentages of these extended areas which must be valid, to accept the registration
    :param localminimamaxdip, localminimamaxdif, localminimamaxgap, localminimamaxpas: parameters passed to smooth.flaglocalminima
    :param lstfieldIDs: parcels in shape file can optionally be filtered to the IDs in this list  
    :param lstcroptypeids: parcel-croptypes in shape file can optionally be filtered to the types in this list   
    :param iminimumfieldareainsquaremeters: parcel-area in shape file can optionally be filtered to areas greater than iminimumfieldareainsquaremeters 
    :param imaximumfieldareainsquaremeters: passed to the shape file reader together with the minimum area; presumably the upper limit of the parcel-area filter (TODO confirm in CropSARParcels.cropsar_shptopandas)
    :return: None

    """
    datetime_tick_training_data  = datetime.datetime.now()
    #
    #    start by finding the output file names, avoid last minute crashes 
    #
    szoutdir              = getoutputdir(SZBASEDIR, SZVERSION, lstcroptypeids)
    szcleandatafilename   = getcleandataoutputfile(szoutdir, szcroplayer, szyyyycropyear, szyyyymmddfirst, szyyyymmddlast, iminimumfieldareainsquaremeters, inarrowfieldbordersinmeter)
    szorigndatafilename   = getorigndataoutputfile(szoutdir, szcroplayer, szyyyycropyear, szyyyymmddfirst, szyyyymmddlast, iminimumfieldareainsquaremeters, inarrowfieldbordersinmeter)
    szflagsdatafilename   = getflagsdataoutputfile(szoutdir, szcroplayer, szyyyycropyear, szyyyymmddfirst, szyyyymmddlast, iminimumfieldareainsquaremeters, inarrowfieldbordersinmeter)
    szprnttologfilename   = getprnttologoutputfile(szoutdir, szcroplayer, szyyyycropyear, szyyyymmddfirst, szyyyymmddlast, iminimumfieldareainsquaremeters, inarrowfieldbordersinmeter)
    #
    #    mirror everything logged during this run into the log file
    #
    logfilehandler = logging.FileHandler(szprnttologfilename)
    logfilehandler.setFormatter(logging.Formatter('%(asctime)s %(levelname).4s %(message)s', datefmt='%Y-%m-%d %H:%M:%S'))
    logging.getLogger().addHandler(logfilehandler)
    #
    #
    #
    try:
        #
        #
        #
        if verbose:
            logging.info ("") 
            logging.info ("makeS2TrainingData starts (%s)" % (datetime_tick_training_data.strftime("%Y-%m-%d %H:%M:%S"), ) )
            logging.info ("- output Clean data: %s" % (szcleandatafilename,))
            logging.info ("- output Raw   data: %s" % (szorigndatafilename,))
            logging.info ("- output Flags data: %s" % (szflagsdatafilename,))
            logging.info ("- output Log   data: %s" % (szprnttologfilename,))
        #
        #    read the shape file into geopandas.geodataframe.GeoDataFrame, restrict to specified crop types and minimum area
        #
        parcelsgeodataframe = CropSARParcels.cropsar_shptopandas(szshapefile, lstfieldIDs=lstfieldIDs, lstcroptypeids=lstcroptypeids, iminimumfieldareainsquaremeters=iminimumfieldareainsquaremeters, imaximumfieldareainsquaremeters=imaximumfieldareainsquaremeters, verbose=verbose)
        if parcelsgeodataframe.empty:
            logging.warning ("makeS2TrainingData abend - no fields in selection")
            return
        #
        #    use the 'fieldID' as index
        #
        parcelsgeodataframe.set_index( 'fieldID', inplace=True, verify_integrity=True)
        #
        #    index for all resulting data frames
        #
        timeslotsindex = pandas.date_range(start=szyyyymmddfirst, end=szyyyymmddlast)
        #
        #    data frames  (over complete date_range) containing
        #    - the cleaned data (actual training data) values
        #    - original raw data                                      - just for future reference, debug, logging,...
        #    - flags indicating the reason a registration was refused - just for future reference, debug, logging,...
        #
        cleandatadataframe = pandas.DataFrame(index=timeslotsindex, dtype=float)
        origndatadataframe = pandas.DataFrame(index=timeslotsindex, dtype=float)
        flagsdatadataframe = pandas.DataFrame(index=timeslotsindex, dtype=object) # should be integers, but we need None's
        #
        #    counters for progress and summary logging
        #
        cnt_cur_field = 0
        cnt_tot_field = len(parcelsgeodataframe.index)
        cnt_err_field = 0
        #
        #    construct the TimeSeriesData objects for each field, and have them retrieve their data in a thread pool.
        #    the pool is used as context manager, so its worker threads are released when we are done (or crash).
        #
        with concurrent.futures.ThreadPoolExecutor(max_workers=_S2_IPROCESSESINPOOL) as executor:
            timeseriesdatalist = executor.map(
                _makeS2TimeSeries, 
                [_S2TimeSeriesConstructionArgs(szcroplayer, fieldId, parcelsgeodataframe, szyyyymmddfirst, szyyyymmddlast, inarrowfieldbordersinmeter, lstiextendfieldbordersinmeter, _S2_MAX_ATTEMPTS, _S2_SLEEP_SECONDS, verbose) for fieldId in parcelsgeodataframe.index])
            #
            #    process the fields data time series, in the order of the parcels index.
            #    NOTE: an exception raised by _makeS2TimeSeries in a worker thread is re-raised by the iterator below,
            #          hence outside the per-field try block: it ends the complete run.
            #
            for timeSeriesData in timeseriesdatalist:
                #
                #
                #
                datetime_tick_cur_field = datetime.datetime.now()
                #
                #
                #
                fieldId = timeSeriesData._fieldId
                #
                #
                #
                cnt_cur_field += 1
                if verbose:
                    logging.info("") 
                    logging.info("Field(%s) - %s/%s" % (fieldId, cnt_cur_field, cnt_tot_field) )
                #
                #
                #
                try:
                    #
                    #    retrieve 'clean' time series. in case the TimeSeriesData was not setup correctly, an exception will occur.
                    #
                    timeSeriesData.getTimeSeriesData(lstnarrowedfieldvalidscenevalues, inarrowedfieldminpctareavalid, lstlstextendedfieldvalidscenevalues, lstiextendedfieldminpctareavalid, localminimamaxdip, localminimamaxdif, localminimamaxgap, localminimamaxpas)

                    cleanfielddatatimeseries = timeSeriesData._cleanfielddatatimeseries     # BEWARE: - in cases we want multiple calls we should copy here to keep TimeSeriesData object intact
                    orignfielddatatimeseries = timeSeriesData._originalfielddatatimeseries  # BEWARE: - in cases we want multiple calls we should copy here to keep TimeSeriesData object intact
                    flagsfielddatatimeseries = timeSeriesData._fielddataclassification      # BEWARE: - in cases we want multiple calls we should copy here to keep TimeSeriesData object intact

                    #
                    #    stretch results to 'full' series and write them in the data frames
                    #
                    cleandatadataseries = pandas.Series(index=timeslotsindex, name=fieldId, dtype=float)
                    origndatadataseries = pandas.Series(index=timeslotsindex, name=fieldId, dtype=float)
                    flagsdatadataseries = pandas.Series(index=timeslotsindex, name=fieldId, dtype=object)
                    #
                    #    this assumes cleanfielddatatimeseries.index is a subset of cleandatadataseries.index (being the timeslotsindex (pandas.date_range(start=szyyyymmddfirst, end=szyyyymmddlast)))
                    #                 orignfielddatatimeseries.index                origndatadataseries.index
                    #                 flagsfielddatatimeseries.index                flagsdatadataseries.index
                    #
                    cleandatadataseries.loc[cleanfielddatatimeseries.index] = cleanfielddatatimeseries
                    origndatadataseries.loc[orignfielddatatimeseries.index] = orignfielddatatimeseries
                    flagsdatadataseries.loc[flagsfielddatatimeseries.index] = flagsfielddatatimeseries

                    cleandatadataframe = pandas.concat( [cleandatadataframe, cleandatadataseries], axis=1, verify_integrity=True, sort=False, copy=False)
                    origndatadataframe = pandas.concat( [origndatadataframe, origndatadataseries], axis=1, verify_integrity=True, sort=False, copy=False)
                    flagsdatadataframe = pandas.concat( [flagsdatadataframe, flagsdatadataseries], axis=1, verify_integrity=True, sort=False, copy=False)
                    #
                    #
                    #
                    datetime_tock_cur_field  = datetime.datetime.now()
                    if verbose: logging.info("Field(%s) done - (%s seconds)" % (fieldId, int((datetime_tock_cur_field-datetime_tick_cur_field).total_seconds())) )

                except Exception:
                    logging.error (traceback.format_exc())
                    logging.error("Field(%s) failed" % (fieldId,) )
                    #
                    #
                    #
                    cnt_err_field += 1
                    #
                    #   remove crashes and all-NaN series from results. Kristof cannot train with these. (and enjoy the everlasting pandas versions clash) 
                    #   (a failure halfway the concat's above can leave the field in some of the frames only)
                    #
                    if fieldId in cleandatadataframe.columns: cleandatadataframe.drop (fieldId, axis=1, inplace=True ) # cleandatadataframe.drop (columns=fieldId, inplace=True )
                    if fieldId in origndatadataframe.columns: origndatadataframe.drop (fieldId, axis=1, inplace=True ) # origndatadataframe.drop (columns=fieldId, inplace=True )
                    if fieldId in flagsdatadataframe.columns: flagsdatadataframe.drop (fieldId, axis=1, inplace=True ) # flagsdatadataframe.drop (columns=fieldId, inplace=True )

        #
        #
        #
        datetime_tock_training_data  = datetime.datetime.now()
        #
        #    write results to csv
        #
        cleandatadataframe.to_csv(szcleandatafilename)
        origndatadataframe.to_csv(szorigndatafilename)
        flagsdatadataframe.to_csv(szflagsdatafilename)
        #
        #    summary (cnt_tot_field cannot be 0 here: an empty selection returned earlier)
        #
        if verbose: 
            logging.info ("")
            logging.info ("makeS2TrainingData done    start %s end %s)" % (datetime_tick_training_data.strftime("%Y-%m-%d %H:%M:%S"), datetime_tock_training_data.strftime("%Y-%m-%d %H:%M:%S")) )
            logging.info ("makeS2TrainingData total   %s seconds for %s fields" %  (int((datetime_tock_training_data-datetime_tick_training_data).total_seconds()), cnt_tot_field))
            logging.info ("makeS2TrainingData average %s seconds per field" %  (( (datetime_tock_training_data-datetime_tick_training_data).total_seconds())/cnt_tot_field,))
            logging.info ("makeS2TrainingData failed  %s out of total %s fields" %  (cnt_err_field,cnt_tot_field))

    finally:
        #
        #    remove handler we added at function start, and close it to release the log file
        #
        logging.getLogger().removeHandler(logfilehandler)
        logfilehandler.close()




#
#    needed for our cunning plan to query the series data via a thread pool 
#
class _S1TimeSeriesConstructionArgs:
    """
    plain container holding the arguments for the construction of a single S1 time series object,
    which covers a chunk of fields (fieldIds).

    bundling them in one object allows a complete 'construction job' to be handed
    to a thread pool as one single item (see _makeS1TimeSeries).
    all parameters are stored unmodified in an attribute with the same name, prefixed by an underscore.
    """
    def __init__(self, fieldIds, fieldsgeodataframe, szyyyymmddfirst, szyyyymmddlast, inarrowfieldbordersinmeter, imaxretries, isleepseconds, verbose):
        #
        #    optional trace of the job being set up
        #
        if verbose:
            szmessage = "_S1TimeSeriesConstructionArgs(fieldIds: %s))"%(fieldIds,)
            logging.debug(szmessage)
        #
        #    which fields, and the frame in which these fields are to be found
        #
        self._fieldIds, self._fieldsgeodataframe = fieldIds, fieldsgeodataframe
        #
        #    period
        #
        self._szyyyymmddfirst, self._szyyyymmddlast = szyyyymmddfirst, szyyyymmddlast
        #
        #    field border manipulation
        #
        self._inarrowfieldbordersinmeter = inarrowfieldbordersinmeter
        #
        #    retry policy and verbosity
        #
        self._imaxretries, self._isleepseconds, self._verbose = imaxretries, isleepseconds, verbose

def _makeS1TimeSeries(s1timeseriesconstructionargs):
    """
    worker function for the thread pool: construct the cleans1timeseries.S1TimeSeries
    described by one _S1TimeSeriesConstructionArgs instance.

    :param s1timeseriesconstructionargs: _S1TimeSeriesConstructionArgs describing the series (chunk of fields) to be constructed
    :return: the cleans1timeseries.S1TimeSeries object
    """
    job = s1timeseriesconstructionargs
    #
    #    positional part: fields, fields frame and period
    #
    positionalargs = (
        job._fieldIds,
        job._fieldsgeodataframe,
        job._szyyyymmddfirst,
        job._szyyyymmddlast)
    #
    #    keyword part: per-job settings, completed with the module wide query modes
    #
    keywordargs = dict(
        inarrowfieldbordersinmeter = job._inarrowfieldbordersinmeter,
        imaxretries                = job._imaxretries,
        isleepseconds              = job._isleepseconds,
        bymultibandquery           = _S1_MULTI_BAND_QUERY,
        iminfieldsforbulkquery     = _S1_MIN_FIELDS_FOR_BULK_QUERY,
        verbose                    = job._verbose)

    return cleans1timeseries.S1TimeSeries(*positionalargs, **keywordargs)
#
#
#
def makeS1TrainingData(szoutputfilesprefix, szyyyycropyear, szshapefile, szyyyymmddfirst, szyyyymmddlast,
                       inarrowfieldbordersinmeter, imovingaveragewindow, 
                       lstfieldIDs = None, lstcroptypeids = None, iminimumfieldareainsquaremeters = None, imaximumfieldareainsquaremeters = None, verbose=True):
    """
    create S1 training data files (.csv)
    - original (raw) data  : ASCENDING and DESCENDING, each for VV, VH and IA (6 files)
    - cleaned data         : ASCENDING and DESCENDING, each for VV and VH     (4 files) - only in case imovingaveragewindow > 0
    each file has one row per day in [szyyyymmddfirst, szyyyymmddlast] and one column per field.

    while the function runs, all root logger output is additionally written to a dedicated log file.
    fields (or complete chunks of fields) for which the processing fails are logged, counted, and left out of the csv files.
    in case no parcels remain after filtering, a warning is logged and no csv files are written.

    :param szoutputfilesprefix : prefix for output filenames (e.g. S1_SIGMA0). No direct relation with dataclient layer names: for S1 we need multiple layers from wacky interfaces. 
    :param szyyyycropyear : "2016", "2017", "2018",... indication will be used only as prefix =  szoutputfilesprefix_szyyyycropyear of all output files generated. (by default files are not overwritten.)
    :param szshapefile: shape file containing the parcels (assumes attributes 'fieldID','croptype' and 'area')
    :param szyyyymmddfirst: first date in series
    :param szyyyymmddlast: last date in series
    :param inarrowfieldbordersinmeter: data is queried over the parcels polygon, narrowed (shrunken) by inarrowfieldbordersinmeter meter
    :param imovingaveragewindow: must be a number. if > 0, the 'clean' series are retrieved with this value as argument and the clean files are written; otherwise only the raw files are made. (presumably the window size of a moving average - TODO confirm in cleans1timeseries)
    :param lstfieldIDs: parcels in shape file can optionally be filtered to the IDs in this list  
    :param lstcroptypeids: parcel-croptypes in shape file can optionally be filtered to the types in this list   
    :param iminimumfieldareainsquaremeters: parcel-area in shape file can optionally be filtered to areas greater than iminimumfieldareainsquaremeters 
    :param imaximumfieldareainsquaremeters: passed to the shape file reader together with the minimum area; presumably the upper limit of the parcel-area filter (TODO confirm in CropSARParcels.cropsar_shptopandas)
    :return: None
    """
    datetime_tick_training_data  = datetime.datetime.now()
    #
    #    start by finding the output file names, avoid last minute crashes 
    #
    szoutdir = getoutputdir(SZBASEDIR, SZVERSION, lstcroptypeids)

    if imovingaveragewindow > 0:
        szcleanASCVVfilename = getcleandataoutputfile(szoutdir, szoutputfilesprefix + "_ASC_VV", szyyyycropyear, szyyyymmddfirst, szyyyymmddlast, iminimumfieldareainsquaremeters, inarrowfieldbordersinmeter)
        szcleanASCVHfilename = getcleandataoutputfile(szoutdir, szoutputfilesprefix + "_ASC_VH", szyyyycropyear, szyyyymmddfirst, szyyyymmddlast, iminimumfieldareainsquaremeters, inarrowfieldbordersinmeter)
        szcleanDSCVVfilename = getcleandataoutputfile(szoutdir, szoutputfilesprefix + "_DSC_VV", szyyyycropyear, szyyyymmddfirst, szyyyymmddlast, iminimumfieldareainsquaremeters, inarrowfieldbordersinmeter)
        szcleanDSCVHfilename = getcleandataoutputfile(szoutdir, szoutputfilesprefix + "_DSC_VH", szyyyycropyear, szyyyymmddfirst, szyyyymmddlast, iminimumfieldareainsquaremeters, inarrowfieldbordersinmeter)

    szorignASCVVfilename = getorigndataoutputfile(szoutdir, szoutputfilesprefix + "_ASC_VV", szyyyycropyear, szyyyymmddfirst, szyyyymmddlast, iminimumfieldareainsquaremeters, inarrowfieldbordersinmeter)
    szorignASCVHfilename = getorigndataoutputfile(szoutdir, szoutputfilesprefix + "_ASC_VH", szyyyycropyear, szyyyymmddfirst, szyyyymmddlast, iminimumfieldareainsquaremeters, inarrowfieldbordersinmeter)
    szorignASCIAfilename = getorigndataoutputfile(szoutdir, szoutputfilesprefix + "_ASC_IA", szyyyycropyear, szyyyymmddfirst, szyyyymmddlast, iminimumfieldareainsquaremeters, inarrowfieldbordersinmeter)
    szorignDSCVVfilename = getorigndataoutputfile(szoutdir, szoutputfilesprefix + "_DSC_VV", szyyyycropyear, szyyyymmddfirst, szyyyymmddlast, iminimumfieldareainsquaremeters, inarrowfieldbordersinmeter)
    szorignDSCVHfilename = getorigndataoutputfile(szoutdir, szoutputfilesprefix + "_DSC_VH", szyyyycropyear, szyyyymmddfirst, szyyyymmddlast, iminimumfieldareainsquaremeters, inarrowfieldbordersinmeter)
    szorignDSCIAfilename = getorigndataoutputfile(szoutdir, szoutputfilesprefix + "_DSC_IA", szyyyycropyear, szyyyymmddfirst, szyyyymmddlast, iminimumfieldareainsquaremeters, inarrowfieldbordersinmeter)

    szprnttologfilename  = getprnttologoutputfile(szoutdir, szoutputfilesprefix            , szyyyycropyear, szyyyymmddfirst, szyyyymmddlast, iminimumfieldareainsquaremeters, inarrowfieldbordersinmeter)
    #
    #    mirror everything logged during this run into the log file
    #
    logfilehandler = logging.FileHandler(szprnttologfilename)
    logfilehandler.setFormatter(logging.Formatter('%(asctime)s %(levelname).4s %(message)s', datefmt='%Y-%m-%d %H:%M:%S'))
    logging.getLogger().addHandler(logfilehandler)
    #
    #
    #
    try:
        #
        #
        #
        if verbose:
            logging.info ("") 
            logging.info ("makeS1TrainingData starts (%s)" % (datetime_tick_training_data.strftime("%Y-%m-%d %H:%M:%S"), ) )
            if imovingaveragewindow > 0:
                logging.info ("- output Clean ASC VV data: %s" % (szcleanASCVVfilename,))
                logging.info ("- output Clean ASC VH data: %s" % (szcleanASCVHfilename,))
                logging.info ("- output Clean DSC VV data: %s" % (szcleanDSCVVfilename,))
                logging.info ("- output Clean DSC VH data: %s" % (szcleanDSCVHfilename,))
            logging.info ("- output Raw   ASC VV data: %s" % (szorignASCVVfilename,))
            logging.info ("- output Raw   ASC VH data: %s" % (szorignASCVHfilename,))
            logging.info ("- output Raw   ASC IA data: %s" % (szorignASCIAfilename,))
            logging.info ("- output Raw   DSC VV data: %s" % (szorignDSCVVfilename,))
            logging.info ("- output Raw   DSC VH data: %s" % (szorignDSCVHfilename,))
            logging.info ("- output Raw   DSC IA data: %s" % (szorignDSCIAfilename,))
            logging.info ("- output Log              : %s" % (szprnttologfilename,))
        #
        #    read the shape file into geopandas.geodataframe.GeoDataFrame, restrict to specified crop types and minimum area
        #
        parcelsgeodataframe = CropSARParcels.cropsar_shptopandas(szshapefile, lstfieldIDs=lstfieldIDs, lstcroptypeids=lstcroptypeids, iminimumfieldareainsquaremeters=iminimumfieldareainsquaremeters, imaximumfieldareainsquaremeters=imaximumfieldareainsquaremeters, verbose=verbose)
        if parcelsgeodataframe.empty:
            logging.warning ("makeS1TrainingData abend - no fields in selection")
            return
        #
        #    use the 'fieldID' as index
        #
        parcelsgeodataframe.set_index( 'fieldID', inplace=True, verify_integrity=True)
        #
        #    index for all resulting data frames
        #
        timeslotsindex = pandas.date_range(start=szyyyymmddfirst, end=szyyyymmddlast)
        #
        #    data frames  (over complete date_range) containing
        #    - the cleaned data (only in case a moving average window was specified)
        #    - original raw data
        #
        if imovingaveragewindow > 0:
            cleanASCVVdataframe = pandas.DataFrame(index=timeslotsindex, dtype=float)
            cleanASCVHdataframe = pandas.DataFrame(index=timeslotsindex, dtype=float)
            cleanDSCVVdataframe = pandas.DataFrame(index=timeslotsindex, dtype=float)
            cleanDSCVHdataframe = pandas.DataFrame(index=timeslotsindex, dtype=float)
        orignASCVVdataframe = pandas.DataFrame(index=timeslotsindex, dtype=float)
        orignASCVHdataframe = pandas.DataFrame(index=timeslotsindex, dtype=float)
        orignASCIAdataframe = pandas.DataFrame(index=timeslotsindex, dtype=float)
        orignDSCVVdataframe = pandas.DataFrame(index=timeslotsindex, dtype=float)
        orignDSCVHdataframe = pandas.DataFrame(index=timeslotsindex, dtype=float)
        orignDSCIAdataframe = pandas.DataFrame(index=timeslotsindex, dtype=float)
        #
        #    one construction job per chunk of _S1_FIELDSPERQUERY fields
        #
        timeseriesargslist = [_S1TimeSeriesConstructionArgs( parcelsgeodataframe.index[iIdx: iIdx + _S1_FIELDSPERQUERY], parcelsgeodataframe, szyyyymmddfirst, szyyyymmddlast, inarrowfieldbordersinmeter, _S1_MAX_ATTEMPTS, _S1_SLEEP_SECONDS, verbose) for iIdx in range(0, len(parcelsgeodataframe.index), _S1_FIELDSPERQUERY)]
        #
        #    counters for progress and summary logging
        #
        cnt_cur_timeseriesdata = 0
        cnt_tot_timeseriesdata = len(timeseriesargslist)
        cnt_cur_field          = 0
        cnt_tot_field          = len(parcelsgeodataframe.index)
        cnt_err_field          = 0
        #
        #    construct the TimeSeriesData objects for each chunk, and have them retrieve their data in a thread pool.
        #    the pool is used as context manager, so its worker threads are released when we are done (or crash).
        #
        with concurrent.futures.ThreadPoolExecutor(max_workers=_S1_IPROCESSESINPOOL) as executor:
            timeseriesdatalist = executor.map(_makeS1TimeSeries, timeseriesargslist)
            #
            #    process the fields data time series, in the order of the timeseriesargslist.
            #    NOTE: an exception raised by _makeS1TimeSeries in a worker thread is re-raised by the iterator below,
            #          hence outside the try blocks: it ends the complete run.
            #
            for timeSeriesData in timeseriesdatalist:
                #
                #
                #
                cnt_cur_timeseriesdata +=1
                #
                #
                #
                try :
                    #
                    #    retrieve resulting time series (chunk). in case the TimeSeriesData was not setup correctly, an exception will occur.
                    #
                    fieldIds         = timeSeriesData.getFieldIds()
                    orignresultsDict = timeSeriesData.getTimeSeriesData()
                    cleanresultsDict = None
                    if imovingaveragewindow > 0: 
                        cleanresultsDict = timeSeriesData.getTimeSeriesData(imovingaveragewindow)

                except Exception:
                    #
                    #    complete chunk failed
                    #
                    fieldIds         = None
                    orignresultsDict = None
                    cleanresultsDict = None 
                    logging.error (traceback.format_exc())
                    logging.error("makeS1TrainingData - TimeSeriesData %s/%s failed" % (cnt_cur_timeseriesdata, cnt_tot_timeseriesdata))

                #
                #    failed chunk: all the fields requested for this chunk are counted as processed and as failed
                #
                if fieldIds is None:
                    try:
                        cnt_chunk_field = len(timeseriesargslist[cnt_cur_timeseriesdata - 1]._fieldIds)
                        cnt_cur_field += cnt_chunk_field
                        cnt_err_field += cnt_chunk_field
                    except Exception:
                        logging.error (traceback.format_exc())
                        logging.error("something bad happened")
                    continue

                #
                #    process the timeSeriesData (chunk)
                #
                for fieldId in fieldIds:
                    #
                    #
                    #
                    datetime_tick_cur_field = datetime.datetime.now()
                    #
                    #
                    #
                    cnt_cur_field += 1
                    if verbose:
                        logging.info("") 
                        logging.info("makeS1TrainingData - Field(%s) - %s/%s in TimeSeriesData %s/%s" % (fieldId, cnt_cur_field, cnt_tot_field, cnt_cur_timeseriesdata, cnt_tot_timeseriesdata) )

                    #
                    #
                    #    TODO: split up in ascending and descending for future use
                    #
                    #

                    #
                    #
                    #
                    try:
                        #
                        #    retrieve resulting time series. in case the TimeSeriesData was not setup correctly, an exception will occur.
                        #
                        if imovingaveragewindow > 0:
                            cleanASCVVtimeSeries = cleanresultsDict[fieldId]['ASCENDING']['VV']
                            cleanASCVHtimeSeries = cleanresultsDict[fieldId]['ASCENDING']['VH']
                            cleanDSCVVtimeSeries = cleanresultsDict[fieldId]['DESCENDING']['VV']
                            cleanDSCVHtimeSeries = cleanresultsDict[fieldId]['DESCENDING']['VH']
                            #
                            #    stretch results to 'full' series and write them in the data frames
                            #
                            cleanASCVVdataseries = pandas.Series(index=timeslotsindex, name=fieldId, dtype=float)
                            cleanASCVHdataseries = pandas.Series(index=timeslotsindex, name=fieldId, dtype=float)
                            cleanDSCVVdataseries = pandas.Series(index=timeslotsindex, name=fieldId, dtype=float)
                            cleanDSCVHdataseries = pandas.Series(index=timeslotsindex, name=fieldId, dtype=float)

                            cleanASCVVdataseries.loc[cleanASCVVtimeSeries.index] = cleanASCVVtimeSeries
                            cleanASCVHdataseries.loc[cleanASCVHtimeSeries.index] = cleanASCVHtimeSeries
                            cleanDSCVVdataseries.loc[cleanDSCVVtimeSeries.index] = cleanDSCVVtimeSeries
                            cleanDSCVHdataseries.loc[cleanDSCVHtimeSeries.index] = cleanDSCVHtimeSeries

                            cleanASCVVdataframe = pandas.concat( [ cleanASCVVdataframe, cleanASCVVdataseries], axis=1, verify_integrity=True, sort=False, copy=False)
                            cleanASCVHdataframe = pandas.concat( [ cleanASCVHdataframe, cleanASCVHdataseries], axis=1, verify_integrity=True, sort=False, copy=False)
                            cleanDSCVVdataframe = pandas.concat( [ cleanDSCVVdataframe, cleanDSCVVdataseries], axis=1, verify_integrity=True, sort=False, copy=False)
                            cleanDSCVHdataframe = pandas.concat( [ cleanDSCVHdataframe, cleanDSCVHdataseries], axis=1, verify_integrity=True, sort=False, copy=False)
                        #
                        #
                        #
                        orignASCVVtimeSeries = orignresultsDict[fieldId]['ASCENDING']['VV']
                        orignASCVHtimeSeries = orignresultsDict[fieldId]['ASCENDING']['VH']
                        orignASCIAtimeSeries = orignresultsDict[fieldId]['ASCENDING']['IA']
                        orignDSCVVtimeSeries = orignresultsDict[fieldId]['DESCENDING']['VV']
                        orignDSCVHtimeSeries = orignresultsDict[fieldId]['DESCENDING']['VH']
                        orignDSCIAtimeSeries = orignresultsDict[fieldId]['DESCENDING']['IA']
                        #
                        #    stretch results to 'full' series and write them in the data frames
                        #
                        orignASCVVdataseries = pandas.Series(index=timeslotsindex, name=fieldId, dtype=float)
                        orignASCVHdataseries = pandas.Series(index=timeslotsindex, name=fieldId, dtype=float)
                        orignASCIAdataseries = pandas.Series(index=timeslotsindex, name=fieldId, dtype=float)
                        orignDSCVVdataseries = pandas.Series(index=timeslotsindex, name=fieldId, dtype=float)
                        orignDSCVHdataseries = pandas.Series(index=timeslotsindex, name=fieldId, dtype=float)
                        orignDSCIAdataseries = pandas.Series(index=timeslotsindex, name=fieldId, dtype=float)

                        orignASCVVdataseries.loc[orignASCVVtimeSeries.index] = orignASCVVtimeSeries
                        orignASCVHdataseries.loc[orignASCVHtimeSeries.index] = orignASCVHtimeSeries
                        orignASCIAdataseries.loc[orignASCIAtimeSeries.index] = orignASCIAtimeSeries
                        orignDSCVVdataseries.loc[orignDSCVVtimeSeries.index] = orignDSCVVtimeSeries
                        orignDSCVHdataseries.loc[orignDSCVHtimeSeries.index] = orignDSCVHtimeSeries
                        orignDSCIAdataseries.loc[orignDSCIAtimeSeries.index] = orignDSCIAtimeSeries

                        orignASCVVdataframe = pandas.concat( [ orignASCVVdataframe, orignASCVVdataseries], axis=1, verify_integrity=True, sort=False, copy=False)
                        orignASCVHdataframe = pandas.concat( [ orignASCVHdataframe, orignASCVHdataseries], axis=1, verify_integrity=True, sort=False, copy=False)
                        orignASCIAdataframe = pandas.concat( [ orignASCIAdataframe, orignASCIAdataseries], axis=1, verify_integrity=True, sort=False, copy=False)
                        orignDSCVVdataframe = pandas.concat( [ orignDSCVVdataframe, orignDSCVVdataseries], axis=1, verify_integrity=True, sort=False, copy=False)
                        orignDSCVHdataframe = pandas.concat( [ orignDSCVHdataframe, orignDSCVHdataseries], axis=1, verify_integrity=True, sort=False, copy=False)
                        orignDSCIAdataframe = pandas.concat( [ orignDSCIAdataframe, orignDSCIAdataseries], axis=1, verify_integrity=True, sort=False, copy=False)
                        #
                        #
                        #
                        datetime_tock_cur_field  = datetime.datetime.now()
                        if verbose: logging.info("makeS1TrainingData - Field(%s) done - (%s seconds)" % (fieldId, int((datetime_tock_cur_field-datetime_tick_cur_field).total_seconds())) )

                    except Exception:
                        logging.error (traceback.format_exc())
                        logging.error("makeS1TrainingData - Field(%s) failed" % (fieldId,) )
                        #
                        #
                        #
                        cnt_err_field += 1
                        #
                        #   remove crashes. (and enjoy the everlasting pandas versions clash) 
                        #   (a failure halfway the concat's above can leave the field in some of the frames only)
                        #
                        if imovingaveragewindow > 0:
                            if fieldId in cleanASCVVdataframe.columns: cleanASCVVdataframe.drop (fieldId, axis=1, inplace=True ) # cleanASCVVdataframe.drop (columns=fieldId, inplace=True )
                            if fieldId in cleanASCVHdataframe.columns: cleanASCVHdataframe.drop (fieldId, axis=1, inplace=True )
                            if fieldId in cleanDSCVVdataframe.columns: cleanDSCVVdataframe.drop (fieldId, axis=1, inplace=True )
                            if fieldId in cleanDSCVHdataframe.columns: cleanDSCVHdataframe.drop (fieldId, axis=1, inplace=True )
                        if fieldId in orignASCVVdataframe.columns: orignASCVVdataframe.drop (fieldId, axis=1, inplace=True )
                        if fieldId in orignASCVHdataframe.columns: orignASCVHdataframe.drop (fieldId, axis=1, inplace=True )
                        if fieldId in orignASCIAdataframe.columns: orignASCIAdataframe.drop (fieldId, axis=1, inplace=True )
                        if fieldId in orignDSCVVdataframe.columns: orignDSCVVdataframe.drop (fieldId, axis=1, inplace=True )
                        if fieldId in orignDSCVHdataframe.columns: orignDSCVHdataframe.drop (fieldId, axis=1, inplace=True )
                        if fieldId in orignDSCIAdataframe.columns: orignDSCIAdataframe.drop (fieldId, axis=1, inplace=True )

        #
        #    write results to csv
        #
        if imovingaveragewindow > 0:
            cleanASCVVdataframe.to_csv(szcleanASCVVfilename)
            cleanASCVHdataframe.to_csv(szcleanASCVHfilename)
            cleanDSCVVdataframe.to_csv(szcleanDSCVVfilename)
            cleanDSCVHdataframe.to_csv(szcleanDSCVHfilename)
        orignASCVVdataframe.to_csv(szorignASCVVfilename)
        orignASCVHdataframe.to_csv(szorignASCVHfilename)
        orignASCIAdataframe.to_csv(szorignASCIAfilename)
        orignDSCVVdataframe.to_csv(szorignDSCVVfilename)
        orignDSCVHdataframe.to_csv(szorignDSCVHfilename)
        orignDSCIAdataframe.to_csv(szorignDSCIAfilename)
        #
        #
        #
        datetime_tock_training_data  = datetime.datetime.now()
        #
        #    summary (cnt_tot_field cannot be 0 here: an empty selection returned earlier)
        #
        if verbose: 
            logging.info ("")
            logging.info ("makeS1TrainingData done    start %s end %s)" % (datetime_tick_training_data.strftime("%Y-%m-%d %H:%M:%S"), datetime_tock_training_data.strftime("%Y-%m-%d %H:%M:%S")) )
            logging.info ("makeS1TrainingData total   %s seconds for %s fields" %  (int((datetime_tock_training_data-datetime_tick_training_data).total_seconds()), cnt_tot_field))
            logging.info ("makeS1TrainingData average %s seconds per field" %  (( (datetime_tock_training_data-datetime_tick_training_data).total_seconds())/cnt_tot_field,))
            logging.info ("makeS1TrainingData failed  %s out of total %s fields" %  (cnt_err_field,cnt_tot_field))

    finally:
        #
        #    remove handler we added at function start, and close it to release the log file
        #
        logging.getLogger().removeHandler(logfilehandler)
        logfilehandler.close()


# #
# #    disabled (commented-out) example driver 'zemain', kept for reference only
# #
# def zemain():
#     """
#     create training data files (.csv)
#     """
#     #
#     #
#     #
#     logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname).4s %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
# 
#     #
#     #    reference files
#     #
#     szrefshapefile_2016     = os.path.join(os.path.join(os.path.join(SZROOTDIR, "ref"), "shp"), "2016_CroptypesFlemishParcels_DeptLV.shp")   # 2015-2016 growing season
#     szrefshapefile_2017     = os.path.join(os.path.join(os.path.join(SZROOTDIR, "ref"), "shp"), "2017_CroptypesFlemishParcels_DeptLV.shp")   # 2016-2017 growing season
#     szrefshapefile_2018     = os.path.join(os.path.join(os.path.join(SZROOTDIR, "ref"), "shp"), "2018_CroptypesFlemishParcels_DeptLV.shp")   # 2017-2018 growing season
# 
#     dict_szrefshapefiles    = {
#         "2016":szrefshapefile_2016,
#         "2017":szrefshapefile_2017,
#         "2018":szrefshapefile_2018,
#         } 
# 
#     lst_crop_maize          = ['201', '202']                                               # MIND THE STRINGS 
#     lst_potato_early        = ['904']                                                      # MIND THE STRINGS 
#     lst_potato_non_early    = ['901']                                                      # MIND THE STRINGS 
#     lst_all_relevant_crops  = ['201', '202', '901', '904', '311', '321', '71', '91', '60'] # MIND THE STRINGS
# 
#     #
#     #    local - test
#     #
#     szshapefile_2017_5ha_problems = r"D:\data\training\V000\Croptype_all\CroptypesFlemishParcels_2017_5ha_problems.shp"
#     if False:
#         #
#         #    subsets of reference shape files
#         #
#         szsrcshapefile                  = szrefshapefile_2017
#         szdstshapefile                  = szshapefile_2017_5ha_problems
#         lstfieldIDs                     = ['000028045CCB4FA7', '0000280461BE8431', '0000280461C6F3F5', '0000280462F3ABD8', '00002804641D9C18']
#         lstcroptypeids                  = lst_all_relevant_crops
#         iminimumfieldareainsquaremeters = 10000 * 30
#         imaximumfieldareainsquaremeters = None
# 
#         CropSARParcels.cropsar_shptoshp(szsrcshapefile, szdstshapefile, lstfieldIDs, lstcroptypeids, iminimumfieldareainsquaremeters, imaximumfieldareainsquaremeters, verbose=True)
# 
#     #
#     #    local - test
#     #
#     szshapefile_2017_1ha_allcrops = r"D:\data\training\V001\Croptype_all\CroptypesFlemishParcels_2017_1ha.shp"
#     if False:
#         #
#         #    subsets of reference shape files
#         #
#         szsrcshapefile                  = szrefshapefile_2017
#         szdstshapefile                  = szshapefile_2017_1ha_allcrops
#         lstfieldIDs                     = None
#         lstcroptypeids                  = lst_all_relevant_crops
#         iminimumfieldareainsquaremeters = 10000
#         imaximumfieldareainsquaremeters = None
# 
#         CropSARParcels.cropsar_shptoshp(szsrcshapefile, szdstshapefile, lstfieldIDs, lstcroptypeids, iminimumfieldareainsquaremeters, imaximumfieldareainsquaremeters, verbose=True)
# 
#     #
#     #    local - test
#     #
#     szshapefile_2018_1ha_allcrops = r"D:\data\training\V000\Croptype_all\CroptypesFlemishParcels_2018_1ha.shp"
#     if False:
#         #
#         #    subsets of reference shape files
#         #
#         szsrcshapefile                  = szrefshapefile_2018
#         szdstshapefile                  = szshapefile_2018_1ha_allcrops
#         lstfieldIDs                     = None
#         lstcroptypeids                  = lst_all_relevant_crops
#         iminimumfieldareainsquaremeters = 10000
#         imaximumfieldareainsquaremeters = None
# 
#         CropSARParcels.cropsar_shptoshp(szsrcshapefile, szdstshapefile, lstfieldIDs, lstcroptypeids, iminimumfieldareainsquaremeters, imaximumfieldareainsquaremeters, verbose=True)
# 
#     #
#     #    local - test
#     #
#     szshapefile_2017_10ha_901 = r"D:\data\training\V000\Croptype_901\CroptypesFlemishParcels_2017_10ha.shp"
#     if False:
#         #
#         #    subsets of reference shape files
#         #
#         szsrcshapefile                  = szrefshapefile_2017
#         szdstshapefile                  = szshapefile_2017_10ha_901
#         lstfieldIDs                     = None
#         lstcroptypeids                  = lst_potato_non_early
#         iminimumfieldareainsquaremeters = 100000
#         imaximumfieldareainsquaremeters = None
# 
#         CropSARParcels.cropsar_shptoshp(szsrcshapefile, szdstshapefile, lstfieldIDs, lstcroptypeids, iminimumfieldareainsquaremeters, imaximumfieldareainsquaremeters, verbose=True)
# 
#     #
#     #    local - test
#     #
#     szshapefile_2017_10ha_allcrops = r"D:\data\training\V000\Croptype_all\CroptypesFlemishParcels_2017_10ha.shp"
#     if False:
#         #
#         #    subsets of reference shape files
#         #
#         szsrcshapefile                  = szrefshapefile_2017
#         szdstshapefile                  = szshapefile_2017_10ha_allcrops
#         lstfieldIDs                     = None
#         lstcroptypeids                  = lst_all_relevant_crops
#         iminimumfieldareainsquaremeters = 100000
#         imaximumfieldareainsquaremeters = None
# 
#         CropSARParcels.cropsar_shptoshp(szsrcshapefile, szdstshapefile, lstfieldIDs, lstcroptypeids, iminimumfieldareainsquaremeters, imaximumfieldareainsquaremeters, verbose=True)
# 
#     #
#     #    local - test
#     #
#     szshapefile_2018_10ha_allcrops = r"D:\data\training\V000\Croptype_all\CroptypesFlemishParcels_2018_10ha.shp"
#     if False:
#         #
#         #    subsets of reference shape files
#         #
#         szsrcshapefile                  = szrefshapefile_2018
#         szdstshapefile                  = szshapefile_2018_10ha_allcrops
#         lstfieldIDs                     = None
#         lstcroptypeids                  = lst_all_relevant_crops
#         iminimumfieldareainsquaremeters = 100000
#         imaximumfieldareainsquaremeters = None
# 
#         CropSARParcels.cropsar_shptoshp(szsrcshapefile, szdstshapefile, lstfieldIDs, lstcroptypeids, iminimumfieldareainsquaremeters, imaximumfieldareainsquaremeters, verbose=True)
# 
#     #
#     #    local - test
#     #
#     szshapefile_2017_20ha_allcrops = r"D:\data\training\V000\Croptype_all\CroptypesFlemishParcels_2017_20ha.shp"
#     if False:
#         #
#         #    subsets of reference shape files
#         #
#         szsrcshapefile                  = szrefshapefile_2017
#         szdstshapefile                  = szshapefile_2017_20ha_allcrops
#         lstfieldIDs                     = None
#         lstcroptypeids                  = lst_all_relevant_crops
#         iminimumfieldareainsquaremeters = 10000 * 20
#         imaximumfieldareainsquaremeters = None
# 
#         CropSARParcels.cropsar_shptoshp(szsrcshapefile, szdstshapefile, lstfieldIDs, lstcroptypeids, iminimumfieldareainsquaremeters, imaximumfieldareainsquaremeters, verbose=True)
# 
#     #
#     #    local - test
#     #
#     szshapefile_2017_30ha_allcrops = r"D:\data\training\V000\Croptype_all\CroptypesFlemishParcels_2017_30ha.shp"
#     if False:
#         #
#         #    subsets of reference shape files
#         #
#         szsrcshapefile                  = szrefshapefile_2017
#         szdstshapefile                  = szshapefile_2017_30ha_allcrops
#         lstfieldIDs                     = None
#         lstcroptypeids                  = lst_all_relevant_crops
#         iminimumfieldareainsquaremeters = 10000 * 30
#         imaximumfieldareainsquaremeters = None
# 
#         CropSARParcels.cropsar_shptoshp(szsrcshapefile, szdstshapefile, lstfieldIDs, lstcroptypeids, iminimumfieldareainsquaremeters, imaximumfieldareainsquaremeters, verbose=True)
# 
# 
#     ##################################
#     #                                #
#     #    make training data files    #
#     #                                #
#     ##################################
# 
#     do_s1       = True
#     do_s2       = False
#     do_training = do_s1 or do_s2
#     do_pertype  = True
#     do_merge    = True
# 
#     #
#     #    cleaning parameters
#     #
#     lstfieldIDs                      = None
#     lstcroptypeids                   = lst_all_relevant_crops            # (logging) optional if shape file already limited to required crops  
#     iminimumfieldareainsquaremeters  = 10000 * 40                        # (logging) optional if shape file already limited to minimum area (m2)
#     imaximumfieldareainsquaremeters  = None 
#     #
#     #    common S1 & S2
#     #
#     inarrowfieldbordersinmeter       = 10                                # 10 meter inside polygon
#     #
#     #    S1
#     #
#     imovingaveragewindow             = 11                                # S1 smoothing by average over moving window
#     #
#     #    S2
#     #
#     lstnarrowedfieldvalidscenevalues = [ 4, 5, 6, 7 ]                    # vegetation, bare soil, water, low probability cloud
#     inarrowedfieldminpctareavalid    = 85                                # at least inarrowedfieldminpctareavalid % in narrowfield
#     iextendfieldbordersinmeter       = [300, 1000, 1000]                 # extended borders
#     lstextendedfieldvalidscenevalues = [[3,8,9], [8,9], [7,10]]          # everything but medium and high probability clouds or shadows - medium and high probability clouds - high probability clouds and cirrus 
#     iextendedfieldminpctareavalid    = [-5, -5, -50]                     # ?
#     maxdip                           = 0.01                              # V003 uses 0.006. V004 0.01
#     maxdif                           = 0.1                               # 0.2  #0.05
#     maxgap                           = 60                                # V003-None V004-60
#     maxpas                           = 999
#     #
#     #
#     #
#     szyyyycropyear = "2017" #"2017" #"2017" #"2017" # "2018"
#     szcroplayer    = "S2_NDVI" #"S2_FAPAR" #"S2_NDVI" #"S2_FAPAR" #"S2_NDVI"
#     #
#     #
#     #
#     if do_training:
#         #
#         #
#         #
#         szshapefile                      = dict_szrefshapefiles[str(int(szyyyycropyear))]
#         szyyyymmddfirst                  = str(int(szyyyycropyear) - 1)  + "0601" 
#         szyyyymmddlast                   = str(int(szyyyycropyear))      + "0801" #"1231"
# 
#         if do_pertype :
#             #
#             # files per croptype in the list
#             #
#             if do_s1:
#                 for croptypeid in lstcroptypeids:
#                     makeS1TrainingData("S1_SIGMA0", szyyyycropyear, szshapefile, szyyyymmddfirst, szyyyymmddlast,
#                                        inarrowfieldbordersinmeter, imovingaveragewindow,
#                                        lstfieldIDs, [croptypeid], iminimumfieldareainsquaremeters, imaximumfieldareainsquaremeters, verbose=True)
#                 if do_merge:
#                     mergeS1trainingdata("S1_SIGMA0", szyyyycropyear, szshapefile, szyyyymmddfirst, szyyyymmddlast,
#                                         iminimumfieldareainsquaremeters, inarrowfieldbordersinmeter, lstcroptypeids)
# 
#             if do_s2:
#                 for croptypeid in lstcroptypeids:
#                     makeS2TrainingData(szcroplayer, szyyyycropyear, szshapefile, szyyyymmddfirst, szyyyymmddlast, 
#                                        inarrowfieldbordersinmeter, lstnarrowedfieldvalidscenevalues, inarrowedfieldminpctareavalid, 
#                                        iextendfieldbordersinmeter, lstextendedfieldvalidscenevalues, iextendedfieldminpctareavalid, 
#                                        maxdip, maxdif, maxgap, maxpas, 
#                                        lstfieldIDs, [croptypeid], iminimumfieldareainsquaremeters, imaximumfieldareainsquaremeters, verbose=True)
#                 if do_merge:
#                     mergeS2trainingdata(szcroplayer, szyyyycropyear, szshapefile, szyyyymmddfirst, szyyyymmddlast,
#                                         iminimumfieldareainsquaremeters, inarrowfieldbordersinmeter, lstcroptypeids)
# 
#         else:
#             #
#             # single set of files containing all croptypes in the list
#             #
#             if do_s1:
#                 makeS1TrainingData("S1_SIGMA0", szyyyycropyear, szshapefile, szyyyymmddfirst, szyyyymmddlast,
#                                    inarrowfieldbordersinmeter, imovingaveragewindow,
#                                    lstfieldIDs, lstcroptypeids, iminimumfieldareainsquaremeters, imaximumfieldareainsquaremeters, verbose=True)
#             if do_s2:
#                 makeS2TrainingData(szcroplayer, szyyyycropyear, szshapefile, szyyyymmddfirst, szyyyymmddlast, 
#                                    inarrowfieldbordersinmeter, lstnarrowedfieldvalidscenevalues, inarrowedfieldminpctareavalid, 
#                                    iextendfieldbordersinmeter, lstextendedfieldvalidscenevalues, iextendedfieldminpctareavalid, 
#                                    maxdip, maxdif, maxgap, maxpas, 
#                                    lstfieldIDs, lstcroptypeids, iminimumfieldareainsquaremeters, imaximumfieldareainsquaremeters, verbose=True)
# 
# #
# #
# #
# if __name__ == '__main__':
#     zemain()

