#
#
#
import logging
import os
import sys
import pandas
import geopandas
import threading
import datetime
import dateutil
import time
import traceback

from . import wicconfig
from . import tsservice


#
#    thread querying simple time series
#
class SimpleTimeSeriesThread(threading.Thread):
    """
    Worker querying a simple time series for one parcel, retrying on failure.

    The actual work is done in run(): dataclient.get_timeseries is called with
    geodataframe.loc[fieldId], the layer name and the first/last date strings.
    The result is stored in the 'data' attribute, which is None as long as no
    attempt has succeeded (also before run() was ever called).

    :param fieldId: label used to select the parcel via geodataframe.loc[fieldId]
    :param szlayername: layer name, passed unchanged to the data client
    :param geodataframe: object supporting .loc[fieldId] lookups
    :param szyyyymmddfirst: first date of the query, passed unchanged to the data client
    :param szyyyymmddlast: last date of the query, passed unchanged to the data client
    :param imaxretries: run() gives up once this many attempts were made (one attempt is always made)
    :param isleepseconds: base delay; after failed attempt n, run() sleeps isleepseconds * n seconds
    :param dataclient: object providing get_timeseries(parcel, layername, first, last)
    :param verbose: if true, progress and the traceback of failed attempts are logged at info level
    """

    #
    #
    #
    def __init__(self, fieldId, szlayername, geodataframe, szyyyymmddfirst, szyyyymmddlast, imaxretries, isleepseconds, dataclient, verbose = True):
        threading.Thread.__init__(self)
        #
        #
        #
        self.fieldId          = fieldId
        self.szlayername      = szlayername
        self.geodataframe     = geodataframe
        self.szyyyymmddfirst  = szyyyymmddfirst
        self.szyyyymmddlast   = szyyyymmddlast
        self.imaxretries      = imaxretries
        self.isleepseconds    = isleepseconds
        self.dataclient       = dataclient
        self.verbose          = verbose
        #
        #    result of the query; defined from the start so that reading it
        #    before (or without) run() yields None instead of an AttributeError
        #
        self.data             = None
        #
        #
        #
        if self.verbose: logging.info("SimpleTimeSeriesThread - init - Parcel (%s) Layer(%s) %s - %s", self.fieldId, self.szlayername, self.szyyyymmddfirst, self.szyyyymmddlast)

    #
    #
    #
    def run(self):
        """
        Query the data client until it returns data or the attempts are exhausted.

        Every exception raised by an attempt is caught; the attempt then counts as
        failed. On return, self.data holds the obtained data, or None on failure.
        """
        attempt = 0
        while True:
            #
            #    one attempt; its duration is only used in the log messages
            #
            datetime_tick_attempt = datetime.datetime.now()
            attempt += 1
            if self.verbose: logging.info("SimpleTimeSeriesThread - exec - Parcel (%s) Layer(%s) - attempt(%s) starts", self.fieldId, self.szlayername, attempt)

            try:
                self.data = self.dataclient.get_timeseries(self.geodataframe.loc[self.fieldId], self.szlayername, self.szyyyymmddfirst, self.szyyyymmddlast)

            except Exception:
                #
                #    best effort: the failure is reported below and possibly retried
                #
                self.data = None
                if self.verbose: logging.info(traceback.format_exc())

            elapsed_seconds = int((datetime.datetime.now() - datetime_tick_attempt).total_seconds())

            #
            #    success
            #
            if self.data is not None:
                if self.verbose: logging.info("SimpleTimeSeriesThread - exec - Parcel (%s) Layer(%s) - attempt(%s) success (%s seconds)", self.fieldId, self.szlayername, attempt, elapsed_seconds)
                return

            #
            #    if data series was not (yet) obtained: consider retry
            #
            if self.imaxretries <= attempt:
                logging.error("SimpleTimeSeriesThread - exec - Parcel (%s) Layer(%s) - attempt(%s) failed (%s seconds) - abend", self.fieldId, self.szlayername, attempt, elapsed_seconds)
                return

            #
            #    wait a while - the delay grows linearly with the number of attempts made
            #
            retry_seconds = self.isleepseconds * attempt
            logging.error("SimpleTimeSeriesThread - exec - Parcel (%s) Layer(%s) - attempt(%s) failed (%s seconds) - retry in %s seconds", self.fieldId, self.szlayername, attempt, elapsed_seconds, retry_seconds)
            time.sleep(retry_seconds)

#
#    thread querying histogram time series
#
class HistoTimeSeriesThread(threading.Thread):
    """
    Worker querying a histogram time series for one parcel, retrying on failure.

    The actual work is done in run(): dataclient.get_histogram is called with
    geodataframe.loc[fieldId], the layer name and the first/last date strings.
    The result is stored in the 'data' attribute, which is None as long as no
    attempt has succeeded (also before run() was ever called).

    :param fieldId: label used to select the parcel via geodataframe.loc[fieldId]
    :param szlayername: layer name, passed unchanged to the data client
    :param geodataframe: object supporting .loc[fieldId] lookups
    :param szyyyymmddfirst: first date of the query, passed unchanged to the data client
    :param szyyyymmddlast: last date of the query, passed unchanged to the data client
    :param imaxretries: run() gives up once this many attempts were made (one attempt is always made)
    :param isleepseconds: base delay; after failed attempt n, run() sleeps isleepseconds * n seconds
    :param dataclient: object providing get_histogram(parcel, layername, first, last)
    :param verbose: if true, progress and the traceback of failed attempts are logged at info level
    """

    #
    #
    #
    def __init__(self, fieldId, szlayername, geodataframe, szyyyymmddfirst, szyyyymmddlast, imaxretries, isleepseconds, dataclient, verbose = True):
        threading.Thread.__init__(self)
        #
        #
        #
        self.fieldId          = fieldId
        self.szlayername      = szlayername
        self.geodataframe     = geodataframe
        self.szyyyymmddfirst  = szyyyymmddfirst
        self.szyyyymmddlast   = szyyyymmddlast
        self.imaxretries      = imaxretries
        self.isleepseconds    = isleepseconds
        self.dataclient       = dataclient
        self.verbose          = verbose
        #
        #    result of the query; defined from the start so that reading it
        #    before (or without) run() yields None instead of an AttributeError
        #
        self.data             = None
        #
        #
        #
        if self.verbose: logging.info("HistoTimeSeriesThread - init - Parcel (%s) Layer(%s) %s - %s", self.fieldId, self.szlayername, self.szyyyymmddfirst, self.szyyyymmddlast)

    #
    #
    #
    def run(self):
        """
        Query the data client until it returns data or the attempts are exhausted.

        Every exception raised by an attempt is caught; the attempt then counts as
        failed. On return, self.data holds the obtained data, or None on failure.
        """
        attempt = 0
        while True:
            #
            #    one attempt; its duration is only used in the log messages
            #
            datetime_tick_attempt = datetime.datetime.now()
            attempt += 1
            if self.verbose: logging.info("HistoTimeSeriesThread - exec - Parcel (%s) Layer(%s) - attempt(%s) starts", self.fieldId, self.szlayername, attempt)

            try:
                self.data = self.dataclient.get_histogram(self.geodataframe.loc[self.fieldId],  self.szlayername, self.szyyyymmddfirst, self.szyyyymmddlast)

            except Exception:
                #
                #    best effort: the failure is reported below and possibly retried
                #
                self.data = None
                if self.verbose: logging.info(traceback.format_exc())

            elapsed_seconds = int((datetime.datetime.now() - datetime_tick_attempt).total_seconds())

            #
            #    success
            #
            if self.data is not None:
                if self.verbose: logging.info("HistoTimeSeriesThread - exec - Parcel (%s) Layer(%s) - attempt(%s) success (%s seconds)", self.fieldId, self.szlayername, attempt, elapsed_seconds)
                return

            #
            #    if data series was not (yet) obtained: consider retry
            #
            if self.imaxretries <= attempt:
                logging.error("HistoTimeSeriesThread - exec - Parcel (%s) Layer(%s) - attempt(%s) failed (%s seconds) - abend", self.fieldId, self.szlayername, attempt, elapsed_seconds)
                return

            #
            #    wait a while - the delay grows linearly with the number of attempts made
            #
            retry_seconds = self.isleepseconds * attempt
            logging.error("HistoTimeSeriesThread - exec - Parcel (%s) Layer(%s) - attempt(%s) failed (%s seconds) - retry in %s seconds", self.fieldId, self.szlayername, attempt, elapsed_seconds, retry_seconds)
            time.sleep(retry_seconds)

#
#
#
def wicCollectParcelSimpleTimeSeriesData(pidx, geodataframe, layerId, lsibuffersinmeter, szlayername, dataclient):
    """
    collect/update the raw 'simple' time series csv of one parcel for one layer, once per buffer.

    for each buffer in lsibuffersinmeter:
    - an existing csv (if any) is read to find the dates already available
    - the query period is taken from wicconfig.wiccollectparams ('datetimedatefirst', 'datetimedatelast');
      a missing last date defaults to today, a missing first date defaults to the day after the
      last registration in the csv, or (without csv) to the first day of the month before the last date
    - the parcel geometry is buffered (if buffer is not 0) and queried via SimpleTimeSeriesThread
    - obtained data is merged over the existing data and written to the csv

    :param pidx : unique parcel index value
    :param geodataframe : GeoDataFrame containing the parcel with this index value
    :param layerId : layer identifier used to locate the csv in the workspace
    :param lsibuffersinmeter : iterable of buffer distances (applied in EPSG:32631, hence in meters)
    :param szlayername : layer name passed to the data client
    :param dataclient : data client handed over to SimpleTimeSeriesThread
    """

    logging.info("wiccollect - wicCollectParcelSimpleTimeSeriesData - parcel (%s) - layer (%s) - starts", str(pidx), str(layerId))

    #
    #
    #
    for ibuffer in lsibuffersinmeter:

        time.sleep(10) # give others a chance

        #
        #    common prefix of all log messages for this parcel / layer / buffer
        #
        szlogcontext = "wiccollect - wicCollectParcelSimpleTimeSeriesData - parcel (%s) - layer name (%s) - buffer (%s)" % (str(pidx), str(szlayername), str(ibuffer))
        logging.info(szlogcontext)

        #
        #
        #
        last_date_available  = None
        olddata_pandasseries = None

        if wicconfig.wicworkspace.parcelRawDataSimpleTimeseriesCSVExists(pidx, layerId, ibuffer):
            #
            #    the csv for the specified product (layerId) for specified parcel (pidx) already exists
            #
            olddata_pandasdataframe = pandas.read_csv(wicconfig.wicworkspace.parcelRawDataSimpleTimeseriesCSV(pidx, layerId, ibuffer), index_col=0, parse_dates=True, float_precision='round_trip')
            if olddata_pandasdataframe.empty:
                #
                #    a csv without registrations has no first/last date: handle it as if there was no csv
                #
                logging.warning("%s - existing data: csv contains no registrations - ignored", szlogcontext)
            else:
                olddata_pandasdataframe.sort_index(inplace=True)                 # should be obsolete
                first_date_available = olddata_pandasdataframe.index[0].date()   # <class 'datetime.date'>
                last_date_available  = olddata_pandasdataframe.index[-1].date()  # <class 'datetime.date'>
                olddata_pandasseries = olddata_pandasdataframe.iloc[:, 0]
                logging.info("%s - existing data: from: %s till: %s registrations: %s", szlogcontext, first_date_available, last_date_available, len(olddata_pandasdataframe.index))

        #
        #    which defaults policy - we should install a very large committee to discuss this for several years
        #
        last_query_date = wicconfig.wiccollectparams['datetimedatelast']  # should be  <class 'datetime.date'>
        if last_query_date is None:
            last_query_date = datetime.date.today()
            logging.info("%s - required till date not specified. defaults to: %s", szlogcontext, last_query_date)

        first_query_date = wicconfig.wiccollectparams['datetimedatefirst']
        if first_query_date is None:
            if last_date_available is None:
                #
                #    first day of the month preceding last_query_date (plain datetime arithmetic,
                #    so this does not depend on the dateutil.relativedelta submodule being loaded)
                #
                first_query_date = (last_query_date.replace(day=1) - datetime.timedelta(days=1)).replace(day=1)
            else:
                first_query_date = last_date_available + datetime.timedelta(days=1)
            logging.info("%s - required from date not specified. defaults to: %s", szlogcontext, first_query_date)

        logging.info("%s - required data: from: %s till: %s", szlogcontext, first_query_date, last_query_date)

        #
        #    which update/overwrite/ignore policy - another committee needed
        #
        if last_query_date < first_query_date:
            logging.info("%s - required data: from: %s till: %s - ignored.", szlogcontext, first_query_date, last_query_date)
            continue

        #
        #    apply buffer if present
        #
        pidxgeodataframe = geodataframe.loc[pidx:pidx].copy()                    # copy geometry for this parcel
        if ibuffer != 0:
            pidxgeodataframe = pidxgeodataframe.to_crs('epsg:32631')   # UTM-31
            #pidxgeodataframe = pidxgeodataframe.to_crs('epsg:3857')    # geometry to "Web_Mercator" (meter-based) (gives small differences with UTM-31, but is more general)
            pidxgeodataframe['geometry'] = pidxgeodataframe.buffer(ibuffer)      # apply buffer
            pidxgeodataframe = pidxgeodataframe.to_crs('epsg:4326')    # back to EPSG:4326 (WGS 84) to access data-client

        #
        #    run() is called directly: the query (including its retries) is executed in the calling thread
        #
        simpleTimeSeriesThread = SimpleTimeSeriesThread(pidx, szlayername, pidxgeodataframe, first_query_date.strftime('%Y%m%d'), last_query_date.strftime('%Y%m%d'), 10, 600, dataclient)
        simpleTimeSeriesThread.run()
        newdata_pandasseries = simpleTimeSeriesThread.data

        #
        #    nothing obtained - an empty result cannot be merged (no first/last date) and is handled like a failure
        #
        if newdata_pandasseries is None or len(newdata_pandasseries.index) == 0:
            logging.warning("%s - obtain data:   from: %s till: %s - failed/no data available", szlogcontext, first_query_date, last_query_date)
            continue

        newdata_pandasseries = newdata_pandasseries.sort_index()    # probably obsolete, but not with all dataclient.tsservice interfaces
        first_date_newdata = newdata_pandasseries.index[0].date()   # <class 'datetime.date'>
        last_date_newdata  = newdata_pandasseries.index[-1].date()  # <class 'datetime.date'>

        logging.info("%s - obtained data: from: %s till: %s registrations: %s", szlogcontext, first_date_newdata, last_date_newdata, len(newdata_pandasseries.index))

        if olddata_pandasseries is not None:
            #
            #    merge old and new data. old will be overwritten where new available
            #    BEWARE : this can give inconsistencies in case old data contains (invalid, obsolete, ...) registrations, not present in new data
            #
            index  = newdata_pandasseries.index.union(olddata_pandasseries.index)
            data   = [ newdata_pandasseries.loc[i] if i in newdata_pandasseries.index else olddata_pandasseries.loc[i]  for i in index]
            newdata_pandasseries = pandas.Series(index=index, data=data, name=pidx)
            first_date_newdata   = newdata_pandasseries.index[0].date()   # <class 'datetime.date'>
            last_date_newdata    = newdata_pandasseries.index[-1].date()  # <class 'datetime.date'>

        #
        #    (over)write the csv: one column, named after the parcel index
        #
        pandas.DataFrame(index=newdata_pandasseries.index, columns=[pidx], data=newdata_pandasseries.values).to_csv(wicconfig.wicworkspace.parcelRawDataSimpleTimeseriesCSV(pidx, layerId, ibuffer))

        logging.info("%s - updated  data: from: %s till: %s registrations: %s", szlogcontext, first_date_newdata, last_date_newdata, len(newdata_pandasseries.index))

#
#
#
def wicCollectParcelHistoTimeSeriesData(pidx, geodataframe, layerId, lsibuffersinmeter, szlayername, dataclient):
    """
    collect/update the raw histogram time series csv of one parcel for one layer, once per buffer.

    for each buffer in lsibuffersinmeter:
    - an existing csv (if any) is read to find the dates already available
    - the query period is taken from wicconfig.wiccollectparams ('datetimedatefirst', 'datetimedatelast');
      a missing last date defaults to today, a missing first date defaults to the day after the
      last registration in the csv, or (without csv) to the first day of the month before the last date
    - the parcel geometry is buffered (if buffer is not 0) and queried via HistoTimeSeriesThread
    - obtained data is merged over the existing data, missing values are set to 0,
      all values are converted to int and the result is written to the csv

    :param pidx : unique parcel index value
    :param geodataframe : GeoDataFrame containing the parcel with this index value
    :param layerId : layer identifier used to locate the csv in the workspace
    :param lsibuffersinmeter : iterable of buffer distances (applied in EPSG:32631, hence in meters)
    :param szlayername : layer name passed to the data client
    :param dataclient : data client handed over to HistoTimeSeriesThread
    """

    logging.info("wiccollect - wicCollectParcelHistoTimeSeriesData - parcel (%s) - layer (%s) - starts", str(pidx), str(layerId))

    #
    #
    #
    for ibuffer in lsibuffersinmeter:

        time.sleep(10) # give others a chance

        #
        #    common prefix of all log messages for this parcel / layer / buffer
        #
        szlogcontext = "wiccollect - wicCollectParcelHistoTimeSeriesData - parcel (%s) - layer name (%s) - buffer (%s)" % (str(pidx), str(szlayername), str(ibuffer))
        logging.info(szlogcontext)

        #
        #
        #
        last_date_available     = None
        olddata_pandasdataframe = None

        if wicconfig.wicworkspace.parcelRawDataHistoTimeseriesCSVExists(pidx, layerId, ibuffer):
            #
            #    the csv for the specified product (layerId) for specified parcel (pidx) already exists
            #
            olddata_pandasdataframe = pandas.read_csv(wicconfig.wicworkspace.parcelRawDataHistoTimeseriesCSV(pidx, layerId, ibuffer), index_col=0, parse_dates=True, dtype=int) #float_precision='round_trip')
            if olddata_pandasdataframe.empty:
                #
                #    a csv without registrations has no first/last date: handle it as if there was no csv
                #
                logging.warning("%s - existing data: csv contains no registrations - ignored", szlogcontext)
                olddata_pandasdataframe = None
            else:
                olddata_pandasdataframe.columns = olddata_pandasdataframe.columns.map(float) # 'force' back to dtype='float64' (otherwise  dtype='object')
                olddata_pandasdataframe.sort_index(inplace=True)                             # should be obsolete
                first_date_available = olddata_pandasdataframe.index[0].date()               # <class 'datetime.date'>
                last_date_available  = olddata_pandasdataframe.index[-1].date()              # <class 'datetime.date'>
                logging.info("%s - existing data: from: %s till: %s registrations: %s", szlogcontext, first_date_available, last_date_available, len(olddata_pandasdataframe.index))

        #
        #    which defaults policy - we should install a very large committee to discuss this for several years
        #
        last_query_date = wicconfig.wiccollectparams['datetimedatelast']  # should be  <class 'datetime.date'>
        if last_query_date is None:
            last_query_date = datetime.date.today()
            logging.info("%s - required till date not specified. defaults to: %s", szlogcontext, last_query_date)

        first_query_date = wicconfig.wiccollectparams['datetimedatefirst']
        if first_query_date is None:
            if last_date_available is None:
                #
                #    first day of the month preceding last_query_date (plain datetime arithmetic,
                #    so this does not depend on the dateutil.relativedelta submodule being loaded)
                #
                first_query_date = (last_query_date.replace(day=1) - datetime.timedelta(days=1)).replace(day=1)
            else:
                first_query_date = last_date_available + datetime.timedelta(days=1)
            logging.info("%s - required from date not specified. defaults to: %s", szlogcontext, first_query_date)

        logging.info("%s - required data: from: %s till: %s", szlogcontext, first_query_date, last_query_date)

        #
        #    which update/overwrite/ignore policy - another committee needed
        #
        if last_query_date < first_query_date:
            logging.info("%s - required data: from: %s till: %s - ignored.", szlogcontext, first_query_date, last_query_date)
            continue

        #
        #    apply buffer if present
        #
        pidxgeodataframe = geodataframe.loc[pidx:pidx].copy()                    # copy geometry for this parcel
        if ibuffer != 0:
            pidxgeodataframe = pidxgeodataframe.to_crs('epsg:32631')   # UTM-31
            #pidxgeodataframe = pidxgeodataframe.to_crs('epsg:3857')    # geometry to "Web_Mercator" (meter-based) (gives small differences with UTM-31, but is more general)
            pidxgeodataframe['geometry'] = pidxgeodataframe.buffer(ibuffer)      # apply buffer
            pidxgeodataframe = pidxgeodataframe.to_crs('epsg:4326')    # back to EPSG:4326 (WGS 84) to access data-client

        #
        #    run() is called directly: the query (including its retries) is executed in the calling thread
        #
        histoTimeSeriesThread = HistoTimeSeriesThread(pidx, szlayername, pidxgeodataframe, first_query_date.strftime('%Y%m%d'), last_query_date.strftime('%Y%m%d'), 10, 600, dataclient)
        histoTimeSeriesThread.run()
        newdata_pandasdataframe = histoTimeSeriesThread.data

        #
        #    nothing obtained - an empty result cannot be merged (no first/last date) and is handled like a failure
        #
        if newdata_pandasdataframe is None or len(newdata_pandasdataframe.index) == 0:
            logging.warning("%s - obtain data:   from: %s till: %s - failed/no data available", szlogcontext, first_query_date, last_query_date)
            continue

        newdata_pandasdataframe.columns = newdata_pandasdataframe.columns.map(float) # 'force' to dtype='float64' (just to be sure)
        newdata_pandasdataframe = newdata_pandasdataframe.sort_index()               # probably obsolete, but not with all dataclient.tsservice interfaces
        first_date_newdata = newdata_pandasdataframe.index[0].date()                 # <class 'datetime.date'>
        last_date_newdata  = newdata_pandasdataframe.index[-1].date()                # <class 'datetime.date'>

        logging.info("%s - obtained data: from: %s till: %s registrations: %s", szlogcontext, first_date_newdata, last_date_newdata, len(newdata_pandasdataframe.index))

        if olddata_pandasdataframe is not None:
            #
            #    merge old and new data. old will be overwritten where new available
            #    BEWARE : this can give inconsistencies in case old data contains (invalid, obsolete, ...) registrations, not present in new data
            #    TODO: test if it does what we want it to do - e.g. what if NaN column occurs
            #
            index     = newdata_pandasdataframe.index.union(olddata_pandasdataframe.index)
            columns   = newdata_pandasdataframe.columns.union(olddata_pandasdataframe.columns)
            dataframe = pandas.DataFrame(index=index, columns=columns, dtype=int)
            dataframe.loc[olddata_pandasdataframe.index, :] = olddata_pandasdataframe
            dataframe.loc[newdata_pandasdataframe.index, :] = newdata_pandasdataframe
            newdata_pandasdataframe = dataframe
            first_date_newdata   = newdata_pandasdataframe.index[0].date()   # <class 'datetime.date'>
            last_date_newdata    = newdata_pandasdataframe.index[-1].date()  # <class 'datetime.date'>

        #
        #    cells without a value become 0; everything is stored as int
        #
        newdata_pandasdataframe = newdata_pandasdataframe.fillna(0)
        newdata_pandasdataframe = newdata_pandasdataframe.astype(int)
        newdata_pandasdataframe.to_csv(wicconfig.wicworkspace.parcelRawDataHistoTimeseriesCSV(pidx, layerId, ibuffer))

        logging.info("%s - updated  data: from: %s till: %s registrations: %s", szlogcontext, first_date_newdata, last_date_newdata, len(newdata_pandasdataframe.index))

#
#
#
def wicCollectParcel(pidx, geodataframe):
    """
    collect/update the raw data of a single parcel for every layer requested in the parameters.

    layers listed in wicconfig.wiccollectparams['dcrawdatalayers'] are handled one by one:
    layers unknown to wicconfig.wiccollectconfig['dcrawdatalayers'] and layers whose 'bdocollect'
    flag is false are skipped; the others are collected as histogram or as simple time series,
    depending on the 'bdohistogram' flag of the layer configuration.

    :param pidx : unique parcel index value
    :param geodataframe : geopandas.geodataframe.GeoDataFrame containing the parcel with this index value
    """

    szparcel = str(pidx)
    logging.info("wiccollect - wicCollectParcel - raw data collection/update parcel (%s) starts", szparcel)

    requestedlayers = wicconfig.wiccollectparams['dcrawdatalayers']
    for layerId in requestedlayers:
        #
        #   layerId must be known in parameters and configuration
        #
        if layerId not in wicconfig.wiccollectconfig['dcrawdatalayers']:
            logging.warning("wiccollect - wicCollectParcel - raw data collection/update parcel (%s) - layer not configured: %s", szparcel, str(layerId))
            continue

        layerparams = wicconfig.wiccollectparams['dcrawdatalayers'][layerId]
        layerconfig = wicconfig.wiccollectconfig['dcrawdatalayers'][layerId]

        #
        #   layer is known, but collection was not requested
        #
        if not layerparams['bdocollect']:
            logging.info("wiccollect - wicCollectParcel - raw data collection/update parcel (%s) - skipping %s", szparcel, str(layerId))
            continue

        logging.info("wiccollect - wicCollectParcel - raw data collection/update parcel (%s) - collect  %s", szparcel, str(layerId))

        #
        #   gather the layer settings before the data client is set up
        #
        buffers     = layerparams['lsibuffersinmeter']
        ishistogram = layerconfig['bdohistogram']
        layername   = layerconfig['szlayername']
        client      = tsservice.DataClient.for_probav(endpoint=layerconfig['endpoint'])

        #
        #   histogram layers and simple layers have their own collector
        #
        collector = wicCollectParcelHistoTimeSeriesData if ishistogram else wicCollectParcelSimpleTimeSeriesData
        collector(pidx, geodataframe, layerId, buffers, layername, client)

#
#
#
def wicCollect():
    """
    collection of raw data - all parameters are expected to be available from 
    wicconfig.wiccollectconfig dictionary specifying the properties of the raw data to be collected 
    wicconfig.wiccollectparams dictionary specifying the specific parameters for the raw data to be collected 
    wicconfig.wicworkspace class defining the workspace organization 

    the parcels are read from the shape file wicconfig.wiccollectconfig['szparcelsshapefile'];
    if the file yields no parcels, a warning is logged and nothing is collected.

    :raises Exception: if the parcels index has duplicate keys, or if a raw data directory
                       (for the workspace or for a parcel) could not be created
    """
    #
    #    read the shape file into geopandas.geodataframe.GeoDataFrame
    #
    szparcelsshapefile  = wicconfig.wiccollectconfig['szparcelsshapefile']
    logging.info("wiccollect - wicCollect - using shape file: %s", str(szparcelsshapefile))
    parcelsgeodataframe = geopandas.read_file(szparcelsshapefile)
    #
    #    no parcels no fun
    #
    if parcelsgeodataframe.empty:
        #
        #    the file name is passed as a plain logging argument; wrapping it in a tuple
        #    would make the message show "('<file>',)" instead of the file name
        #
        logging.warning("wiccollect - wicCollect - exits. no fields selected from parcels ShapeFile %s", str(szparcelsshapefile))
        return
    #
    #    our CropSAR reference files do contain 'fieldID'
    #    in any case, the index must be unique, since we'll use it to specify SubDirectories
    #
    #    TODO: check if their string representation is unique
    #
    if 'fieldID' in parcelsgeodataframe.columns:
        parcelsgeodataframe.set_index( 'fieldID', inplace=True)
    
    if not parcelsgeodataframe.index.is_unique:
        raise Exception('parcels DataFrame Index has duplicate keys')

    logging.info("wiccollect - wicCollect - shape file specifies %s parcels", len(parcelsgeodataframe.index))
    
    #parcelsgeodataframe = parcelsgeodataframe.loc['000028044A31C991':'000028044A31C991']

    #
    #    check/create the workspace raw data directory
    #
    if not wicconfig.wicworkspace.parcelsRawDataPathExists():
        if not wicconfig.wicworkspace.parcelsRawDataPathCreate():
            raise Exception("workspace raw data path could not be created '%s'" % (str(wicconfig.wicworkspace.parcelsRawDataPath()), ))
        logging.info("wiccollect - wicCollect - workspace raw data path created: %s", str(wicconfig.wicworkspace.parcelsRawDataPath()))
    else:
        logging.info("wiccollect - wicCollect - workspace has existing raw data path: %s", str(wicconfig.wicworkspace.parcelsRawDataPath()))
    #
    #    process all/new data for all parcels in the parcelsgeodataframe
    #
    for pidx in parcelsgeodataframe.index:
        #
        #    check/create the workspace - raw data directory for this parcel (pidx)
        #
        if not wicconfig.wicworkspace.parcelRawDataPathExists(pidx):
            if not wicconfig.wicworkspace.parcelRawDataPathCreate(pidx):
                #
                #    TODO: do we want to continue with next parcel or abandon completely ?
                #
                raise Exception("workspace raw data path for parcel(%s) could not be created '%s'" % (str(pidx), str(wicconfig.wicworkspace.parcelRawDataPath(pidx)), ))
            logging.info("wiccollect - wicCollect - workspace raw data path for parcel(%s) created: %s", str(pidx), str(wicconfig.wicworkspace.parcelRawDataPath(pidx)))
        else:
            logging.info("wiccollect - wicCollect - workspace raw data path for parcel(%s) exists: %s", str(pidx), str(wicconfig.wicworkspace.parcelRawDataPath(pidx)))
        #
        #    collect/update the raw data of this parcel
        #
        wicCollectParcel(pidx, parcelsgeodataframe)

#
#
#
def main(argv=sys.argv):
    """
    entry point: verify the workspace, set up logging (console and wiccollect.log in the
    workspace directory), apply the optional date overrides and start the collection.

    :param argv : command line arguments - currently not evaluated (TODO: argument parsing etc.)
    :raises Exception: if the workspace root directory does not exist
    """
    #
    #    without workspace directory there is nowhere to log or to store data
    #
    if not wicconfig.wicworkspace.workspacePathExists():
        raise Exception("workspace root directory does not exist '%s'" % (str(wicconfig.wicworkspace.workspacePath()), ))

    #
    #    same layout for the console and for the log file in the workspace
    #
    szlogformat  = '%(asctime)s %(levelname).4s %(message)s'
    szdateformat = '%Y-%m-%d %H:%M:%S'
    logging.basicConfig(level=logging.INFO, format=szlogformat, datefmt=szdateformat)
    workspaceloghandler = logging.FileHandler(os.path.join(wicconfig.wicworkspace.workspacePath(), "wiccollect.log"))
    workspaceloghandler.setFormatter(logging.Formatter(szlogformat, datefmt=szdateformat))
    logging.getLogger().addHandler(workspaceloghandler)
    #
    #    TODO:
    #    - setup & check workspace config
    #    - setup & check workspace params
    #

    #
    #    optional overrides of the collection period, as "yyyymmdd" strings
    #    (e.g. "20150601" / "20161231"); None leaves the parameter in wicconfig untouched
    #
    dateoverrides = (
        ('datetimedatefirst', None),
        ('datetimedatelast',  None),
    )
    for szparamkey, szyyyymmdd in dateoverrides:
        if szyyyymmdd is not None:
            wicconfig.wiccollectparams[szparamkey] = pandas.to_datetime(szyyyymmdd).date()

    #
    #    do the actual work
    #
    wicCollect()

#
#
#
# script entry point: start the collection only when this module is executed directly, not when imported.
# NOTE(review): the module uses relative imports (from . import ...), so it presumably has to be
# started in package context (python -m <package>.<module>) - confirm.
if __name__ == '__main__':
    main()
