#
#
#
import logging
import os
import sys
import traceback
import pandas
import geopandas

from s2_clean import wicconfig
from s2_clean import cleans1timeseries
from s2_clean import cleans2timeseries

#
#
#
def pseudolayer(pseudolayerId):
    """
    Collect configuration and parameters of one pseudolayer and decide whether it has to be processed.

    The pseudolayer is looked up in wicconfig.wiccleanconfig['dcpseudolayers'] (configuration)
    and in wicconfig.wiccleanparams['dcpseudolayers'] (parameters). Every missing piece below the
    'dcpseudolayers' level is reported as a warning and disables the pseudolayer.

    :param pseudolayerId: key of the pseudolayer in both 'dcpseudolayers' dictionaries
    :return: tuple (dopseudolayer, pseudolayerconfig, pseudolayerparams, szparameterversion, pseudolayerId)
             - dopseudolayer is True only if everything was found and the 'bdoclean' parameter is truthy
             - pseudolayerconfig and pseudolayerparams are None as long as they were not found
             - szparameterversion is '' as long as the 'szversion' parameter was not found
    :raises Exception: if wiccleanconfig lacks 'dcpseudolayers', or - for a configured
                       pseudolayer - if wiccleanparams lacks 'dcpseudolayers'
    """
    #
    #    configuration
    #
    if 'dcpseudolayers' not in wicconfig.wiccleanconfig:
        raise Exception("wiccclean - wicconfig.wiccleanconfig 'dcpseudolayers'parameter missing")

    configuredlayers = wicconfig.wiccleanconfig['dcpseudolayers']
    if pseudolayerId not in configuredlayers:
        logging.warning("wiccclean - pseudolayer not configured: %s" % (str(pseudolayerId), ) )
        return (False, None, None, '', pseudolayerId)

    layerconfig = configuredlayers[pseudolayerId]
    #
    #    parameters - only examined for configured pseudolayers
    #
    if 'dcpseudolayers' not in wicconfig.wiccleanparams:
        raise Exception("wiccclean - wicconfig.wiccleanparams 'dcpseudolayers'parameter missing")

    parameterizedlayers = wicconfig.wiccleanparams['dcpseudolayers']
    if pseudolayerId not in parameterizedlayers:
        logging.warning("wiccclean - pseudolayer not parameterized: %s" % (str(pseudolayerId), ) )
        return (False, layerconfig, None, '', pseudolayerId)

    layerparams = parameterizedlayers[pseudolayerId]
    if 'szversion' not in layerparams:
        logging.warning("wiccclean - pseudolayer 'szversion' parameter missing: %s" % (str(pseudolayerId), ) )
        return (False, layerconfig, layerparams, '', pseudolayerId)

    szversion = str(layerparams['szversion'])
    if 'bdoclean' not in layerparams:
        logging.warning("wiccclean - pseudolayer 'bdoclean' parameter missing: %s" % (str(pseudolayerId), ) )
        return (False, layerconfig, layerparams, szversion, pseudolayerId)
    #
    #    all present: the 'bdoclean' parameter decides
    #
    enabled = True if layerparams['bdoclean'] else False
    return (enabled, layerconfig, layerparams, szversion, pseudolayerId)


#
#    find date ranges
#
def _firstandlast(currentfirst, currentlast, datetimeindex):
    """
    Widen a (first, last) date range so it also covers the first and last entry of an index.

    Only the first and the last entry of the index are inspected.
    NOTE(review): this presumably expects the index to be sorted ascending - the callers in
    this file sort their frames before calling.

    :param currentfirst: start date of the range so far, or None if there is none yet
    :param currentlast:  end date of the range so far, or None if there is none yet
    :param datetimeindex: index whose entries offer a date() method (e.g. pandas.DatetimeIndex)
    :return: tuple (first, last): the earlier of currentfirst and the date of the first index
             entry, and the later of currentlast and the date of the last index entry
    """
    indexfirst = datetimeindex[0].date()
    indexlast  = datetimeindex[-1].date()

    first = indexfirst
    if currentfirst and currentfirst < indexfirst:
        first = currentfirst

    last = indexlast
    if currentlast and currentlast > indexlast:
        last = currentlast

    return (first, last)

#
#
#
def wicCleanParcel_S1(pidx, geodataframe):
    """
    Run the 'S1_CLEAN' pseudolayer for one parcel.

    Reads the six raw S1 timeseries CSV files of the parcel (VV, VH and ANGLE, for the
    ASCENDING and the DESCENDING series) from the workspace, passes them to
    cleans1timeseries.S1TimeSeriesFromDataSeries and writes the eight resulting series
    (VV, VH, ANGLE and RVI per direction) as CSV files to the workspace results.

    Nothing is done (apart from an info message) if the pseudolayer is not enabled.
    Any exception during processing is logged and swallowed; nothing is returned.

    :param pidx: parcel index, used to locate the parcel files in the workspace
    :param geodataframe: not used by this function
    """
    dopseudolayer, pseudolayerconfig, pseudolayerparams, szparameterversion, pseudolayerId = pseudolayer('S1_CLEAN')

    if not dopseudolayer:
        logging.info("wiccclean - wicCleanParcel_S1 - parcel (%s) - skipping %s" % (str(pidx), str(pseudolayerId)) )
        return

    logging.info("wiccclean - wicCleanParcel_S1 - parcel (%s) - pseudolayer(%s) starts" % (str(pidx), str(pseudolayerId)) )

    #
    #    (direction, series key, workspace layer name) - in processing order
    #
    rawlayers = (
        ('ASCENDING',  'VV', "S1_GRD_SIGMA0_ASCENDING_VV"),
        ('ASCENDING',  'VH', "S1_GRD_SIGMA0_ASCENDING_VH"),
        ('ASCENDING',  'IA', "S1_GRD_SIGMA0_ASCENDING_ANGLE"),
        ('DESCENDING', 'VV', "S1_GRD_SIGMA0_DESCENDING_VV"),
        ('DESCENDING', 'VH', "S1_GRD_SIGMA0_DESCENDING_VH"),
        ('DESCENDING', 'IA', "S1_GRD_SIGMA0_DESCENDING_ANGLE"),
    )
    reslayers = (
        ('ASCENDING',  'VV',  'S1_GRD_SIGMA0_ASCENDING_VV'),
        ('ASCENDING',  'VH',  'S1_GRD_SIGMA0_ASCENDING_VH'),
        ('ASCENDING',  'IA',  'S1_GRD_SIGMA0_ASCENDING_ANGLE'),
        ('ASCENDING',  'RVI', 'S1_GRD_SIGMA0_ASCENDING_RVI'),
        ('DESCENDING', 'VV',  'S1_GRD_SIGMA0_DESCENDING_VV'),
        ('DESCENDING', 'VH',  'S1_GRD_SIGMA0_DESCENDING_VH'),
        ('DESCENDING', 'IA',  'S1_GRD_SIGMA0_DESCENDING_ANGLE'),
        ('DESCENDING', 'RVI', 'S1_GRD_SIGMA0_DESCENDING_RVI'),
    )

    try:
        workspace = wicconfig.wicworkspace
        inarrowfieldbordersinmeter = pseudolayerconfig['inarrowfieldbordersinmeter']
        #
        #    read all S1 layers from workspace CSV values
        #
        rawframes = []
        for _, _, szlayer in rawlayers:
            szcsvfile = workspace.parcelRawDataSimpleTimeseriesCSV(pidx, szlayer, inarrowfieldbordersinmeter)
            rawframes.append(pandas.read_csv(szcsvfile, index_col=0, parse_dates=True, float_precision='round_trip'))
        #
        #    sort them, and find the date range covering all of them
        #
        first_date = None
        last_date  = None
        for rawframe in rawframes:
            rawframe.sort_index(inplace=True)
            first_date, last_date = _firstandlast(first_date, last_date, rawframe.index)
        #
        #    first column of each frame, organized as { pidx : { direction : { series key : series } } }
        #
        parcelseries = {}
        for (szdirection, szkey, _), rawframe in zip(rawlayers, rawframes):
            parcelseries.setdefault(szdirection, {})[szkey] = rawframe.iloc[:,0]
        seriesdict = { pidx : parcelseries }

        imovingaveragewindow = int(pseudolayerparams['imovingaveragewindow']) # more parameterchecks?

        s1timeseriesobj  = cleans1timeseries.S1TimeSeriesFromDataSeries( pidx, seriesdict, first_date, last_date, verbose = True )
        s1timeseriesdict = s1timeseriesobj.getTimeSeriesData(imovingaveragewindow)[pidx]
        #
        #    write every resulting series as a single column (named after the parcel) CSV
        #
        for szdirection, szkey, szlayer in reslayers:
            resseries = s1timeseriesdict[szdirection][szkey]
            resframe  = pandas.DataFrame(index=resseries.index, columns=[pidx], data=resseries.values)
            resframe.to_csv(workspace.parcelResDataTimeseriesCSV(pidx, szlayer, szparameterversion))

    except Exception:
        logging.error (traceback.format_exc())
        logging.error("wiccclean - wicCleanParcel_S1 - parcel (%s) - pseudolayer(%s) failed." % (str(pidx), str(pseudolayerId)) )

#
#
#
def wicCleanParcel_S2(pidx, geodataframe):
    """
    Run the 'S2_CLEAN' pseudolayer for one parcel.

    Reads the raw "S2_FAPAR" timeseries and the "S2_SCENECLASSIFICATION" histogram timeseries
    of the parcel (for the narrowed field, and for each configured extended field border) from
    the workspace, passes them to cleans2timeseries.S2TimeSeriesFromDataFrames and writes the
    resulting 'ORIGN', 'CLEAN' and 'FLAGS' series as CSV files to the workspace results.

    Nothing is done (apart from an info message) if the pseudolayer is not enabled.
    Any exception during processing is logged and swallowed; nothing is returned.

    :param pidx: parcel index, used to locate the parcel files in the workspace
    :param geodataframe: not used by this function
    """
    dopseudolayer, pseudolayerconfig, pseudolayerparams, szparameterversion, pseudolayerId = pseudolayer('S2_CLEAN')

    if not dopseudolayer:
        logging.info("wiccclean - wicCleanParcel_S2 - parcel (%s) - skipping %s" % (str(pidx), str(pseudolayerId)) )
        return

    logging.info("wiccclean - wicCleanParcel_S2 - parcel (%s) - pseudolayer(%s) starts" % (str(pidx), str(pseudolayerId)) )

    try:
        workspace  = wicconfig.wicworkspace
        csvoptions = dict(index_col=0, parse_dates=True, float_precision='round_trip')
        #
        #    narrowed field: data and scene classification
        #
        inarrowfieldbordersinmeter = pseudolayerconfig['inarrowfieldbordersinmeter']
        fielddataframe  = pandas.read_csv(workspace.parcelRawDataSimpleTimeseriesCSV(pidx, "S2_FAPAR", inarrowfieldbordersinmeter), **csvoptions)
        fieldsceneframe = pandas.read_csv(workspace.parcelRawDataHistoTimeseriesCSV(pidx, "S2_SCENECLASSIFICATION", inarrowfieldbordersinmeter), **csvoptions)
        #
        #    extended fields: one scene classification per configured border
        #
        exsceneframes = [
            pandas.read_csv(workspace.parcelRawDataHistoTimeseriesCSV(pidx, "S2_SCENECLASSIFICATION", ibuffersinmeter), **csvoptions)
            for ibuffersinmeter in pseudolayerconfig['lstiextendfieldbordersinmeter']
        ]
        #
        #    sort them, and find the date range covering all of them
        #
        first_date = None
        last_date  = None
        for frame in [fielddataframe, fieldsceneframe] + exsceneframes:
            frame.sort_index(inplace=True)
            first_date, last_date = _firstandlast(first_date, last_date, frame.index)

        framesdict = {
            'DATA'     : fielddataframe.iloc[:,0],
            'SCENE'    : fieldsceneframe,
            'EXSCENES' : exsceneframes,
        }

        s2timeseries = cleans2timeseries.S2TimeSeriesFromDataFrames( 'S2_FAPAR', pidx, framesdict, first_date, last_date, verbose = True)
        s2timeseriesdict = s2timeseries.getTimeSeriesData(
            pseudolayerparams['lstnarrowedfieldvalidscenevalues'],
            pseudolayerparams['inarrowedfieldminpctareavalid'],
            pseudolayerparams['lstlstextendedfieldvalidscenevalues'],
            pseudolayerparams['lstiextendedfieldminpctareavalid'],
            pseudolayerparams['localminimamaxdip'],
            pseudolayerparams['localminimamaxdif'],
            pseudolayerparams['localminimamaxgap'],
            pseudolayerparams['localminimamaxpas'])
        #
        #    write the results: (series key, workspace layer name)
        #
        for szkey, szlayer in (('ORIGN', 'S2_FAPAR_ORIGN'), ('CLEAN', 'S2_FAPAR_CLEAN'), ('FLAGS', 'S2_FAPAR_FLAGS')):
            s2timeseriesdict['S2_FAPAR'][szkey].to_csv(workspace.parcelResDataTimeseriesCSV(pidx, szlayer, szparameterversion))

    except Exception:
        logging.error (traceback.format_exc())
        logging.error("wiccclean - wicCleanParcel_S2 - parcel (%s) - pseudolayer(%s) failed." % (str(pidx), str(pseudolayerId)) )

#
#
#
def wicCleanParcel(pidx, geodataframe):
    """
    Run all cleaning steps for one parcel: first S1 "clean", then S2 "clean".

    :param pidx: parcel index, handed to each step
    :param geodataframe: parcels geodataframe, handed to each step
    """
    logging.info("wiccclean - wicCleanParcel -  processing all/new data parcel (%s) starts" % (str(pidx),) )

    #
    #    the steps, in execution order
    #
    for cleanstep in (wicCleanParcel_S1, wicCleanParcel_S2):
        cleanstep(pidx, geodataframe)

#
#
#
def wicClean():
    """
    cleaning (or processing) collection of raw data - all parameters are expected to be available from
    wicconfig.wiccollectconfig  ('szparcelsshapefile': the parcels shape file)
    wicconfig.wiccleanconfig
    wicconfig.wiccleanparams
    wicconfig.wicworkspace class defining the workspace organization

    Every parcel of the shape file that has a raw data path in the workspace is handed to
    wicCleanParcel; parcels without one are skipped with a warning. Returns nothing.

    :raises Exception: if the parcels index has duplicate keys, or if a results data path
                       (for the workspace, or for a parcel) could not be created
    """
    #
    #    read the shape file into geopandas.geodataframe.GeoDataFrame
    #
    szparcelsshapefile  = wicconfig.wiccollectconfig['szparcelsshapefile']
    logging.info("wiccclean - wicClean - using shape file: %s" % (str(szparcelsshapefile), ))
    parcelsgeodataframe = geopandas.read_file(szparcelsshapefile)
    #
    #    no parcels no fun
    #
    if parcelsgeodataframe.empty:
        #
        #    format with '%' like all other messages in this file: handing the tuple to logging
        #    as its argument would render the tuple itself ("('name',)") instead of the name
        #
        logging.warning("wiccclean - wicClean - exits. no fields selected from parcels ShapeFile %s" % (str(szparcelsshapefile), ))
        return
    #
    #    our CropSAR reference files do contain 'fieldID'
    #    in any case, the index must be unique, since we'll use it to specify SubDirectories
    #
    #    TODO: check if their string representation is unique
    #
    if 'fieldID' in parcelsgeodataframe.columns:
        parcelsgeodataframe.set_index( 'fieldID', inplace=True)

    if not parcelsgeodataframe.index.is_unique:
        raise Exception('parcels DataFrame Index has duplicate keys')

    logging.info("wiccclean - wicClean - shape file specifies %s parcels" % (len(parcelsgeodataframe.index), ))
    #
    #    check/create the workspace results data directory
    #
    if not wicconfig.wicworkspace.parcelsResDataPathExists():
        if not wicconfig.wicworkspace.parcelsResDataPathCreate():
            raise Exception("workspace results data path could not be created '%s'" % (str(wicconfig.wicworkspace.parcelsResDataPath()), ))
        logging.info("wiccclean - wicClean - workspace results data path created: %s" % (str(wicconfig.wicworkspace.parcelsResDataPath()), ))
    else:
        logging.info("wiccclean - wicClean - workspace has existing results data path: %s" % (str(wicconfig.wicworkspace.parcelsResDataPath()), ))
    #
    #    process all/new data for all parcels in the parcelsgeodataframe
    #
    for pidx in parcelsgeodataframe.index:
        #
        #    verify the workspace does contain input data for this parcel - i know, this is not generic
        #
        if not wicconfig.wicworkspace.parcelRawDataPathExists(pidx):
            logging.warning("wiccclean - wicClean - skipping parcel(%s) - workspace raw data path does not exist: %s" % (str(pidx), str(wicconfig.wicworkspace.parcelRawDataPath(pidx)), ))
            continue
        #
        #    check/create the workspace - res data directory for this parcel (pidx)
        #
        if not wicconfig.wicworkspace.parcelResDataPathExists(pidx):
            if not wicconfig.wicworkspace.parcelResDataPathCreate(pidx):
                #
                #    TODO: do we want to continue with next parcel or abandon completely ?
                #
                raise Exception("workspace results data path for parcel(%s) could not be created '%s'" % (str(pidx), str(wicconfig.wicworkspace.parcelResDataPath(pidx)), ))
            logging.info("wiccclean - wicClean - workspace results data path for parcel(%s) created: %s" % (str(pidx), str(wicconfig.wicworkspace.parcelResDataPath(pidx)), ))
        else:
            logging.info("wiccclean - wicClean - workspace results data path for parcel(%s) exists: %s" % (str(pidx), str(wicconfig.wicworkspace.parcelResDataPath(pidx)), ))
        #
        #    clean this parcel
        #
        wicCleanParcel(pidx, parcelsgeodataframe)

#
#
#
def main(argv=sys.argv):
    """
    Script entry point: set up logging (console, and "wicclean.log" in the workspace root
    directory), initialize the date parameters in wicconfig.wiccleanparams and run wicClean.

    :param argv: command line arguments - not used yet
    :raises Exception: if the workspace root directory does not exist
    """
    #
    #    TODO:
    #    - argument parsing etc.

    #
    #    check if the workspace directory exists
    #
    workspace = wicconfig.wicworkspace
    if not workspace.workspacePathExists():
        raise Exception("workspace root directory does not exist '%s'" % (str(workspace.workspacePath()), ))

    #
    #    log to the console and to a log file in the workspace - both using the same layout
    #
    szlogformat  = '%(asctime)s %(levelname).4s %(message)s'
    szdateformat = '%Y-%m-%d %H:%M:%S'
    logging.basicConfig(level=logging.INFO, format=szlogformat, datefmt=szdateformat)

    szlogfile      = os.path.join(workspace.workspacePath(), "wicclean.log")
    logfilehandler = logging.FileHandler(szlogfile)
    logfilehandler.setFormatter(logging.Formatter(szlogformat, datefmt=szdateformat))
    logging.getLogger().addHandler(logfilehandler)
    #
    #    TODO:
    #    - setup & check workspace config
    #    - setup & check workspace params
    #

    #
    #    remark: dates not yet used
    #
    szyyyymmddfirst = None
    szyyyymmddlast  = None
    #
    #    dates which are not specified are stored as None
    #
    wicconfig.wiccleanparams['datetimedatefirst'] = pandas.to_datetime(szyyyymmddfirst).date() if szyyyymmddfirst is not None else None
    wicconfig.wiccleanparams['datetimedatelast']  = pandas.to_datetime(szyyyymmddlast).date()  if szyyyymmddlast  is not None else None
    #
    #    do the work
    #
    wicClean()

#
#
#
#    run main() only when this file is executed as a script, not when it is imported
if __name__ == '__main__':
    main()
