from pyspark.sql.types import *
from cropsar.readers import *
from cropsar.preprocessing.utils import *
import configparser
import sys

# Module-level logger, named after this module.
# NOTE(review): `logging` (like `pd`, `np` and `os` used further down) is not imported explicitly in
# this file; presumably it is re-exported by one of the wildcard imports above -- TODO confirm.
log = logging.getLogger(__name__)

def getDataStacks(fieldID, inputData, timesteps, outputResolution=1, S1smoothing=None, takeOutRate=None,
                  NRTfraction=None, S2layername='FAPAR', S1var='gamma', S2scaling=None):
    """
    Build training datastacks centred on the valid S2 observations of a single field.

    Every valid S2 observation except the first one becomes a centre point (POI); the rescaled S2
    value at that point is the training target and the stack built around it is the model input.

    :param fieldID: unique fieldID as present in the input data
    :param inputData: dictionary containing the S1 and S2 data for all fields
    :param timesteps: amount of timesteps (days) before and after each POI to extract
    :param outputResolution: temporal resolution (in days) of output
    :param S1smoothing: whether or not to apply S1 smoothing (None or amount of days of moving window)
    :param takeOutRate: max rate used to determine how many valid S2 observations should be removed [0-1]
                        [use None for no removal]
    :param NRTfraction: the fraction of generated datastacks which should be stopped at a random point
                        after the POI to simulate NRT data
    :param S2layername: layername of S2 data [FAPAR, FCOVER]
    :param S1var: which variable the backscatter represents [gamma, sigma]
    :param S2scaling: optional manual scaling range for S2 values
    :return: tuple (inputs, outputs): the stack returned by generate_input_datastack and the rescaled
             S2 values at the POIs; (None, None) when the field has fewer than 3 valid S2 observations
    """

    # Work on a private copy of the S2 time series of this field
    S2data = inputData['S2'][S2layername][fieldID].copy(deep=True)

    # At least 3 valid S2 observations are required, otherwise the field is skipped
    validMask = S2data.notnull()
    if np.sum(validMask) < 3:
        log.warning('Not enough S2 data points! Skipping ...')
        return (None, None)

    # Centre points are the dates of all valid observations but the first one:
    # the procedure needs forward imputed values, which the first observation cannot have
    candidatePoints = S2data.loc[validMask].iloc[1:].index.tolist()

    # Targets: the rescaled S2 values on the centre points
    targets = S2data.loc[candidatePoints].copy(deep=True)
    outputs = minmaxscaler(targets, 's2_' + S2layername.lower(), range=S2scaling)

    # numpy's global RNG is re-seeded on every call so the random removals are reproducible
    np.random.seed(5)

    # Training stacks never use an end-date cut-off, hence the explicit None
    inputs = generate_input_datastack(S2data, candidatePoints, S1smoothing, fieldID, inputData, None,
                                      outputResolution, timesteps,
                                      NRTfraction=NRTfraction, takeOutRate=takeOutRate,
                                      S2layername=S2layername, S1var=S1var,
                                      fortraining=True, S2scaling=S2scaling)

    log.info('Field {} fully processed!'.format(fieldID))

    return (inputs, outputs)


def getFullDataStacks(fieldID, inputData, startDate, endDate, timesteps, outputResolution=1,
                      S1smoothing=None, useAfter=True, S2layername='FAPAR', S1var='gamma'):
    """
    Build datastacks around every timestep between startDate and endDate.

    Counterpart of getDataStacks that covers the full requested period instead of only the dates
    with a valid S2 observation.

    :param fieldID: unique fieldID as present in the input data
    :param inputData: dictionary containing the S1 and S2 data for all fields
    :param startDate: start date of the returned dataStack
    :param endDate: end date of the returned dataStack
    :param timesteps: amount of timesteps before and after each centre point
    :param outputResolution: temporal resolution (in days) of output
    :param S1smoothing: smoothing window for S1 [use None for no smoothing]
    :param useAfter: whether or not to use available observations past specified endDate; for
                     gap-filling this can be useful, for testing purposes the time series should end
                     at endDate. Only the literal value False switches it off.
    :param S2layername: layername of S2 data [FAPAR, FCOVER]
    :param S1var: which variable the backscatter represents [gamma, sigma]
    :return: tuple (inputs, outputs, finalIndex)
    """

    # Fetch the S2 time series of this field
    S2data = inputData['S2'][S2layername][fieldID]

    # Decide whether the series has to stop at endDate (None means: no cut-off)
    cutoff = None
    if useAfter is False:
        log.warning('"useAfter" parameter disabled: cutting off data after {} ...'.format(endDate))
        cutoff = endDate
    S2data = S2data.loc[:cutoff]

    # The outputs are a rescaled copy of the (possibly truncated) S2 series
    outputs = minmaxscaler(S2data.copy(deep=True), 's2_' + S2layername.lower())

    # Regular DatetimeIndex covering the requested period
    stepSize = str(outputResolution) + 'D'
    finalIndex = pd.date_range(start=startDate, end=endDate, freq=stepSize)

    if outputResolution == 1:
        # Daily resolution: simply cut out the period of interest
        outputs = outputs.loc[startDate:endDate]
    else:
        # Coarser resolution: each new timestep takes the previous valid observation, provided it
        # is not older than one output step (e.g. at most 5 days old for 5-daily outputs)
        outputs = outputs[outputs.notnull()].reindex(index=finalIndex, method='ffill',
                                                     tolerance=stepSize)

    # A datastack is created around every timestep of the final index
    centrePoints = list(finalIndex)
    inputs = generate_input_datastack(S2data, centrePoints, S1smoothing, fieldID, inputData, cutoff,
                                      outputResolution, timesteps,
                                      S2layername=S2layername, S1var=S1var)

    log.info('Field {} fully processed!'.format(fieldID))

    return inputs, outputs, finalIndex

def generate_input_datastack(S2series, POIs, S1smoothing, fieldID, inputData, max, outputResolution,
                                     timesteps, NRTfraction=None, takeOutRate=None,
                                     S2layername='FAPAR', S1var='gamma', fortraining=False, S2scaling=None):
    '''
    Function that constructs the input datastacks around a series of S2 data points.

    Notes:
      * The ``max`` parameter shadows the builtin of the same name inside this function, so the
        builtin must not be used in this body.
      * When the S1 data still comes per orbit pass (an 'ASCENDING' key is present),
        ``inputData['S1']`` is replaced IN PLACE by the combined series, i.e. the caller's
        dictionary is modified.
      * numpy's global RNG is used but not seeded here; seeding is left to the caller.

    :param S2series: input S2 time series
    :param POIs: list of indexes used as center points around which to build the datastacks
    :param S1smoothing: smoothing window for S1 [use None for no smoothing]
    :param fieldID: identifier of the field
    :param inputData: full input data dictionary
    :param max: end date on which to cutoff all time series [use None for no end date]
    :param outputResolution: amount of days to use for each timestep [default should be 1]
    :param timesteps: amount of timesteps to use before and after the POI [default should be 150]
    :param NRTfraction: amount of POIs to use in NRT fashion, where all time series after POI are cut off
    :param takeOutRate: maximum relative amount of valid S2 points that are removed to simulate cloudy
                        regions [use None for no removal]; rates below 0.01 remove nothing
    :param S2layername: S2 variable to use ['FAPAR', 'FCOVER']
    :param S1var: ['gamma','sigma']
    :param fortraining: indicate whether datastack is generated for training purposes, in which case
                        FAPAR POIs will be removed
    :param S2scaling: optional manual scaling range for S2 values
    :return: numpy array of shape (len(POIs), int(timesteps * 2 / outputResolution) + 1, 3);
             channel 0 holds S1 VV, channel 1 holds S1 VH and channel 2 holds S2
    '''
    def reindex(timeseries, index, dateStart, dateEnd):
        # Now we center around our POI; NaN are automatically appended where needed
        timeseries = timeseries.loc[dateStart:dateEnd].reindex(index, method='ffill').reset_index(drop=True)
        # Fill the NaN values
        return timeseries.ffill().fillna(value=0.).values

    log.info('Building datastack for CropSAR input ...')

    # We find the first valid S2 data index in the TS
    firstValid = S2series.loc[S2series.notnull()].index[0]

    # Position of the POI inside a stack and total stack length
    centerStep = int(timesteps / outputResolution)
    nrSteps = int(timesteps * 2 / outputResolution) + 1
    inputs = np.full((len(POIs), nrSteps, 3), np.nan)  # 2 S1 vars, 1 S2 var

    # If Sentinel-1 time series are delivered in seperate orbit passes, we first need to combine them
    # todo: currently, the training data is still provided as separate orbits, so for now, we need this for compatibility
    # NOTE(review): this overwrites the caller's inputData['S1']; kept as-is because callers may rely on it
    if 'ASCENDING' in inputData['S1'].keys():
        inputData['S1'] = combine_s1_orbits(inputData['S1'], fieldID)

    variables = ['VV', 'VH']
    S1_timeseries = {}

    log.info('Preprocessing S1 time series ...')
    for variable in variables:
        S1_currentTS = inputData['S1'][variable][fieldID]

        # End the TS at specific index if requested
        S1_currentTS = S1_currentTS.loc[:max]

        # Get the incidence angle
        incidenceAngleTS = inputData['S1']['incidenceAngle'][fieldID]
        incidenceAngleTS = incidenceAngleTS.loc[:max]

        # Specify whether we use the S1 scalers for smoothed values or not
        if S1smoothing is None or S1smoothing < 11:
            S1layername = 's1_' + variable.lower()
        else:
            S1layername = 's1_' + variable.lower() + '_smooth'
        log.info('S1 scaler used: ' + S1layername)

        # Preprocess S1 time series
        S1_currentTS = preprocess_sentinel1(S1_currentTS, S1layername, incidenceAngleTS, S1smoothing, S1var=S1var)

        # We cut off the days before the first valid FAPAR because that part of the TS is not useable
        S1_timeseries[variable] = S1_currentTS.loc[firstValid:]

    # Create the index for the time series to be acquired as symmetrical amount of timesteps around POI dates
    log.info('Creating indexes ...')
    deltaT = pd.Timedelta(str(timesteps) + ' days')
    dateStart = pd.DatetimeIndex(POIs) - deltaT
    dateEnd = pd.DatetimeIndex(POIs) + deltaT
    ix = [pd.date_range(start=pd.to_datetime(start), end=pd.to_datetime(end),
                        freq=str(outputResolution) + 'D') for start, end in
          zip(dateStart.tolist(), dateEnd.tolist())]

    #######################
    # Create the S1 stack #
    #######################

    log.info('Creating S1 stack ...')
    for channel, variable in enumerate(variables):
        # One centred, gap-filled window per POI; channel order follows `variables`
        inputs[:, :, channel] = [reindex(S1_timeseries[variable], index, start, end)
                                 for index, start, end in zip(ix, dateStart, dateEnd)]

    #######################
    # Create the S2 stack #
    #######################

    log.info('Creating S2 stack ...')

    # Make a copy of the current S2 TS
    S2_currentTSCopy = pd.DataFrame.copy(S2series, deep=True).loc[firstValid:]

    # Check if need to end TS at certain moment
    S2_currentTSCopy = S2_currentTSCopy.loc[:max]

    # Now we need to expand the TS for proper deltaObs calculation
    # todo: is this still needed?
    expandedIndex = pd.date_range(start=S2_currentTSCopy.index[0] - deltaT,
                                  end=S2_currentTSCopy.index[-1] + deltaT,
                                  freq=str(outputResolution) + 'D')
    S2_currentTSCopy = S2_currentTSCopy.reindex(expandedIndex)

    # Copy the S2 time series as many times as needed: one column per POI
    S2_currentTSCopy = pd.DataFrame(index=S2_currentTSCopy.index,
                                    data=np.repeat(np.expand_dims(S2_currentTSCopy.values, axis=1), len(POIs),
                                                   axis=1))

    # If we are in training phase, we need to remove most of the S2data on the POIs because we need to learn to predict them!
    # and while we're on it, we allow for removing additional S2 points for simulation of more clouds and sparser series
    if fortraining:  # We're in training phase, should use the loop for now
        log.info('-'*75)
        print('Takeoutrate: {}'.format(takeOutRate))
        for i, poi in enumerate(POIs):
            print('POI: {}'.format(poi))
            # There's a 90% chance we will remove the S2 data at this POI
            if np.random.random() <= 0.9:
                print('Removing the S2 point for prediction ...')
                # Assign through the DataFrame itself. The former chained form
                # (`.iloc[:, i].loc[poi] = nan`) writes to a temporary Series and is silently
                # ignored when pandas Copy-on-Write is active. The membership test avoids
                # appending a new row when the POI is not part of the (resampled) index.
                if poi in S2_currentTSCopy.index:
                    S2_currentTSCopy.loc[poi, S2_currentTSCopy.columns[i]] = np.nan
            else:
                print('Keeping the S2 point for prediction ...')

            # Remove additional S2data observations if needed
            if takeOutRate is not None:

                # Get the indexes of valid FAPARs
                S2_tstemp = pd.Series.copy(S2_currentTSCopy.iloc[:, i], deep=True)
                validFAPARindex = S2_tstemp[S2_tstemp.notnull()].index.tolist()

                # If our POI survived previous removal round, we should give it a fair chance and leave it in here
                if poi in validFAPARindex:
                    validFAPARindex.remove(poi)

                # Determine how many FAPARs we'll remove (a percentage in [0, 100 * takeOutRate) ).
                # A rate below 0.01 gives an empty range, which np.random.choice rejects, so in
                # that case nothing is removed.
                maxPercentage = int(100*takeOutRate)
                if maxPercentage > 0:
                    removalRatio = np.random.choice(np.arange(maxPercentage))
                else:
                    removalRatio = 0

                # Determine which Obs will be removed
                toBeRemoved = np.random.choice(validFAPARindex, size=int(removalRatio/100 * len(validFAPARindex)),
                                               replace=False)
                print(
                    'Randomly removing {} of {} valid S2data points ...'.format(len(toBeRemoved), len(validFAPARindex)))
                S2_tstemp.loc[toBeRemoved] = np.nan
                S2_currentTSCopy.iloc[:, i] = S2_tstemp

    # FIRST rescale and THEN fill with zeroes
    S2_currentTS_filled = minmaxscaler(S2_currentTSCopy, 's2_' + S2layername.lower(), range=S2scaling).ffill().fillna(value=0.)

    # Put in datastack
    inputs[:, :, 2] = [reindex(S2_currentTS_filled.iloc[:, i], index, start, end)
                       for index, start, end, i in
                       zip(ix, dateStart, dateEnd, np.arange(S2_currentTS_filled.shape[1]))]

    # If we specify that we want a certain amount of NRT datastacks, we need to remove the 'future part' of the stack here
    # Since we don't want to ONLY train on the NRT day itself, but also on everything in between gap-filling and pure NRT
    # we need to stop the time series at a random point in between
    if NRTfraction:
        log.info('-'*75)
        for i, poi in enumerate(POIs):
            print('POI: {}'.format(poi))
            if np.random.random() <= NRTfraction:
                # We want to simulate NRT in this profile
                print('Simulating NRT ...')
                cutMoment = np.random.choice(np.arange(centerStep + 1, nrSteps))
                # Despite all this, we should enforce actually quite some real NRT examples
                if np.random.random() <= 0.33: # One in three of these cases is NRT enforced
                    cutMoment = centerStep + 1
                print('Cut moment: {}'.format(cutMoment))
                # Fill the rest of the time series with 0, for all three channels
                inputs[i, cutMoment:, :] = 0.

                # In the case of S2 inputs, we want somewhat earlier that all obs are gone, because this is where we
                # have most problems
                if np.random.random() <= 0.5:  # In half of the cases, this is done
                    # Choose a moment between POI minus 90 timesteps and the cutmoment. The lower
                    # bound is clamped to the start of the stack: a negative position would be
                    # interpreted by numpy as counting from the END of the stack.
                    # (the builtin max() is unavailable here because of the `max` parameter)
                    earliest = centerStep - 90
                    if earliest < 0:
                        earliest = 0
                    extracutmoment = np.random.choice(np.arange(earliest, cutMoment))
                    print('Stopping S2 even earlier at cut moment: {}'.format(extracutmoment))
                    if extracutmoment > 0:
                        # Carry the last retained S2 value forward up to the cut moment; this is
                        # what forward filling achieves, restricted to the one affected row
                        inputs[i, extracutmoment:cutMoment, 2] = inputs[i, extracutmoment - 1, 2]
                    else:
                        # Nothing precedes position 0, so use the zero fill value that the rest
                        # of the stack uses for "no observation yet" instead of leaving NaN behind
                        inputs[i, :cutMoment, 2] = 0.

    return inputs

def convert_row(tuple):
    '''
    Turn one (fieldID, (inputs, outputs)) pair into a SPARK SQL Row.

    NOTE: the parameter name shadows the builtin ``tuple``; it is kept because it is part of the
    function's signature.

    :param tuple: (fieldID, (inputs, outputs)) where inputs supports 2-D slicing ([:, a:b]) and
                  .tolist(), and outputs can be converted to float
    :return: Row with the fields field_id, outputs, s1_data (columns 0-1 of inputs) and
             s2_data (column 2 of inputs)
    '''
    identifier = tuple[0]
    payload = tuple[1]
    target = float(payload[1])
    stack = payload[0]
    return Row(
        field_id=identifier,
        outputs=target,
        s1_data=stack[:, :2].tolist(),
        s2_data=stack[:, 2:3].tolist(),
    )

def compute_and_write_tfrecords(field_subset, timeseries_df, outputResolution, output_basename,
                                timesteps, S1smoothing, takeOutRate, NRTfraction,
                                S2layername='FAPAR', S1var='gamma', nrparts=200, S2scaling=None):
    '''
    Function to start the SPARK processing for building datastacks and saving them to TFrecord files

    :param field_subset: iterable of field identifiers to process; it is sorted and cast to str
                         on a copy, the caller's object is not modified
    :param timeseries_df: Spark DataFrame with the input time series; its ``name`` column is
                          matched against the field identifiers
    :param outputResolution: forwarded to getDataStacks
    :param output_basename: location handed to the TFRecord writer; existing output is overwritten
    :param timesteps: forwarded to getDataStacks
    :param S1smoothing: forwarded to getDataStacks
    :param takeOutRate: forwarded to getDataStacks
    :param NRTfraction: forwarded to getDataStacks
    :param S2layername: forwarded to getDataStacks and df_row_to_dict
    :param S1var: forwarded to getDataStacks
    :param nrparts: number of partitions the result is written in; also used to size the
                    processing partitions
    :param S2scaling: optional (min, max) pair for manual S2 rescaling; both values are cast to float
    :return: None
    '''

    # sorted() returns a new list: the former in-place .sort() reordered the caller's list
    # as a side effect and only worked for containers offering an in-place sort
    field_subset = [str(field) for field in sorted(field_subset)]
    fields_df = SparkSession.builder.getOrCreate().createDataFrame(field_subset, schema=StringType())
    filtered = fields_df.join(timeseries_df,fields_df.value == timeseries_df.name,'inner').drop(fields_df.value)

    if S2scaling is not None:
        S2scaling = [float(S2scaling[0]), float(S2scaling[1])]
        log.warning('Manual S2 scaling range provided !! {}'.format(S2scaling))

    log.info('Splitting the job in {} parts ...'.format(nrparts))

    ## Repartition for efficient processing on the executors (never fewer than 2 partitions)
    timeseries = filtered.repartition(max(2,int(len(field_subset)/nrparts)))

    ## Generate Inputs on SPARK
    # 1. build (inputs, outputs) per field, 2. drop fields that were skipped (inputs is None),
    # 3. split every field into one record per datastack, 4. convert each record to a Row
    input_output_rdd = timeseries.rdd \
        .map(lambda row: (row.name, getDataStacks(row.name, df_row_to_dict(row.name, row, S2layername), timesteps,
                                                  outputResolution=outputResolution, S1smoothing=S1smoothing,
                                                  takeOutRate=takeOutRate, NRTfraction=NRTfraction,
                                                  S2layername=S2layername, S1var=S1var, S2scaling=S2scaling)))\
        .filter(lambda t:t[1][0] is not None) \
        .flatMapValues(lambda input_output_tuple:list(zip(input_output_tuple[0],input_output_tuple[1]))) \
        .map(convert_row)
    output_df = input_output_rdd.toDF()

    # Write to output files
    output_df.repartition(nrparts).write.format("tfrecords").mode('overwrite').option("recordType", "SequenceExample") \
        .option("codec", "org.apache.hadoop.io.compress.GzipCodec").save(output_basename)

def parse_params(S1smoothing, outputResolution, takeOutRate, timesteps, minArea, NRTfraction):
    '''
    Normalise S1smoothing and assemble the file name pattern of the output datastacks.

    Optional settings only show up in the pattern when they are set: S1smoothing and takeOutRate
    when they are not None, minArea and NRTfraction when they are truthy. Decimal points are
    dropped from takeOutRate and NRTfraction (0.8 becomes "08").

    :param S1smoothing: smoothing window in days, or None; a truthy value is cast to int
    :param outputResolution: temporal resolution in days
    :param takeOutRate: removal rate, or None
    :param timesteps: amount of timesteps
    :param minArea: minimum parcel area, or None
    :param NRTfraction: NRT fraction, or None
    :return: tuple (S1smoothing, outPattern)
    '''
    # None and 0 are left untouched, anything else becomes an int
    if S1smoothing:
        S1smoothing = int(S1smoothing)

    # Collect the optional name parts in their fixed order
    optionalParts = []
    if S1smoothing is not None:
        optionalParts.append('_S1smoothing' + str(S1smoothing) + 'd')
    if takeOutRate is not None:
        optionalParts.append('_takeOutRate' + str(takeOutRate).replace(".", ""))
    if minArea:
        optionalParts.append('_minArea' + str(minArea))
    if NRTfraction:
        optionalParts.append('_NRTfraction' + str(NRTfraction).replace(".", ""))

    prefix = '_multiCrop_allFields_DataStack_' + str(outputResolution) + 'd'
    suffix = '_' + str(timesteps) + 'd' + '_'
    outPattern = prefix + ''.join(optionalParts) + suffix

    return S1smoothing, outPattern

def _strip_file_scheme(path):
    '''Return path without a leading "file://" scheme; any other path is returned unchanged.'''
    scheme = 'file://'
    return path[len(scheme):] if path.startswith(scheme) else path


def main_parquet(indir = '/data/CropSAR/tmp/dataStackGenerationSPARK/data/parquetFiles/',
                 outDir = '/data/CropSAR/tmp/dataStackGenerationSPARK/data/dataStacks/',
                 s2_path=None, year='2018', timesteps=150, outputResolution=1,
                 S1smoothing = None, takeOutRate=None, minArea = None,
                 NRTfraction=None, S2layername='FAPAR', S1var='gamma', nroutputfiles=200, logFile=None,
                 S2scaling=None):
    """
    Convert timeseries parquet files into a datastack that can be used for training a Neural Network.
    Calibration (CAL), validation (VAL) and test (TEST) outputs are generated, together with an
    .ini file describing the settings that were used.

    :param indir:  Input directory containing Parquet files (str or pathlib.Path, optionally
                   prefixed with file://)
    :param outDir: Output directory where to write (str or pathlib.Path, optionally prefixed with
                   file://); created when missing
    :param s2_path: optional path of the S2 parquet file, forwarded to read_full_timeseries_input
    :param year: The year for which to generate data stacks (used in logging and output naming)
    :param timesteps: amount of timesteps, forwarded to the datastack generation
    :param outputResolution: temporal resolution in days, forwarded to the datastack generation
    :param S1smoothing: The size of the moving window (in days) for S1 smoothing. Not specifying this parameter implies no smoothing. If specified, should be one of [12,30,60]
    :param takeOutRate: if specified [0-1], the maximum rate of additional valid S2 observations that are removed
    :param minArea: if specified, this is the minimum area in m² for a parcel to be included in the datastack
    :param NRTfraction: if specified [0-1], this is the fraction of datastacks in which part of the future part of the time series is removed for NRT specific training
    :param S2layername: layername of S2 data [FAPAR, FCOVER]
    :param S1var: which backscatter time series are used [sigma, gamma]
    :param nroutputfiles: number of parts each of the CAL/VAL/TEST outputs is written in
    :param logFile: file name handed to get_logger
    :param S2scaling: optional manual scaling range (min,max) to use for rescaling the S2 values
    :return: No result
    """

    # Make sure pathlib.Path objects are converted to strings if needed
    indir = str(indir)
    outDir = str(outDir)
    s2_path = str(s2_path) if s2_path is not None else None

    # Initiate ini with settings
    config = configparser.ConfigParser()
    config['Data'] = {
        'inputDir': _strip_file_scheme(indir),
        'outputDir': _strip_file_scheme(outDir),
        's2Path': s2_path if s2_path is not None else os.path.join(indir, 'S2_FAPAR.parquet'),
        'year': str(year),
    }

    config['Parameters'] = {
        'timesteps': str(timesteps),
        'outputResolution': str(outputResolution),
        'S1smoothing': str(S1smoothing),
        'takeOutRate': str(takeOutRate),
        'minArea': str(minArea),
        'NRTfraction': str(NRTfraction),
        'S2scaling': str(S2scaling) if S2scaling is not None else 'default',
        'CAL/VAL/TEST': indir
    }

    S1smoothing, outPattern = parse_params(S1smoothing, outputResolution, takeOutRate, timesteps, minArea, NRTfraction)

    # Create the outDir if needed (on the local file system, hence without the file:// scheme)
    os.makedirs(_strip_file_scheme(outDir), exist_ok=True)

    # Initiate spark; the returned context is not used directly in this function
    sc = get_spark_context()

    # Initiate logging (this local logger is used for the remainder of the function)
    log = get_logger(name="cropsar_datastacks", filename=logFile)

    log.info('Creating datastacks for the year(s): {}'.format(year))
    log.info('DATASTACK TYPE: RNN')

    calvaltestfile = os.path.join(indir, 'croptypes_calvaltest.parquet')
    CAL_fields, VAL_fields, TEST_fields= load_fields(calvaltestfile)

    ##### Subset the fields on minimum area if requested
    if minArea:
        log.info('Subsetting fields on minimum area of {} m² ...'.format(minArea))
        fieldTable = pd.read_parquet(calvaltestfile)

        def largeEnough(fieldIDs):
            # Keep only the fields whose 'area' reaches the requested minimum
            areas = fieldTable.loc[fieldIDs]
            return areas.loc[areas['area'] >= minArea].index.tolist()

        CAL_fields = largeEnough(CAL_fields)
        VAL_fields = largeEnough(VAL_fields)
        TEST_fields = largeEnough(TEST_fields)

    log.info('Nr of CAL fields: {}'.format(len(CAL_fields)))
    log.info('Nr of VAL fields: {}'.format(len(VAL_fields)))
    log.info('Nr of TEST fields: {}'.format(len(TEST_fields)))

    config['NrFields'] = {
        'CAL': len(CAL_fields),
        'VAL': len(VAL_fields),
        'TEST': len(TEST_fields)
    }

    #read all of the timeseries input data, as a PySpark Dataframe
    timeseries_df = read_full_timeseries_input(indir, s2_path=s2_path, S2layername=S2layername)

    log.info('Done reading data.')
    output_basename = os.path.join(outDir, str(year) + outPattern)
    log.info('Basename: {}'.format(output_basename))

    # Save the ini file
    configFile = _strip_file_scheme(output_basename) + '.ini'
    if os.path.exists(configFile): os.remove(configFile)
    with open(configFile, 'w') as f: config.write(f)
    log.info('config.ini file saved to: {}'.format(configFile))

    # The same input DataFrame feeds all three stages, so it is cached once
    # and released again even when one of the stages fails
    stages = (
        ('calibration', 'CAL_', CAL_fields),
        ('validation', 'VAL_', VAL_fields),
        ('test', 'TEST_', TEST_fields),
    )
    timeseries_df.persist()
    try:
        for stageName, stageSuffix, stageFields in stages:
            log.info('-' * 75)
            log.info('Creating {} timeseries.'.format(stageName))
            compute_and_write_tfrecords(stageFields, timeseries_df, outputResolution,
                                        output_basename + stageSuffix,
                                        timesteps, S1smoothing, takeOutRate, NRTfraction,
                                        S2layername=S2layername, S1var=S1var, nrparts=nroutputfiles,
                                        S2scaling=S2scaling)
    finally:
        timeseries_df.unpersist()

    log.info('-' * 80)
    log.info('DATASTACK GENERATION PROGRAM SUCCESSFULLY FINISHED!')
    log.info('-' * 80)

if __name__ == '__main__':
    # Developer smoke run against the test resources of a local checkout;
    # all paths below are specific to that machine.
    print('RUNNING AS MAIN - ONLY FOR TEST PURPOSES!')

    smokeRunSettings = dict(
        indir='/home/kristofvt/git/cropsar/tests/testresources/parquet',
        s2_path='/home/kristofvt/git/cropsar/tests/testresources/parquet/fapar.parquet',
        outDir='/home/kristofvt/tests/',
        S1smoothing=13,
        takeOutRate=0.8,
        minArea=10000,
        NRTfraction=0.15,
        nroutputfiles=2,
    )
    main_parquet(**smokeRunSettings)