#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 24 07:02:01 2022

@author: bertelsl

Algorithm:
    
    - search for S1, S2 files in the pre-defined input directories:
        
        'S1dir': r'/data/EEA_HRL_VLCC/data/ref/lucas/LUCAS2018/LUCAS2018_CREO_BACKSCATTER/'
        'S2dir': r'/data/EEA_HRL_VLCC/data/ref/lucas/LUCAS2018/LUCAS2018_SHUB/'
        
    - use satio to composite and interpolate the data over the full time period 2017/09/01 - 2019/03/31 
   
    - save the pre-processed data to the output directory as netcdf, only for the time period of interest 2018/03/01 - 2018/08/31:
         
        'fOutdir': r'/data/EEA_HRL_VLCC/user/luc/data/LUCAS2018/01_LUCAS_preprocessed/',
        
Version: 31/08/2022

"""

import os
import glob
import netCDF4
import json
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from satio.timeseries import Timeseries
from satio.features import multitemporal_speckle
from datetime import datetime

def _to_db(pwr):
    # Helper function to convert power units to dB
    return 10 * np.log10(pwr)

def _to_pwr(db):
    # Helper function to convert dB to power units
    return np.power(10, db / 10)

#================================================================================================================
class cLUCASpreprocessing(object):
#================================================================================================================
    def __init__(self, Info):        
#================================================================================================================
        
        self.fOutdir = Info['fOutdir']
        self.S1dir = Info['S1dir']
        self.S2dir = Info['S2dir']
        self.start_date_interpolate = Info['start_date_interpolate']
        self.start_end_interpolate = Info['end_date_interpolate']
        self.start_date_focus = Info['start_date_focus']
        self.end_date_focus = Info['end_date_focus']
        self.full_period = Info['full_periods']
        self.fullT = pd.date_range(self.start_date_interpolate, periods=Info['full_periods'])
        self.focusT = pd.date_range(self.start_date_focus, periods=Info['focus_periods'])
        self.overwrite = Info['overwrite']
        
        if not os.path.isdir(self.fOutdir):
            os.mkdir(self.fOutdir)
 
#================================================================================================================
    def start_processing(self):        
#================================================================================================================

        aS1dirs = glob.glob(self.S1dir + 'Lucas *')
        aS1dirs.sort()
        aS2dirs = glob.glob(self.S2dir + 'Lucas *')
        aS2dirs.sort()

        for S1dir in aS1dirs:

            basename = os.path.basename(S1dir)
            basename = basename.split(' creodias')[0]
            basename = basename.split('.')[0]
            
            fOut = os.path.join(self.fOutdir, 'S1_'+basename+'.nc')

            if os.path.isfile(fOut):
                if self.overwrite:
                    os.remove(fOut)
                else:
                    continue
            
            print('\r Processing: {}'.format(basename), end='                                                                                                                       ')

            if self.read_labels(S1dir, basename):
                self.process_S1_data(S1dir, fOut)

        print('')

        # search for S1 en S2 files
        for S2dir in aS2dirs:

            basename = os.path.basename(S2dir)
            basename = basename.split(' terrascope')[0]
            basename = basename.split('.')[0]
            
            fOut = os.path.join(self.fOutdir, 'S2_'+basename+'.nc')
            
            if os.path.isfile(fOut):
                if self.overwrite:
                    os.remove(fOut)
                else:
                    continue
            
            print('\r Processing: {}'.format(basename), end='                                                                                                                       ')

            if self.read_labels(S2dir, basename):
                self.process_S2_data(S2dir, fOut)

#================================================================================================================
    def read_labels(self, Sdir, basename):        
#================================================================================================================
        
        fInfo = glob.glob(os.path.join(Sdir, 'group_*.json'))
        
        if len(fInfo) == 0:
            return False
        
        try:
            data = json.load(open(fInfo[0]))        
        except:
            print('*** ERROR: failed to read {}'.format(fInfo))
            raise
            
        aFeatures = data["features"]
        
        self.aLabels = []
        self.aIDs = []
        
        for feature in aFeatures:
            self.aLabels.append(feature['properties']['LC1_LABEL'])
            self.aIDs.append(feature['properties']['POINT_ID'])
                
        return True
    
#================================================================================================================
    def process_S1_data(self, S1dir, fOut):
#================================================================================================================
        """Composite and interpolate the S1 VV/VH series of *S1dir* and write them to *fOut*.

        The data returned by ``read_data('S1', S1dir)`` are converted from dB
        to power units, composited (mean, freq=10, window=20) and interpolated
        with satio, converted back to dB and saved as netCDF with the
        dimensions ("date", "labels").

        Relies on ``self.aLabels`` / ``self.aIDs`` as set by ``read_labels``.
        """

        xInput = self.read_data('S1', S1dir)

        # --- radar bands -------------------------------------------------------
        # Pick the VV and VH variables (assumed to be in dB range!)
        radar_bands = [name for name in xInput.variables if 'VV' in name or 'VH' in name]
        xRadar = xInput[radar_bands]

        # Stack into a (band, t, feature) float32 cube; radar data is float
        stacked = xRadar.to_array(dim='band').transpose('band', 't', 'feature')
        cube = stacked.values.astype(np.float32)

        # Wrap it in a Satio Timeseries, with a dummy trailing dimension appended
        ts = Timeseries(
            data=np.expand_dims(cube, axis=-1),
            timestamps=xRadar.t.values,
            bands=radar_bands,
        )

        # Manipulations have to happen in linear units, so get rid of dB first.
        # NOTE(review): read_data fills dates without data with 0, which turns
        # into 1.0 here -- confirm this is the intended no-data handling.
        ts.data = _to_pwr(ts.data)

        # Compositing: S1-specific frequency, window and a mean operation over
        # the full extent of the series
        first_day = pd.to_datetime(ts.timestamps[0]).strftime('%Y-%m-%d')
        last_day = pd.to_datetime(ts.timestamps[-1]).strftime('%Y-%m-%d')
        ts_composited = ts.composite(freq=10, window=20, mode='mean', start=first_day, end=last_day)

        # Interpolation, then back to dB, overriding the data array
        S1_ts_interpolated = ts_composited.interpolate()
        S1_ts_interpolated.data = _to_db(S1_ts_interpolated.data)

        # Back to an xarray dataset with the bands as variables; the dummy
        # spatial dimension has been dropped
        S1_processed_data = S1_ts_interpolated.to_xarray().to_dataset('bands').squeeze(drop=True)

        VH = S1_processed_data['VH'].values
        VV = S1_processed_data['VV'].values

        # With a single point the arrays are reshaped to one column so they
        # match the ("date", "labels") layout used below
        if len(self.aIDs) == 1:
            VH = np.reshape(VH, (len(VH), 1))
            VV = np.reshape(VV, (len(VV), 1))

        # --- output dataset ----------------------------------------------------
        data_vars = {
            "VH": (["date", "labels"], VH),
            "VV": (["date", "labels"], VV),
            "lat": (["labels"], xInput['lat'].values),
            "lon": (["labels"], xInput['lon'].values),
            "IDs": (["labels"], self.aIDs),
        }
        coords = {
            "date": S1_processed_data['time'].values,
            "labels": self.aLabels,
        }
        xData = xr.Dataset(
            data_vars=data_vars,
            coords=coords,
            attrs={"description": "satio cleaned data."},
        )

        xData.to_netcdf(fOut)
        
#================================================================================================================
    def process_S2_data(self, S2dir, fOut):
#================================================================================================================
        """Composite and interpolate the S2 band series of *S2dir* and write them to *fOut*.

        The data returned by ``read_data('S2', S2dir)`` are composited (median,
        freq=10, window=20) and interpolated with satio and saved as netCDF
        with the dimensions ("date", "labels"). Points whose B04 series sums to
        zero are removed before writing.

        Relies on ``self.aLabels`` / ``self.aIDs`` as set by ``read_labels``;
        also stores the longitudes in ``self.lon``.
        """

        xInput = self.read_data('S2', S2dir)

        # --- optical bands -----------------------------------------------------
        optical_bands = [b for b in xInput.variables if b.startswith('B')]
        xdata = xInput[optical_bands]

        # numpy data behind the xarray Dataset, as (band, t, feature)
        raw_data = xdata.to_array(dim='band').transpose('band', 't', 'feature').values.astype(np.uint16)

        # Transform the data into a Satio Timeseries object, with a dummy
        # trailing dimension appended
        raw_data_expanded = np.expand_dims(raw_data, axis=-1)
        ts = Timeseries(data=raw_data_expanded, timestamps=xdata.t.values, bands=optical_bands)

        # 1) Compositing over the full extent of the series
        composite_settings = dict(
            freq=10,
            window=20,
            mode='median',
            start=pd.to_datetime(ts.timestamps[0]).strftime('%Y-%m-%d'),
            end=pd.to_datetime(ts.timestamps[-1]).strftime('%Y-%m-%d')
        )
        ts_composited = ts.composite(**composite_settings)

        # 2) Interpolation
        S2_ts_interpolated = ts_composited.interpolate()

        # Back to an xarray dataset with the bands as variables; the dummy
        # spatial dimension has been dropped
        S2_processed_data = S2_ts_interpolated.to_xarray().to_dataset('bands').squeeze(drop=True)

        self.lon = xInput['lon'].values

        out_bands = ('B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B8A', 'B11', 'B12')
        single_point = len(self.aIDs) == 1

        band_values = {}
        for band in out_bands:
            values = S2_processed_data[band].values
            if single_point:
                # Reshape each band from ITS OWN data to one column. The former
                # code reshaped B02 for every band, so all bands of a
                # single-point group ended up holding the B02 values.
                values = np.reshape(values, (len(values), 1))
            band_values[band] = values

        # --- output dataset ----------------------------------------------------
        data_vars = {band: (["date", "labels"], band_values[band]) for band in out_bands}
        data_vars["lat"] = (["labels"], xInput['lat'].values)
        data_vars["lon"] = (["labels"], xInput['lon'].values)
        data_vars["IDs"] = (["labels"], self.aIDs)

        new_data = xr.Dataset(
                data_vars=data_vars,
                coords=dict(
                    date=S2_processed_data['time'].values,
                    labels = (self.aLabels)
                ),
                attrs=dict(description="satio cleaned data."),
            )

        # Remove empty entries: points whose B04 values sum to zero over time
        check_zero = np.sum(band_values['B04'], axis=0)

        for iX in np.where(check_zero == 0)[0]:
            new_data = new_data.where(new_data['IDs'] != self.aIDs[iX], drop=True)

        new_data.to_netcdf(fOut)
        
#================================================================================================================
    def read_data(self, forS, indir):
#================================================================================================================
        """Read ``<indir>/timeseries.nc`` and return it on the focus time axis.

        Every band is first placed on a zero-filled array with
        ``self.full_period`` time steps (time axis ``self.fullT``), using the
        raw values of the file's 't' variable as column indices. The result is
        then cut to the range ``self.focusT[0] .. self.focusT[-1]`` and
        returned with ``self.focusT`` as time coordinate.

        Parameters
        ----------
        forS : str
            'S1' (bands VH, VV) or 'S2' (bands B02..B08, B8A, B11, B12).
            For 'S2', NaN values are replaced by 0.
        indir : str
            Directory containing ``timeseries.nc``.

        Returns
        -------
        xarray.Dataset
            Band variables with dimensions ("feature", "t") and the
            coordinates t, feature, lat, lon and IDs.

        Raises
        ------
        ValueError
            If *forS* is neither 'S1' nor 'S2'.

        Relies on ``self.aLabels`` / ``self.aIDs`` as set by ``read_labels``.
        """

        if forS == 'S1':
            bands = ('VH', 'VV')
        elif forS == 'S2':
            bands = ('B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B8A', 'B11', 'B12')
        else:
            # previously this fell through to an UnboundLocalError at the return
            raise ValueError("forS must be 'S1' or 'S2', got {!r}".format(forS))

        fIn = os.path.join(indir, 'timeseries.nc')

        # Available dates of the actual data: the raw (undecoded) 't' values are
        # read with netCDF4 because they are used directly as column indices.
        # NOTE(review): presumably integer offsets into self.fullT -- confirm.
        with netCDF4.Dataset(fIn, 'r') as file2read:
            at_date = np.ma.getdata(file2read.variables['t'][:])

        expanded = {}
        with xr.open_dataset(fIn) as xInput:
            for band in bands:
                values = xInput[band].values
                if forS == 'S2':
                    values[np.isnan(values)] = 0
                # same length for S1 and S2 (the S2 branch used a literal 577)
                full = np.zeros(shape=(np.shape(values)[0], self.full_period))
                full[:, at_date] = values
                expanded[band] = full
            # read everything still needed before the file is closed
            lat = xInput['lat'].values
            lon = xInput['lon'].values

        # dataset on the full time axis
        xFull = xr.Dataset(
                data_vars={band: (["feature", "t"], expanded[band]) for band in bands},
                coords=dict(
                    t=self.fullT,
                    feature = self.aLabels,
                    lat=(["feature"], lat),
                    lon=(["feature"], lon),
                    IDs=(["feature"], self.aIDs)
                ),
                attrs=dict(description="satio cleaned data."),
            )

        # keep the focus period only
        xNew = xFull.sel(t=slice(self.focusT[0], self.focusT[-1]), drop=True)

        # rebuild the dataset with self.focusT as the time coordinate
        xData = xr.Dataset(
                data_vars={band: (["feature", "t"], xNew[band].values) for band in bands},
                coords=dict(
                    t=self.focusT,
                    feature = xNew['feature'].values,
                    lat=(["feature"], xNew['lat'].values),
                    lon=(["feature"], xNew['lon'].values),
                    IDs=(["feature"], xNew['IDs'].values)
                ),
                attrs=dict(description="satio cleaned data."),
            )

        return xData

#================================================================================================================
if __name__ == '__main__':
#================================================================================================================

    # Run configuration: input/output locations, the interpolation and focus
    # periods, and whether existing output files are rebuilt.
    Info = dict(
        S1dir=r'/data/EEA_HRL_VLCC/data/ref/lucas/LUCAS2018/LUCAS2018_CREO_BACKSCATTER/',
        S2dir=r'/data/EEA_HRL_VLCC/data/ref/lucas/LUCAS2018/LUCAS2018_SHUB/',
        fOutdir=r'/data/EEA_HRL_VLCC/user/luc/data/LUCAS2018/01_LUCAS_preprocessed/',
        start_date_interpolate='2017-09-01T00:00:00.000000000',
        end_date_interpolate='2019-04-01T00:00:00.000000000',
        start_date_focus='2018-03-01',
        end_date_focus='2018-08-31',
        full_periods=577,    # 2017/09/01 till 2019/03/31
        focus_periods=184,   # 2018/03/01 till 2018/08/31
        overwrite=False,
    )

    # Build the pre-processor and process all S1 and S2 directories.
    oLUCASpreprocessing = cLUCASpreprocessing(Info)
    oLUCASpreprocessing.start_processing()
    
    
