#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Aug 16 08:03:48 2022

@author: bertelsl
"""

import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd 
from satio.timeseries import Timeseries

'''Load one of the input data files as an xarray Dataset'''
input_file = '/data/EEA_HRL_VLCC/data/ref/lucas/LUCAS2018/2018_EU_LUCAS2018_POINT_JD/Lucas group_[621923953627856894, 621924219518943230).json/timeseries.nc'
input_data = xr.open_dataset(input_file)

input_data  # notebook-style preview; a no-op when the file is run as a plain script

'''
Select the optical bands
Only data variables whose name starts with "B" are kept, minus B01 and B09.
NOTE(review): B01/B09 are presumably dropped because they are the coarse
atmospheric bands - confirm.
'''
# Iterate over data_vars rather than variables: `variables` also lists
# coordinate variables, which to_array() ignores later on, so a coordinate
# starting with "B" would put the band names out of sync with the band axis.
# Filtering with an exclusion set (instead of list.remove) also keeps the
# script working on files that do not contain B01 or B09.
excluded_bands = {'B01', 'B09'}
optical_bands = [
    name for name in input_data.data_vars
    if str(name).startswith('B') and name not in excluded_bands
]
band_data = input_data[optical_bands]
band_data  # notebook-style preview

'''
Get the numpy data behind the xarray Dataset
For this we need to transform the dataset into a DataArray (which transforms
the data variables into a new dimension) and from this we can extract the
Numpy array with the raw data.
Caution: Satio knows two kinds of datatypes to handle timeseries:
  - uint16, where the value 0 is assumed no data
  - float32, where NaN is assumed no data
The original scaled optical data we're extracting here comes as dtype float64,
so it is explicitly converted to uint16 for proper handling by satio.
'''
band_values = band_data.to_array(dim='band').transpose('band', 't', 'feature').values

# A direct float -> uint16 cast is undefined for NaN/inf and wraps around for
# negative or too large values. Map everything that is not a valid uint16
# value onto the range first: non-finite values become 0 (the uint16 nodata
# value), the rest is clipped to [0, 65535].
uint16_max = np.iinfo(np.uint16).max
band_values = np.where(np.isfinite(band_values), band_values, 0)
raw_data = np.clip(band_values, 0, uint16_max).astype(np.uint16)
raw_data.shape  # bands X timestamps X entries

'''
Get the metadata that Satio needs to describe the timeseries
That is the timestamps and the band names
'''
timestamps = band_data.t.values
band_names = optical_bands

'''
Transform the data into a Satio Timeseries object
A Satio Timeseries object holds a 4D array (bands - timestamps - X - Y) on
which certain operations can be easily performed.
Satio is meant for spatial data (X, Y), while here we have a list of points
(5085 entries in this example). The workaround: use the entries as one spatial
dimension and append a dummy spatial dimension of length 1 to obtain the
required 4D array.
'''
# Append a trailing axis of length 1: (bands, timestamps, entries, 1)
raw_data_expanded = raw_data[..., np.newaxis]
raw_data_expanded.shape  # the 4D layout that the satio Timeseries needs

ts = Timeseries(
    data=raw_data_expanded,
    bands=band_names,
    timestamps=timestamps,
)


'''
Actual usage of Satio functionality
1) Compositing
Composite the Timeseries to the desired frequency; nodata is automatically
discarded while doing so.
NOTE(review): no explicit masking step is visible in this script, so "nodata"
here is whatever is already encoded as 0 in the uint16 input - confirm.
We take 10-day composites which are computed with 20-day moving windows.
The operation that is performed on all valid observations within the 20-day
window is "median".
A shorter period could be selected through start/end; for now the full range
of the original time series is used.
Any compositing window without valid observations still gets a nodata value
(0 in case of uint16).
'''

date_format = '%Y-%m-%d'
first_timestamp = pd.to_datetime(ts.timestamps[0])
last_timestamp = pd.to_datetime(ts.timestamps[-1])

composite_settings = {
    'freq': 10,
    'window': 20,
    'mode': 'median',
    'start': first_timestamp.strftime(date_format),
    'end': last_timestamp.strftime(date_format),
}

ts_composited = ts.composite(**composite_settings)

# Composited series of the first band for the first entry
plt.plot(ts_composited.data[0, :, 0, 0])

'''
2) Interpolation
Finally the interpolation, which takes the composited timeseries and linearly
interpolates any missing values.
'''

ts_interpolated = ts_composited.interpolate()

# Same band/entry as above, now with the gaps filled
plt.plot(ts_interpolated.data[0, :, 0, 0])

'''
After Satio processing
If we're done processing with satio, we can get back to the original data
layout and proceed with our analysis
'''

# Back to an xarray dataset with the bands as variables; squeeze drops the
# dummy spatial dimension again.
processed_data = ts_interpolated.to_xarray().to_dataset('bands').squeeze(drop=True)

# As an example, compute NDVI.
# Cast to float before doing arithmetic: if the data is still unsigned
# integer (uint16, as cast earlier in this script), b08 - b04 wraps around to
# a huge positive number wherever B04 > B08 and yields a wrong NDVI.
b08 = processed_data['B08'].astype(np.float64)
b04 = processed_data['B04'].astype(np.float64)

# A zero denominator (e.g. both bands nodata) gives NaN instead of a
# division-by-zero warning / inf.
band_sum = b08 + b04
ndvi = (b08 - b04) / band_sum.where(band_sum != 0)

# Plot some examples.
# Use a separate figure: NDVI lies in [-1, 1] and would be unreadable on the
# axes that already hold the reflectance series plotted earlier.
plt.figure()
plt.plot(ndvi.values[:, [8, 10, 14]])


# save to file
# fOut = '/data/EEA_HRL_VLCC/user/luc/data/LUCAS2018/5_HANTS_test/test.jlib'
# ts_interpolated.save(fOut)

# Without this nothing is displayed when the file is run as a plain script.
plt.show()



