"""Data preprocessing scripts."""

from __future__ import annotations

from time import perf_counter, time

import pandas as pd

from vito_crop_classification.data.postprocess import main as postprocess_df
from vito_crop_classification.data.preprocess import main as preprocess_df
from vito_crop_classification.vito_logger import bh_logger


def transform_df(
    df: pd.DataFrame,
    scale_cfg: dict[str, tuple[float, float]] | None = None,
    scale_dynamically: bool = True,
) -> tuple[pd.DataFrame, dict[str, tuple[float, float]]]:
    """
    Transform the raw dataframe.

    Steps in order are:
        * Preprocessing: Adding new data, scaling values, etc.
        * Postprocessing: Removing useless data, merging corresponding columns together

    The shape of the dataframe before and after each step, and the time each
    step took, are reported through ``bh_logger``.

    Parameters
    ----------
    df : pd.DataFrame
        Dataframe to process
    scale_cfg : dict[str, tuple[float, float]], optional
        Configuration used to scale the values, forwarded as-is to the preprocessing step
    scale_dynamically : bool
        Update the scaling configuration dynamically, raise an exception otherwise

    Returns
    -------
    df : pd.DataFrame
        Transformed pandas dataframe
    scale_cfg : dict[str, tuple[float, float]]
        Scaling configuration applied on the transformed DataFrame
    """
    # Preprocess
    _log_stage_start("preprocessing", df)
    # perf_counter is monotonic, so the measured duration cannot be skewed (or
    # turn negative) when the system clock is adjusted while a stage is running.
    started = perf_counter()
    df, scale_cfg = preprocess_df(df, scale_cfg=scale_cfg, scale_dynamically=scale_dynamically)
    _log_stage_end(df, perf_counter() - started)

    # Postprocess
    _log_stage_start("postprocessing", df)
    started = perf_counter()
    df = postprocess_df(df)
    _log_stage_end(df, perf_counter() - started)
    return df, scale_cfg


def _log_stage_start(stage: str, df: pd.DataFrame) -> None:
    """Log the start banner of a processing stage and the shape of its input dataframe."""
    bh_logger(f"Starting {stage}..")
    bh_logger(f" - Initial DataFrame shape: {df.shape}")


def _log_stage_end(df: pd.DataFrame, elapsed: float) -> None:
    """Log the shape of a stage's output dataframe and its duration, given in seconds."""
    # Split the duration into whole minutes and the remaining whole seconds.
    minutes, seconds = divmod(elapsed, 60)
    bh_logger(f" - Final DataFrame shape: {df.shape}")
    bh_logger(f" - Finished in {int(minutes)} minutes and  {int(seconds)} seconds\n")


if __name__ == "__main__":
    # Ad-hoc manual run: transform a 200,000-row sample of the loaded data.
    from vito_crop_classification.data.loaders import load_data

    sample_df = load_data().sample(200_000)
    transformed_df, fitted_scale_cfg = transform_df(sample_df, scale_cfg=None)
