import json
from openeo.rest import OpenEoApiError
from pathlib import Path
import logging
import fire
from time import sleep
import geopandas as gpd
from shapely import wkt
import pandas as pd
import os
import subprocess


from cropclass.openeo.inference import croptype_map
from cropclass.utils import laea20km_id_to_extent
from cropclass.utils.catalog import push_STAC_to_catalog
from cropclass._version import  __version__
from cropclass.config import get_job_options, get_processing_options, get_collection_options
import openeo
from openeo.extra.job_management import MultiBackendJobManager,_format_usage_stat

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("openeo_classification.cropmap")



def produce_on_creodias():
    """Launch the production run on the creodias backends.

    The tile list, the status CSV and the output directory all live under
    /vitodata/EEA_HRL_VLCC/data/production.
    """
    settings = dict(
        parallel_jobs=1,
        input_file="/vitodata/EEA_HRL_VLCC/data/production/jobsplit_laea20km_all_final.geojson",
        status_file="/vitodata/EEA_HRL_VLCC/data/production/croptype_production.csv",
        output_dir="/vitodata/EEA_HRL_VLCC/data/production",
        backends=["creodias"],
    )
    produce_hrlvlcc_croptype_map(**settings)


def produce_terrascope_test():
    """Launch a test run on the terrascope backend, using files in the current directory."""
    settings = dict(
        parallel_jobs=3,
        input_file="terrascope_test.geojson",
        status_file="eu27_2021_terrascope_sample.csv",
        output_dir=".",
        backends=["terrascope"],
    )
    produce_hrlvlcc_croptype_map(**settings)


def produce_creo_test():
    """Launch a test run on the creodias backends, using files in the current directory."""
    settings = dict(
        parallel_jobs=1,
        input_file="creo_test.geojson",
        status_file="eu27_2021_creo_sample.csv",
        output_dir=".",
        backends=["creodias"],
    )
    produce_hrlvlcc_croptype_map(**settings)

def create_filename(base_dir, row, sub):
    """Build the extension-less path prefix of a tile's product file.

    The directory part comes from create_dir(), which also creates that
    directory when it does not exist yet.

    @param base_dir: Root output directory, passed on to create_dir().
    @param row: Mapping providing the "year", "version" and "name" of the tile.
    @param sub: Product sub-type, passed on to create_dir().
    @return: String "<directory>/CROP_<year>_<name>-03035-010m_<version>".
    """
    tile_year = str(row["year"])
    tile_version = row["version"]
    tile_name = row["name"]
    target_dir = create_dir(base_dir, row, sub)
    stem = f'CROP_{tile_year}_{tile_name}-03035-010m_{tile_version}'
    return f'{target_dir}/{stem}'

def create_dir(base_dir, row, sub):
    """Return the output directory of a tile for one product sub-type, creating it if needed.

    The layout is <base_dir>/crop_<sub>/<version>/<year>/<group>, where <group>
    is made of characters 0-2 and 4-6 of the tile name.

    @param base_dir: Root output directory (str or Path).
    @param row: Mapping providing the "year", "version" and "name" of the tile.
    @param sub: Product sub-type used in the "crop_<sub>" directory name.
    @return: Path of the (now existing) directory.
    """
    year = str(row["year"])
    version = row["version"]
    name = row["name"]
    dirname = Path(base_dir) / f'crop_{sub}/{version}/{year}/{name[:3]}{name[4:7]}/'
    # exist_ok avoids the check-then-create race when several processes
    # write into the same output tree.
    dirname.mkdir(parents=True, exist_ok=True)
    return dirname




def produce_hrlvlcc_croptype_map(parallel_jobs=20, input_file="terrascope_test.geojson", status_file="eu27_2021_terrascope_klein.csv", output_dir=".", backends=["terrascope"]):
    """
    Script to start and monitor jobs for the EU27 croptype map project in openEO platform CCN.
    The script can use multiple backends, to maximize throughput. Jobs are tracked in a CSV file; upon failure, the script
    can resume processing by pointing to the same csv file. Delete that file to start processing from scratch.

    @param parallel_jobs: Number of parallel jobs for the "terrascope" and "sentinelhub" backends. The "creodias"
        backends are registered with fixed values (0 and 10) and do not use this setting.
    @param input_file: GeoJSON file whose features are the tiles to produce. The code reads the fields
        "year", "name", "version", "title" and "description" of every tile.
    @param status_file: The local file where status should be tracked.
    @param output_dir: Base directory below which results, metadata and error dumps are written.
    @param backends: Names of the backends to use: "terrascope", "sentinelhub" and/or "creodias".
    @return: None
    """

    with Path(input_file).open('r') as f:
        tiles_to_produce = gpd.GeoDataFrame.from_features(json.load(f))

    logger.info(
        f"Found {len(tiles_to_produce)} tiles to process.")

    class CustomJobManager(MultiBackendJobManager):
        """Job manager that stores results, metadata and error logs below `output_dir`."""

        def __init__(self, poll_sleep=5):
            super().__init__(poll_sleep)

        def on_job_error(self, job, row):
            """Write the error-level log entries of a failed job to a per-job "_errors.json" file."""
            error_logs = [entry for entry in job.logs() if entry.level.lower() == "error"]
            job_metadata = job.describe_job()

            errors_file = Path(
                f'{create_filename(Path(output_dir), row,"metadata")}_{job_metadata["id"]}_errors.json')
            if error_logs:
                errors_file.write_text(json.dumps(error_logs, indent=2))
            else:
                errors_file.write_text(
                    "Couldn't find any errors in the logs. Please check manually.")

        def on_job_done(self, job, row):
            """Download the results of a finished job and store job/STAC metadata and leftover error logs.

            @return: the unchanged `row`.
            """
            base_dir = Path(output_dir)
            job_metadata = job.describe_job()

            results = job.get_results()
            metadata_file = Path(f'{create_filename(base_dir,row,"metadata")}_results.json')
            # The software version is the word following "cropclass " in the job description.
            cropclass_version = job_metadata['description'].split('cropclass ')[1].split(' ')[0]
            results.download_files(create_dir(base_dir, row, "masked"), include_stac_metadata=False)

            Path(f'{create_filename(base_dir,row,"metadata")}_job.json').write_text(json.dumps(job_metadata, indent=2))

            STAC = results.get_metadata()
            # The model tag is the last word of the description, minus its trailing character.
            STAC['properties'] = {"processing:software": {"cropclass": cropclass_version, "hrl_vlcc_croptype_model": job_metadata['description'].split(' ')[-1][:-1]}}
            metadata_file.write_text(json.dumps(STAC, indent=2))

            # Pushing the STAC item to the catalog is currently disabled:
            #push_STAC_to_catalog(STAC, row, catalog= 'CROPTYPE_RAW_v0')

            # Errors mentioning "partial writes" are not reported for successful jobs.
            error_logs = [entry for entry in job.logs()
                          if entry.level.lower() == "error" and "partial writes" not in entry.message]
            orfeo_errors = [
                entry for entry in error_logs if "orfeo" in entry.message.lower()]
            if orfeo_errors:
                logger.error(f"Job had orfeo error {row}")
            if error_logs:
                Path(f'{create_filename(base_dir, row,"metadata")}_{job_metadata["id"]}_errors_after_success.json').write_text(
                    json.dumps(error_logs, indent=2))
            return row

        def _update_statuses(self, df: pd.DataFrame):
            """Update status (and stats) of running jobs (in place)."""
            active = df.loc[
                (df.status == "created")
                | (df.status == "queued")
                | (df.status == "running")
                ]
            for i in active.index:
                job_id = df.loc[i, "id"]
                backend_name = df.loc[i, "backend_name"]

                try:
                    con = self._get_connection(backend_name)
                    the_job = con.job(job_id)
                    job_metadata = the_job.describe_job()
                    logger.info(
                        f"Status of job {job_id!r} (on backend {backend_name}) is {job_metadata['status']!r}"
                    )
                    if (
                            df.loc[i, "status"] == "running"
                            and job_metadata["status"] == "finished"
                    ):
                        self.on_job_done(the_job, df.loc[i])
                        # update the row so it shows the version tag that was actually used
                        df.loc[i, 'description'] = job_metadata['description']

                    if df.loc[i, "status"] != "error" and job_metadata["status"] == "error":
                        self.on_job_error(the_job, df.loc[i])

                    df.loc[i, "status"] = job_metadata["status"]
                    for key in job_metadata.get("usage", {}):
                        df.loc[i, key] = _format_usage_stat(job_metadata, key)

                    # Jobs running for more than 2 hours are probably hanging, but stopping them
                    # automatically is disabled (no longer very confident about this).
                    if df.loc[i, "status"] == 'running' and \
                            pd.to_datetime(df.loc[i, 'start_time']) + pd.DateOffset(minutes=120) < pd.Timestamp.now():
                        pass
                        #the_job.stop_job()

                    # Automatic VRT/GPKG creation, meant to run once per day around midnight.
                    # Deliberately switched off through the leading `False`.
                    base_dir = Path(output_dir)
                    if False and pd.to_datetime(active.loc[i, "start_time"]).day \
                            != pd.to_datetime(active.loc[active.index[-1], "start_time"]).day:

                        #setup gpkg
                        gdf_wgs = gpd.GeoDataFrame(df, crs='epsg:4326')
                        gdf = gdf_wgs.to_crs({'init': 'epsg:3035'})
                        gdf['visualize'] = gdf['start_time'].str.replace('-', '').str[0:8]
                        gdf.loc[gdf['status'] == 'not_started', 'visualize'] = -1
                        gdf['visualize'] = gdf['visualize'].astype('int32')

                        for year in [2017, 2018, 2019, 2020, 2021]:
                            try:
                                subprocess.check_call(f"gdalbuildvrt {base_dir}/CROP_RAW_{str(year)}_V100.vrt {base_dir}/crop_raw/V100/{str(year)}/*/*.tif", shell=True)
                            except Exception:
                                logger.warning("something went wrong with vrt creation, however no reason to stop processing")

                            try:
                                fn_gpkg_year = f"croptype_production_{year:04d}.gpkg"
                                fp_gpkg_year = os.path.join(base_dir, fn_gpkg_year)
                                gdf_year = gdf[gdf['year'] == year]
                                gdf_year.to_file(fp_gpkg_year, encoding='utf-8', driver="GPKG")
                                gdf_year = None
                            except Exception:
                                logger.warning("something went wrong with gpkg creation, however no reason to stop processing")

                except OpenEoApiError as e:
                    # A failing status request must not stop the monitoring of the other jobs.
                    logger.warning(f"error for job {job_id!r} on backend {backend_name}")
                    logger.warning(e)

    def run(row, connection_provider, connection, provider):
        """Create (but do not start) the batch job for one tile; used as `start_job` callback.

        @param row: tile record providing "year", "name", "version", "title" and "description".
        @param connection_provider: not used here.
        @param connection: openEO connection on which the job is created.
        @param provider: backend name; any name containing "creodias" is treated as "creodias".
        @return: the created batch job.
        """
        year = row['year']
        name = row['name']
        version = row['version']

        #allow using multiple 'creodias' endpoints
        if "creodias" in provider:
            provider = "creodias"

        job_options = get_job_options(provider)
        #to ignore orfeo errors
        #job_options['soft-errors']='true'
        processing_options = get_processing_options(provider)
        processing_options.update({'year': int(year)})

        # Fill in the placeholders of the description. If nothing changes, the placeholders
        # were already replaced, which is not expected for a tile that still has to be submitted.
        description = row.description.replace('MODEL_TAG', processing_options["modeltag"]) \
            .replace('SOFTWARE_VERSION', __version__)
        if description == row.description:
            logger.warning("It seems this tile already has a software version/model_tag. \n" +
                           "This should not be possible please check input csv.")

        EXTENT_20KM = laea20km_id_to_extent(name)

        logger.info(f"submitting job to {provider}")

        #Agera5 does not have a complete coverage, that's why we move to the nearest available data point: {org_name: used data}
        wrong_dict = {'E352N190':'E346N190','E340N172':'E338N172','E314N152':'E312N146','E312N152':'E312N146',
                      'E484N220':'E486N220','E162N104':'E164N104','E176N104':'E174N104','E096N278':'E094N276',
                      'E180N120':'E180N098','E182N120':'E180N098','E184N146':'E182N150','E184N148':'E182N150',
                      'E184N150':'E182N150','E186N152':'E182N152','E188N152':'E182N152','E494N412':'E492N412',
                      'E496N412':'E494N414','E496N414':'E494N414'}
        meteo_name = wrong_dict.get(name, name)

        with open(f'/vitodata/EEA_HRL_VLCC/data/ref/METEO/{str(year)}/METEO-{meteo_name}-{str(year)}') as meteo:
            METEO_json = json.load(meteo)

        #updating processing_options with meteo data
        processing_options.update({'METEO_data': METEO_json})

        clf_results = croptype_map(EXTENT_20KM,
                                   connection, provider, processing_options)

        job = clf_results.create_job(
            title=row.title,
            description=description,
            out_format="GTiff",
            job_options=job_options, overview_method="mode", filename_prefix=f"CROP_{year}_{name}-03035-010m_{version}")

        return job

    manager = CustomJobManager()

    # Connections are only opened (and authenticated) for the backends that were requested.
    if "terrascope" in backends or "sentinelhub" in backends:
        # NOTE(review): the original code referenced an undefined `terrascope` connection here
        # (NameError). The Terrascope production endpoint is assumed - confirm before use.
        terrascope = openeo.connect("openeo.vito.be", default_timeout=60).authenticate_oidc()
        if "terrascope" in backends:
            manager.add_backend(
                "terrascope", connection=terrascope, parallel_jobs=parallel_jobs)
        if "sentinelhub" in backends:
            manager.add_backend(
                "sentinelhub", connection=terrascope, parallel_jobs=parallel_jobs)

    if "creodias" in backends:
        creo_prod = openeo.connect("openeo.openeo-vlcc-prod.vgt.vito.be", default_timeout=60).authenticate_oidc()
        creo_prod31 = openeo.connect("openeo-3-1.openeo-vlcc-prod.vgt.vito.be", default_timeout=60).authenticate_oidc()
        # NOTE(review): the job counts below are hard-coded; `parallel_jobs` is not used for creodias.
        manager.add_backend("creodias32", connection=creo_prod, parallel_jobs=0)
        manager.add_backend("creodias31", connection=creo_prod31, parallel_jobs=10)

    manager.run_jobs(
        df=tiles_to_produce,
        start_job=run,
        output_file=Path(status_file)
    )


if __name__ == '__main__':
    # Pass the callable itself: calling it here would run the whole production
    # before Fire starts and hand Fire only the function's None return value.
    fire.Fire(produce_on_creodias)