"""
Weather Data Provider Module

This module provides a data provider interface for weather data stored in parquet files.
It handles data fetching, filtering, trimming, and provides configuration for various
data processing and visualization options.

Key Features:
- Loads weather data from local parquet files
- Supports time-based data filtering and trimming with timezone awareness
- Provides custom graphing functions using Seaborn and Plotly
- Configures data processing options (smoothing, normalization)
- Includes robust error handling and logging

Data Flow:
1. Scan data folder for available parquet files (record IDs)
2. Extract available signals from the first data file
3. Provide fetch_data function for on-demand data loading
4. Provide custom graphing and processing functions
5. Return a complete provider configuration dictionary
"""
from __future__ import annotations

import logging
import traceback
from pathlib import Path
from typing import Any, Dict, List, Optional, TypedDict, Union

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import seaborn as sns
from numpy.typing import NDArray

from auto_labeling import autolabeling_dictionary
# layout_options is not imported here; get_provider loads it dynamically at call time.
#
# weather_utilities is loaded explicitly from the file that sits next to this
# module, instead of via a plain `import`, to avoid picking up a same-named
# module from elsewhere (e.g. a test version). The statements below are
# order-dependent: evict cached module -> build spec -> execute -> register.
import importlib.util
import sys
# Drop any cached weather_utilities module, which might have come from the TESTS directory
if 'weather_utilities' in sys.modules:
    del sys.modules['weather_utilities']
# Absolute path of the weather_utilities.py located in this module's directory
_weather_utilities_path = Path(__file__).parent / "weather_utilities.py"
_spec = importlib.util.spec_from_file_location("weather_utilities", _weather_utilities_path)
_weather_utilities = importlib.util.module_from_spec(_spec)
# Run the module body so that its attributes become available
_spec.loader.exec_module(_weather_utilities)
# Register the module in sys.modules so subsequent `import weather_utilities` calls get this version
sys.modules['weather_utilities'] = _weather_utilities
# Re-export the names this module uses as plain module-level names
FetchDataResult = _weather_utilities.FetchDataResult
SignalRecord = _weather_utilities.SignalRecord
matplotlib_to_plotly_base64 = _weather_utilities.matplotlib_to_plotly_base64
robust_scaling = _weather_utilities.robust_scaling
simple_moving_average = _weather_utilities.simple_moving_average
trim_data = _weather_utilities.trim_data

matplotlib.use('Agg')  # Non-interactive backend: figures are only rendered to images, never shown in a GUI


# ---- Typed return payloads ---------------------------------------------------

class DataCoordinatorInfo(TypedDict, total=False):
    """
    Shape of the provider configuration dictionary returned by get_provider.

    The TypedDict is declared with total=False, so every key is optional from
    the type checker's point of view.
    """
    # Core data access
    fetch_data: Any                   # callable; leaving as Any to avoid recursive types
    dataset_id: str                   # identifier string of the dataset served by this provider
    fetch_record_ids_for_dataset_id: Any  # callable returning the list of available record IDs

    # Schema / metadata
    all_possible_signals: List[str]   # signal (column) names offered for selection
    is_date: bool                     # get_provider sets this to True for this provider
    data_folder: Path                 # directory that holds the parquet record files

    # Processing / analysis options
    custom_smoothing_options: Optional[Any]     # mapping of smoothing option name -> config dict
    custom_normalizing_options: Optional[Any]   # mapping of normalizing option name -> config dict
    trim_data: Optional[Any]                    # trimming helper taken from weather_utilities

    # Labeling / classification
    auto_label_function_dictionary: Optional[Dict[str, Any]]  # auto-labeling configuration
    all_labels: List[str]                                     # label names available for annotation

    # Visualization / UI
    custom_grapher_dictionary: Optional[Dict[str, Any]]  # mapping of grapher name -> config dict
    layout_options: Optional[Any]                        # loaded from template_layouts.py by get_provider
    # NOTE(review): the two defaults below are declared here but get_provider
    # in this module does not populate them - confirm whether that is intended.
    default_trim_1: Optional[str]
    default_trim_2: Optional[str]

def seaborn_plotly_example(
    app_control_parameters: Dict[str, Any],
    parameters: Dict[str, Any]
) -> go.Figure:
    """
    Build a native Plotly line chart styled to resemble a Seaborn plot.

    One signal is fetched through the application's data coordinator, scaled
    by a multiplier, and drawn as a single line trace over a transparent
    background with light grid lines.

    Args:
        app_control_parameters (dict): Global parameters from the application
            controller. Keys read here: "data_coordinator", "record_id",
            "trim_t1", "trim_t2" and "theme_value".
        parameters (dict): Parameters specific to this graphing function.

    Parameters Keys:
        - "seaborn_plotly_example_multiplier": float (optional, defaults to 1.0)
        - "seaborn_plotly_example_signal_option": str (optional, defaults to "Temperature")

    Returns:
        plotly.graph_objects.Figure: The styled line chart.
    """
    print(f"Parameters: {parameters}")

    # Fall back to the defaults when the caller supplied no override
    multiplier = parameters.get("seaborn_plotly_example_multiplier", 1.0)
    signal_option = parameters.get("seaborn_plotly_example_signal_option", "Temperature")

    coordinator = app_control_parameters["data_coordinator"]
    fetched = coordinator.fetch_data_async(
        coordinator.data_folder,
        dataset_id=None,
        record_id=app_control_parameters["record_id"],
        signals=[signal_option],
        global_data_params={},
        trim_1=app_control_parameters["trim_t1"],
        trim_2=app_control_parameters["trim_t2"],
        timeout=10,
    )

    # Only one signal was requested, so only the first entry is used
    first_signal = fetched["signals"][0]
    raw_values = first_signal["data"]
    raw_times = first_signal["times"]

    # Normalise both series to flat numpy arrays, then scale the values
    y_values = np.array(raw_values).flatten()
    x_values = np.array(raw_times).flatten()
    y_values = y_values * multiplier

    line_trace = go.Scatter(
        x=x_values,
        y=y_values,
        mode='lines',
        line=dict(width=2.5),
        name=signal_option,
    )
    fig = go.Figure(data=[line_trace])

    # Grid styling shared by both axes
    grid_style = dict(showgrid=True, gridcolor='rgba(0,0,0,0.1)', zeroline=False)
    # A truthy theme value selects the dark template
    template_name = "plotly_dark" if app_control_parameters["theme_value"] else "plotly_white"

    fig.update_layout(
        title=dict(
            text=f"{signal_option} Over Time (Multiplier: {multiplier})",
            font=dict(size=16),
            y=0.95,
        ),
        xaxis=dict(title="Time", **grid_style),
        yaxis=dict(title=signal_option, **grid_style),
        plot_bgcolor='rgba(0,0,0,0)',
        paper_bgcolor='rgba(0,0,0,0)',
        template=template_name,
        margin=dict(l=50, r=50, t=80, b=50),
    )

    return fig

def seaborn_graph_example(
    app_control_parameters: Dict[str, Any],
    parameters: Dict[str, Any]
) -> go.Figure:
    """
    Render a Seaborn plot and return it embedded as an image in a Plotly figure.

    Thin adapter around get_figure_for_callback: it resolves the two
    user-tunable options from ``parameters`` and forwards the relevant
    application state from ``app_control_parameters``.

    Args:
        app_control_parameters (dict): Global parameters from the application controller
        parameters (dict): Parameters specific to this graphing function

    Parameters Keys:
        - "seaborn_graph_example_multiplier": float (optional, defaults to 1.0)
        - "seaborn_graph_example_signal_option": str (optional, defaults to "Temperature")

    Returns:
        plotly.graph_objects.Figure: A Plotly figure containing the Seaborn plot image
    """
    print(f"Parameters: {parameters}")

    multiplier = parameters.get("seaborn_graph_example_multiplier", 1.0)
    signal_option = parameters.get("seaborn_graph_example_signal_option", "Temperature")

    # Keyword argument of get_figure_for_callback -> key in app_control_parameters
    forwarded_keys = {
        "record_id": "record_id",
        "t1": "t1",
        "t2": "t2",
        "trim_t1": "trim_t1",
        "trim_t2": "trim_t2",
        "selection_toggle": "selection_toggle_on",
        "display_labels_toggle": "display_labels",
        "labels_table": "label_data",
        "proposed_labels": "proposed_labels_data",
        "labels_table_selected_rows": "labels_table_selected_rows",
        "figure_rendered": "figure_rendered",
        "label_value_combinations": "label_value_combinations",
        "data_coordinator": "data_coordinator",
        "theme_value": "theme_value",
    }
    forwarded = {
        argument: app_control_parameters[source_key]
        for argument, source_key in forwarded_keys.items()
    }

    return get_figure_for_callback(
        multiplier=multiplier,
        signal_option=signal_option,
        **forwarded,
    )

def get_figure_for_callback(
    multiplier: float,
    signal_option: str,
    record_id: str,
    t1: Any,
    t2: Any,
    trim_t1: Optional[str],
    trim_t2: Optional[str],
    selection_toggle: bool,
    display_labels_toggle: bool,
    labels_table: Any,
    proposed_labels: Any,
    labels_table_selected_rows: List[int],
    figure_rendered: bool,
    label_value_combinations: Any,
    data_coordinator: Any,
    theme_value: bool,
) -> go.Figure:
    """
    Core logic for creating a Seaborn plot and embedding it in Plotly.

    Fetches one signal through the data coordinator, scales it, draws a
    Seaborn line plot on a Matplotlib figure, converts that figure to a
    base64 image and places the image in an otherwise empty Plotly figure.

    Args:
        multiplier: Factor applied to every sample of the signal.
        signal_option: Name of the signal to fetch and plot.
        record_id: Record to fetch the signal from.
        trim_t1: Lower trim bound passed to the data coordinator.
        trim_t2: Upper trim bound passed to the data coordinator.
        data_coordinator: Object providing ``fetch_data_async`` and
            ``data_folder``.
        theme_value: Truthy selects the "plotly_dark" template, otherwise
            "plotly_white".
        t1, t2, selection_toggle, display_labels_toggle, labels_table,
        proposed_labels, labels_table_selected_rows, figure_rendered,
        label_value_combinations: Accepted as part of the call signature
            but not used by this function.

    Returns:
        plotly.graph_objects.Figure: Plotly figure with embedded image.
    """
    record_data = data_coordinator.fetch_data_async(
        data_coordinator.data_folder,
        dataset_id=None,
        record_id=record_id,
        signals=[signal_option],
        global_data_params={},
        trim_1=trim_t1,
        trim_2=trim_t2,
        timeout=10,
    )

    signal = record_data["signals"][0]["data"]
    times = record_data["signals"][0]["times"]

    # Force conversion to plain numpy arrays and flatten if needed
    signal = np.array(signal).flatten()
    times = np.array(times).flatten()

    # Apply multiplier
    signal = signal * multiplier

    # Style names are listed in plt.style.available; they are not attributes
    # of the plt.style module, so hasattr() can never detect them.
    style_name = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'default'

    # Apply the style only while this figure is built and rendered, so the
    # process-wide Matplotlib rcParams are left untouched afterwards.
    with plt.style.context(style_name):
        fig_mpl, ax = plt.subplots(figsize=(10, 6))
        try:
            # Ensure completely clean data types for Seaborn
            times_clean = [pd.Timestamp(t).to_pydatetime() for t in times]
            signal_clean = [float(s) for s in signal]

            # Create DataFrame for seaborn
            df = pd.DataFrame({'Time': times_clean, 'Signal': signal_clean})

            # Use seaborn for plotting
            sns.lineplot(data=df, x='Time', y='Signal', ax=ax, linewidth=2.5)
            ax.set_title(f"{signal_option} Over Time (Multiplier: {multiplier})", fontsize=16, pad=20)
            ax.set_xlabel("Time", fontsize=12)
            ax.set_ylabel(signal_option, fontsize=12)
            ax.grid(True, alpha=0.3)

            # Convert to base64 while the style context is still active
            img_base64 = matplotlib_to_plotly_base64(fig_mpl)
        finally:
            # pyplot keeps every figure it creates alive until it is closed;
            # closing here prevents one leaked figure per callback. Closing a
            # figure that is already closed is a no-op.
            plt.close(fig_mpl)

    # Create Plotly figure with embedded seaborn plot
    fig = go.Figure()
    fig.add_layout_image(
        dict(
            source=img_base64,
            xref="paper", yref="paper",
            x=0, y=1, sizex=1, sizey=1,
            sizing="contain", layer="below"
        )
    )

    # Update layout for a clean appearance: the axes only frame the image
    fig.update_layout(
        xaxis=dict(showgrid=False, showticklabels=False, zeroline=False),
        yaxis=dict(showgrid=False, showticklabels=False, zeroline=False),
        margin=dict(l=0, r=0, t=0, b=0),
        template="plotly_white" if not theme_value else "plotly_dark"
    )

    return fig
    
def simple_graph(
    app_control_parameters: Dict[str, Any],
    parameters: Dict[str, Any]
) -> str:
    """
    Minimal custom grapher that returns a text label instead of a figure.

    Shows the basic shape of a grapher function; graphers may return
    strings, dictionaries, or Plotly figures.

    Args:
        app_control_parameters (dict): Global parameters from the app (unused)
        parameters (dict): Parameters specific to this function

    Parameters Keys:
        - "simple_graph_text_parameter_1": value appended to the label (optional)

    Returns:
        str: "Simple Graph", followed by ": <value>" when the text parameter is present.
    """
    print(f"Parameters: {parameters}")

    base_label = "Simple Graph"
    text_key = "simple_graph_text_parameter_1"

    # Without the text parameter, the bare label is the whole result
    if text_key not in parameters:
        return base_label

    supplied_text = parameters[text_key]
    return f"{base_label}: {supplied_text}"

# pylint: disable=too-many-statements
def get_provider(_: Any) -> DataCoordinatorInfo:
    """
    Get the complete data provider configuration for the Weather App.

    Sets up error logging to a file next to this module, discovers the
    parquet records in the ``weather_data`` folder, derives the available
    signals from the first record, and assembles the provider dictionary
    (data access callables, processing options, graphers, layout options).

    Args:
        _ (Any): Unused parameter for compatibility with the interface.

    Returns:
        DataCoordinatorInfo: A dictionary containing the full provider configuration.

    Raises:
        IndexError: If no parquet files are found in the data directory.
        Exception: Re-raises any exceptions caught during setup after logging.
    """
    # --- Logging Setup ---
    log_file_path = Path(__file__).parent / "weather_data_provider_errors.log"
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.ERROR)
    # Prevent duplicate handlers if get_provider is called multiple times
    if not logger.handlers:
        file_handler = logging.FileHandler(log_file_path)
        formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)
    # --- End Logging Setup ---

    try:
        def fetch_record_ids_for_dataset_id(
            data_folder: Path,
            _: Optional[Any] = None
        ) -> List[str]:
            """
            Fetch all available record IDs from the data folder.

            Record IDs are derived from parquet filenames (without extension).
            They are returned sorted because glob() order depends on the
            filesystem, and the first ID is used for signal discovery.
            """
            return sorted(file.stem for file in data_folder.glob("*.parquet"))

        def _align_trim_to_data_timezone(raw_trim: str, data_timezone: Any) -> Any:
            """Parse a trim bound and make its timezone-awareness match the data."""
            trim = pd.to_datetime(raw_trim)
            if data_timezone is not None and trim.tz is None:
                # Data has a timezone but the trim value doesn't: assume the same timezone
                return trim.tz_localize(data_timezone)
            if data_timezone is None and trim.tz is not None:
                # Data has no timezone but the trim value does: remove the timezone
                return trim.tz_localize(None)
            return trim

        def fetch_data(
            data_folder: Path,
            _dataset_id: Optional[str],
            record_id: Optional[str],
            signals: Optional[List[str]],
            _global_data_params: Optional[Dict[str, Any]],
            data_trim_1: Optional[str] = None,
            data_trim_2: Optional[str] = None,
        ) -> Union[FetchDataResult, List[Any]]:
            """
            Fetch and process data for a specific record ID and signals.

            Loads ``<record_id>.parquet`` from ``data_folder``, optionally
            restricted to ``data_trim_1 <= time <= data_trim_2`` (each bound
            aligned to the data's timezone-awareness), and extracts the
            requested signals.

            Returns:
                An empty list when ``record_id`` is None. Otherwise a dict
                with "id", "signals" (one record per signal found) and
                "errored_signals" (requested signals that are not columns of
                the file, or all requested signals if the file could not be
                read).
            """
            if record_id is None:
                return []
            if signals is None:
                signals = [all_possible_signals[0]] if all_possible_signals else []

            path = Path(data_folder) / f"{record_id}.parquet"

            # Build timezone-aware filters. The data timezone is only needed
            # to align trim bounds, so the file is probed only when at least
            # one bound was supplied.
            filters = []
            if data_trim_1 is not None or data_trim_2 is not None:
                # Get timezone info from the parquet index (assume same timezone for all data)
                data_timezone = None
                try:
                    probe_index = pd.read_parquet(path).index
                    if isinstance(probe_index, pd.DatetimeIndex):
                        # Index-level tz also works when the index is empty
                        data_timezone = probe_index.tz
                except Exception as exc:  # best-effort probe; fall back to "no timezone"
                    logger.error("Error checking timestamp format: %s", exc)

                if data_trim_1 is not None:
                    filters.append(
                        ("time", ">=", _align_trim_to_data_timezone(data_trim_1, data_timezone))
                    )
                if data_trim_2 is not None:
                    filters.append(
                        ("time", "<=", _align_trim_to_data_timezone(data_trim_2, data_timezone))
                    )

            # Read data with filters
            try:
                df = pd.read_parquet(path, filters=filters if filters else None)
            except Exception as exc:  # report every requested signal as errored instead of crashing
                logger.error("Error reading parquet with filters: %s", exc)
                return {"id": record_id, "signals": [], "errored_signals": signals}

            times: NDArray[Any] = df.index.to_numpy()
            data_array: List[SignalRecord] = []
            errored_signals: List[str] = []

            for signal in signals:
                if signal in df.columns:
                    signal_data: NDArray[Any] = df[signal].to_numpy()
                    record: SignalRecord = {
                        "data": signal_data,
                        "data_name": signal,
                        "times": times,
                        "errored": False,
                    }
                    data_array.append(record)
                else:
                    errored_signals.append(signal)

            return {"id": record_id, "signals": data_array, "errored_signals": errored_signals}

        # --- Module Initialization ---
        data_folder: Path = Path(__file__).parent / "weather_data"
        existing_record_ids: List[str] = fetch_record_ids_for_dataset_id(data_folder)

        if not existing_record_ids:
            logger.error("No parquet files found in %s. Cannot proceed.", data_folder)
            raise IndexError(f"No parquet files found in {data_folder}, existing_record_ids is empty.")

        # Extract available signals from the first data file
        first_record: str = existing_record_ids[0]
        file_path: Path = Path(data_folder) / f"{first_record}.parquet"
        first_df: pd.DataFrame = pd.read_parquet(file_path)
        all_possible_signals: List[str] = first_df.columns.to_list()
        # Extra entry that is not a column of the data file; fetch_data
        # reports it under "errored_signals" when it is requested.
        all_possible_signals.append("example_nonexistent_data")

        # --- Configuration Dictionaries ---
        custom_smoothing_dictionary = {
            "simple_moving_average": {
                "display_name": "Simple Moving Average",
                "parameters": {
                    "moving_average_window_size": {"default": 1, "min": 0, "max": None, "display_name": "Window Size"}
                },
                "function": simple_moving_average,
            },
        }

        custom_grapher_dictionary = {
            "simple_graph": {
                "display_name": "Simple Graph",
                "parameters": {
                    "text_parameter_1": {"default": "Hello", "display_name": "Text Parameter"}
                },
                "function": simple_graph,
            },
            "seaborn_graph_example": {
                "display_name": "Seaborn Graph Example",
                "parameters": {
                   "multiplier": {"default": 1.0, "min": 0.0, "max": None, "display_name": "Multiplier"},
                   "signal_option": {"default": "Temperature", "options": {"Temperature": "Temperature", "Pressure": "Pressure", "Precipitation": "Precipitation"}, "display_name": "Signal Option"}
                },
                "function": seaborn_graph_example,
            },
            "seaborn_plotly_example": {
                "display_name": "Seaborn Plotly Example",
                "parameters": {
                    "multiplier": {"default": 1.0, "min": 0.0, "max": None, "display_name": "Multiplier"},
                    "signal_option": {"default": "Temperature", "options": {"Temperature": "Temperature", "Pressure": "Pressure", "Precipitation": "Precipitation"}, "display_name": "Signal Option"}
                },
                "function": seaborn_plotly_example,
            },
        }

        custom_normalizing_dictionary = {
            "robust_scaling": {"display_name": "Robust Scaling", "parameters": None, "function": robust_scaling}
        }

        # Load layout_options from the sibling template_layouts.py on every
        # call to avoid module caching issues (importlib.util is imported at
        # module level).
        template_layouts_path = Path(__file__).parent / "template_layouts.py"
        spec = importlib.util.spec_from_file_location("template_layouts", template_layouts_path)
        template_layouts_module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(template_layouts_module)
        layout_options = template_layouts_module.layout_options

        # --- Final Provider Assembly ---
        data_coordinator_info: DataCoordinatorInfo = {
            "fetch_data": fetch_data,
            "dataset_id": "weather_data_2",
            "fetch_record_ids_for_dataset_id": fetch_record_ids_for_dataset_id,
            "all_possible_signals": all_possible_signals,
            "custom_smoothing_options": custom_smoothing_dictionary,
            "custom_normalizing_options": custom_normalizing_dictionary,
            "auto_label_function_dictionary": autolabeling_dictionary,
            "all_labels": ["Peak", "Dip", "Anomaly"],
            "custom_grapher_dictionary": custom_grapher_dictionary,
            "is_date": True,
            "trim_data": trim_data,
            "data_folder": data_folder,
            "layout_options": layout_options
        }
        return data_coordinator_info
    except Exception:
        # logger.exception records the message together with the active traceback
        logger.exception("Exception occurred in get_provider:")
        raise  # Re-raise the exception to be caught by the caller
