import numpy as np
import pandas as pd

# Plotly is used to generate interactive HTML plots for quick checking of the detected start point
import plotly.io as pio
pio.renderers.default = 'browser' # Open plots in the browser by default
import plotly.express as px
import plotly.graph_objects as go

# READ ME!
# Always change these inputs:
# 1) Raw data file name (variable: file)
# 2) Output Excel file name (ExcelWriter(...) line)
# 3) Texture Analyser starting position (TA_starting_position, in mm)
# 4) Adjust the cutting criteria if necessary (threshold and block_size_half)


# -----------------------------
# 1) Load the raw Excel workbook
# -----------------------------
file = "Test.xlsx"  # INPUT: raw data Excel file (must exist in your working directory) 
xlsx = pd.ExcelFile(file)

# This list will collect one row per particle:
# [particle_name, sheet_name, computed_real_starting_distance]
starting_distances_data = []

# INPUT: Texture Analyser starting position (in mm)
TA_starting_position = 3 

# -----------------------------------------
# 2) Create an output workbook and fill it
# -----------------------------------------
with pd.ExcelWriter("Cutted_Test.xlsx") as writer: # INPUT: output Excel file name
    
    for sheet_name in xlsx.sheet_names:
        # Read the current sheet into a DataFrame
        raw_data = pd.read_excel(file, sheet_name=sheet_name)
        particle_names = set([v.strip() for v in raw_data.iloc[0, :].tolist()])

        # Store processed (cut + shifted) distance/force arrays for all particles in this sheet
        # Keys look like: "<particle>_Distance" and "<particle>_Forces"
        all_particles_data = {}
        max_length = 0  # Track the maximum length of any particle's data
        
        def analyze_particle(particle_name):

            # Extracts one particle's force-distance data from the sheet, detects a 'start' index based on
            # a running-average force threshold, plots the detection result, and returns cut data.
        
            # Running average window:
            # We average forces from (i - block_size_half) to (i + block_size_half),
            # so the full window length is roughly 2*block_size_half !
            block_size_half = 5  
            running_average = []
            distance_running_average = []
            # Find all columns whose first-row label equals particle_name
            # Assumption: exactly 4 columns belong to a particle
            column_indices = np.argwhere([np.array([v.strip() for v in raw_data.iloc[0, :].tolist()]) == particle_name])[:, 1]
            assert len(column_indices) == 4 # Hard assumption about the raw Excel layout!

            # The script assumes:
            # - Force is stored in the first of the 4 columns
            # - Distance is stored in the second of the 4 columns
            # Data starts at row index 4 (i.e., skip the first 4 rows which contain headers/metadata)
            # Also: replace comma decimals (e.g., "0,12") with dot decimals ("0.12")

            distances = raw_data.iloc[4:, column_indices[1]].astype(str).str.replace(',', '.').astype(float)  # Distance
            forces = raw_data.iloc[4:, column_indices[0]].astype(str).str.replace(',', '.').astype(float)  # Force
            starting_position = 0

            # Running average with specified current threshold (curr_running_average);
            # Determination of the starting position of the Texture Analyser measurement
            for i in range(block_size_half, distances.size):
                curr_running_average = np.mean(forces.iloc[i - block_size_half:i + block_size_half])  # Running average
                running_average.append(curr_running_average)
                distance_running_average.append(distances.iloc[i - block_size_half])
                # ---------------------------------------------------------
                # Detect start: when running-average force exceeds threshold
                # ---------------------------------------------------------
                #
                # For each i, compute running-average force around i.
                # Here: The first time the running average crosses 0.018 N (threshold), store that as the start.
                if starting_position == 0 and curr_running_average >= 0.018:
                    starting_position = i - block_size_half
                    print(f"Starting position for {particle_name} in {sheet_name}: {starting_position}")
                    print(f"Starting distance: {distances.iloc[starting_position]}")

            # ---------------------------------------------------------
            # Plot force-distance + running average + detected start line
            # ---------------------------------------------------------
            fig_1 = px.scatter(template="presentation")
            fig_1.add_scatter(x=distances, y=forces, name="actual data")
            fig_1.add_scatter(x=distance_running_average, y=running_average, name="running average")
            # Vertical line at detected start distance
            x_starting_value = distances.iloc[starting_position]
            fig_1.add_trace(go.Scatter(x=[x_starting_value, x_starting_value], y=[-1, 25], 
                                       mode="lines", name=f"current starting position: {distances.iloc[starting_position]} mm"))
            # Set plot labels + view limits (these are “visual preferences”)
            fig_1.update_layout(font_size=20, xaxis_title="Distance / mm", yaxis_title="Force / N", title_text=particle_name)
            fig_1.update_layout(legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.05))
            fig_1.update_xaxes(range=[0, 8])
            fig_1.update_yaxes(range=[-0.02, 0.2])
            
            # Save the plot as an HTML file
            plot_filename = f"Plot_{sheet_name}_{particle_name}.html"
            fig_1.write_html(plot_filename)

            # ---------------------------------------------------------
            # Return a "cut" DataFrame starting at detected start index
            # ---------------------------------------------------------
            df = pd.DataFrame()
            df['Distance'] = distances.iloc[starting_position:]
            df['Forces'] = forces.iloc[starting_position:]
            return df, distances.iloc[starting_position]  

        # -----------------------------------------------
        # 3) For each particle: cut data + shift distance
        # -----------------------------------------------
        for particle_name in particle_names:
            
            cutted_data_for_component, starting_distance = analyze_particle(particle_name)
            # Compute “real” starting distance relative to the TA starting position
            # (naming note: this is an offset value, not necessarily a "distance" series)
            real_starting_distance = TA_starting_position - starting_distance 

            # Append particle name and starting distance to the list
            starting_distances_data.append([particle_name, sheet_name, real_starting_distance])

            # Shift the distance axis so the detected start becomes 0 mm
            cutted_data_for_component['Distance'] -= starting_distance

            # Track max length so we can pad all particles to same length for Excel export
            max_length = max(max_length, cutted_data_for_component.shape[0])

            # Add processed data for this particle to the dictionary
            all_particles_data[f'{particle_name}_Distance'] = cutted_data_for_component['Distance'].values
            all_particles_data[f'{particle_name}_Forces'] = cutted_data_for_component['Forces'].values

         # ---------------------------------------------------------
        # 4) Pad shorter particles with NaNs so columns align
        # ---------------------------------------------------------
        #
        # Excel tables must be rectangular; different particles can have different cut lengths.
        # Padding with NaN keeps the data aligned without inventing values.
        for key in all_particles_data:
            data = all_particles_data[key]
            if len(data) < max_length:
                all_particles_data[key] = np.pad(data, (0, max_length - len(data)), constant_values=np.nan)

        # Convert the particle data dictionary into a DataFrame
        combined_particles_df = pd.DataFrame.from_dict(all_particles_data)

        
        # ---------------------------------------------------------
        # 5) Create 3 header rows (label / unit / particle name)
        # ---------------------------------------------------------
        #
        # This produces an Excel layout like:
        # Row 1: Distance | Force | Distance | Force | ...
        # Row 2: mm       | N     | mm       | N     | ...
        # Row 3: P1       | P1    | P2       | P2    | ...
        header_row_1 = []
        header_row_2 = []
        header_row_3 = []

        for particle_name in particle_names:
            header_row_1.extend(["Distance", "Force"])  
            header_row_2.extend(["mm", "N"])  
            header_row_3.extend([particle_name, particle_name])  

        headers_df = pd.DataFrame([header_row_1, header_row_2, header_row_3])
        # Make both DataFrames have generic integer column names so concat works cleanly
        headers_df.columns = range(headers_df.shape[1])
        combined_particles_df.columns = range(combined_particles_df.shape[1])

        # Stack header rows on top of the numeric data
        combined_with_headers = pd.concat([headers_df, combined_particles_df], axis=0, ignore_index=True)

        # Write the combined data (with headers) for this sheet to the corresponding sheet in the output file
        combined_with_headers.to_excel(writer, sheet_name=sheet_name[:31], index=False, header=False)

    # ---------------------------------------------------------
    # 6) Write the summary table of starting distances
    # ---------------------------------------------------------
    starting_distances_df = pd.DataFrame(starting_distances_data, columns=["Particle Name", "Sheet Name", "Starting Distance"])
    starting_distances_df.to_excel(writer, sheet_name="Starting Distances", index=False)

