Source code for OpenPinch.utils.wkbook_to_json

"""Excel workbook ingestion helpers for OpenPinch problem/result data."""

from __future__ import annotations

import json

import pandas as pd
from pandas.errors import EmptyDataError

from ._tabular_input import (
    get_column_names_and_units as _get_column_names_and_units,
)
from ._tabular_input import (
    problem_records_from_frame as _write_problem_to_dict_and_list,
)
from ._tabular_input import (
    target_records_from_frame as _write_targets_to_dict_and_list,
)
from .input_validation import validate_stream_data, validate_utility_data

__all__ = ["get_problem_from_excel", "get_results_from_excel"]



[docs]
def get_problem_from_excel(excel_file, output_json=None):
    """Read workbook stream/utility sheets and return OpenPinch problem JSON."""
    try:
        streams_data = _parse_sheet_with_units(excel_file, sheet_name="Stream Data")
        streams_data = validate_stream_data(streams_data)

        utilities_data = _parse_sheet_with_units(excel_file, sheet_name="Utility Data")
        utilities_data = validate_utility_data(utilities_data)

        options_data = _parse_options_sheet(excel_file, sheet_name="Options")
    except FileNotFoundError as exc:
        raise FileNotFoundError(f"Workbook file not found: {exc.filename}") from exc
    except (ValueError, EmptyDataError) as exc:
        raise ValueError(f"Failed to read workbook problem inputs: {exc}") from exc

    output_dict = {
        "streams": streams_data,
        "utilities": utilities_data,
        "options": options_data,
    }

    if isinstance(output_json, str):
        with open(output_json, "w", encoding="utf-8") as handle:
            json.dump(output_dict, handle, indent=4)

    return output_dict




[docs]
def get_results_from_excel(excel_file, output_json, project_name):
    """Read workbook summary results and return structured target JSON."""
    try:
        results_data = _parse_sheet_with_units(
            excel_file,
            sheet_name="Summary",
            row_units=2,
            row_data=4,
            project_name=project_name,
        )
    except FileNotFoundError as exc:
        raise FileNotFoundError(f"Workbook file not found: {exc.filename}") from exc
    except (ValueError, EmptyDataError) as exc:
        raise ValueError(f"Failed to read workbook summary inputs: {exc}") from exc

    output_dict = {"targets": results_data}

    if isinstance(output_json, str):
        with open(output_json, "w", encoding="utf-8") as handle:
            json.dump(output_dict, handle, indent=4)

    return output_dict



def _parse_sheet_with_units(
    excel_file,
    sheet_name,
    row_units=1,
    row_data=2,
    project_name=None,
):
    """Read one workbook sheet and convert it to row dictionaries."""
    with pd.ExcelFile(excel_file) as workbook:
        if sheet_name not in workbook.sheet_names:
            available = ", ".join(workbook.sheet_names)
            raise ValueError(
                f"Workbook is missing required sheet '{sheet_name}'. "
                f"Available sheets: {available or '<none>'}."
            )
        df_full = pd.read_excel(workbook, sheet_name=sheet_name, header=None)

    if df_full.empty:
        raise ValueError(f"Worksheet '{sheet_name}' is empty.")
    min_required_rows = max(row_units, row_data) + 1
    if len(df_full.index) < min_required_rows:
        raise ValueError(
            f"Worksheet '{sheet_name}' must include at least {min_required_rows} rows "
            "(names, units, and data rows)."
        )

    col_names, col_units = _get_column_names_and_units(
        df_full,
        sheet_name,
        row_units,
    )
    df_data: pd.DataFrame = df_full.iloc[row_data:].copy()

    for index in range(len(df_data.columns), len(col_names), -1):
        df_data = df_data.drop(columns=index - 1)
    df_data.columns = col_names[: len(df_data.columns)]
    for index in range(len(df_data.columns), len(col_names)):
        df_data[col_names[index]] = 0

    units_map = dict(zip(col_names, col_units))
    if sheet_name != "Summary":
        keep = ["t_supply", "t_target", "heat_flow", "dt_cont", "htc"]
        units_map = {key: units_map[key] for key in keep if key in units_map}

    if sheet_name == "Summary":
        return _write_targets_to_dict_and_list(df_data, units_map, project_name)
    return _write_problem_to_dict_and_list(df_data, units_map)


def _parse_options_sheet(
    excel_file,
    sheet_name: str = "Options",
):
    """Read key/value option rows from one workbook sheet."""
    with pd.ExcelFile(excel_file) as workbook:
        if sheet_name not in workbook.sheet_names:
            return {}
        df_full = pd.read_excel(workbook, sheet_name=sheet_name, header=None)

    col0 = df_full.iloc[:, 0]
    col1 = df_full.iloc[:, 1]
    df_full = df_full.loc[col1.notna() & ~col0.astype(str).str.startswith("###")]
    if df_full.empty:
        return {}

    keys = df_full.iloc[:, 0].tolist()
    values = df_full.iloc[:, 1].tolist()

    def to_python(value):
        if pd.isna(value):
            return None
        if hasattr(value, "item"):
            try:
                return value.item()
            except AttributeError, RuntimeError, TypeError, ValueError:
                pass
        return value

    return {
        (key.strip() if isinstance(key, str) else key): to_python(value)
        for key, value in zip(keys, values)
    }