import pandas as pd
import joblib
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# === Step 1: read the CSV and preprocess it ===
# Raw string: same path as before, without doubled backslashes.
csv_path = r'C:\Users\Administrator\Desktop\defrost\feedback_data.csv'
df = pd.read_csv(csv_path, parse_dates=["t_formula", "t_real"], encoding='gbk')

# Force both name columns to str so the equality lookup against a new
# sample's names compares like with like.
df["material_name"] = df["material_name"].astype(str)
df["manufactured_goods"] = df["manufactured_goods"].astype(str)

# Label: elapsed time from t_formula to t_real, converted from seconds to hours.
df["t_real_hours"] = (df["t_real"] - df["t_formula"]).dt.total_seconds() / 3600

# A row with a missing timestamp yields a NaN label, and the regressor
# refuses to fit on NaN targets. Drop such rows (and say so) instead of
# letting one incomplete record abort the whole run.
missing_label = df["t_real_hours"].isna()
if missing_label.any():
    print(f"警告：已丢弃 {int(missing_label.sum())} 行缺少 t_formula 或 t_real 的记录")
    df = df.loc[~missing_label].copy()

# Feature columns (material name and product name are deliberately excluded).
feature_columns = [
    "w", "rho_coal", "rho_ice", "C_coal", "C_ice", "L", "k_coal", "k_ice", "h",
    "T_air", "T_initial", "T_m", "a", "b", "c"
]

# Model inputs and target.
X = df[feature_columns].copy()
y = df["t_real_hours"]

# === Step 2: train the model ===
# Hold out 20% of the rows; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# === Step 3: save the model ===
joblib.dump(model, "defrost_time_corrector.pkl")
print("模型训练完成并已保存为 defrost_time_corrector.pkl")

# Evaluate on the held-out rows, which were previously split off and never
# used. Done after saving so an evaluation problem cannot prevent the model
# file from being written.
test_predictions = model.predict(X_test)
test_mae = (y_test - test_predictions).abs().mean()
print(f"测试集平均绝对误差: {test_mae:.2f} 小时")
# R² is undefined for fewer than two samples, so only report it when meaningful.
if len(y_test) >= 2:
    print(f"测试集 R²: {model.score(X_test, y_test):.3f}")

# === Step 4: predict for one new sample and check whether its type is already known ===
# Identity fields: only used for the lookup against the historical rows,
# never passed to the model.
sample_identity = {
    "material_name": "国产动力煤",
    "manufactured_goods": "龙家堡洗混煤-5206",
}
# Numeric inputs; the keys are the names listed in feature_columns.
sample_features = {
    "w": 12,
    "rho_coal": 3000,
    "rho_ice": 917,
    "C_coal": 800,
    "C_ice": 2100,
    "L": 334000,
    "k_coal": 20,
    "k_ice": 2.2,
    "h": 300,
    "T_air": 90,
    "T_initial": -20,
    "T_m": 0,
    "a": 13,
    "b": 2.72,
    "c": 1.6,
}
# Merge with identity first so the key order matches a single flat literal.
new_sample_info = {**sample_identity, **sample_features}

# One-row DataFrame holding the complete sample.
new_sample = pd.DataFrame([new_sample_info])

# The sample counts as "known" when at least one historical row carries
# both the same material name and the same product name.
material_matches = df["material_name"] == new_sample_info["material_name"]
product_matches = df["manufactured_goods"] == new_sample_info["manufactured_goods"]
is_known = (material_matches & product_matches).any()

# Only the feature columns go to the model, in training order.
X_new = new_sample[feature_columns]
predicted_time = model.predict(X_new)[0]

print(f"\n📊 预测真实解冻时间: {predicted_time:.2f} 小时")
verdict = (
    "该样本与历史数据中存在相同物料和制造品，可以认为是同一类样本。"
    if is_known
    else "该样本是新的物料或产品组合，可能存在偏差，请注意验证。"
)
print(verdict)