import pandas as pd
import joblib
from sklearn.ensemble import RandomForestRegressor

# === Paths ===
# Feedback CSV: read as training data and later overwritten with the prediction.
csv_path = r'C:\Users\Administrator\Desktop\defrost\feedback_data.csv'
# File the fitted model is serialised to.
model_save_path = 'defrost_time_corrector.pkl'

# === Feature columns ===
# Names of the CSV columns fed to the model, in the order they are passed in.
feature_columns = [
    "w",
    "rho_coal", "rho_ice",
    "C_coal", "C_ice",
    "L",
    "k_coal", "k_ice",
    "h",
    "T_air", "T_initial", "T_m",
    "a", "b", "c",
]

# === 1. Load the CSV ===
# Only the read itself is guarded, so an unrelated error (e.g. the console
# failing to encode the success message) is not misreported as a read failure.
try:
    df = pd.read_csv(csv_path, parse_dates=["t_formula", "t_real"], encoding='utf-8')
except Exception as e:
    # Top-level boundary of the script: report whatever went wrong and stop.
    print(f"❌ 读取CSV失败: {e}")
    # `exit` is a convenience injected by the `site` module and is missing under
    # `python -S` or in frozen/embedded interpreters; SystemExit always works
    # and needs no import.
    raise SystemExit(1)

print(f"✅ 成功读取CSV文件，共{len(df)}条数据")

# Normalise the two optional descriptive columns to strings when present.
for col in ("material_name", "manufactured_goods"):
    if col in df.columns:
        df[col] = df[col].astype(str)

# Fail fast when a required feature column is absent, before deriving anything.
missing_features = [col for col in feature_columns if col not in df.columns]
if missing_features:
    print(f"❌ 缺少必要特征列: {missing_features}")
    # SystemExit instead of the site-provided `exit`, which is not always defined.
    raise SystemExit(1)

# Training target: (t_real - t_formula) expressed in hours.
# read_csv can leave a column as plain objects when some cells are not valid
# timestamps, in which case `.dt` raises AttributeError. Coerce temporary copies
# so unparseable or empty cells yield NaN hours for that row; the stored
# t_formula / t_real columns are left exactly as they were read.
t_formula_ts = pd.to_datetime(df["t_formula"], errors="coerce")
t_real_ts = pd.to_datetime(df["t_real"], errors="coerce")
df["t_real_hours"] = (t_real_ts - t_formula_ts).dt.total_seconds() / 3600

# === 2. Train the model ===
# Fit only on rows that have every feature and the target. A NaN target makes
# RandomForestRegressor.fit raise, which happens as soon as one record (for
# instance a newest row with no t_real yet) is incomplete. `df` itself is left
# untouched so later steps still see every row.
train_df = df.dropna(subset=feature_columns + ["t_real_hours"])
skipped_rows = len(df) - len(train_df)
if skipped_rows:
    print(f"⚠️ 已跳过 {skipped_rows} 条含缺失值的数据，不参与训练")
if train_df.empty:
    print("❌ 没有可用于训练的完整数据")
    raise SystemExit(1)

X = train_df[feature_columns]
y = train_df["t_real_hours"]

# The split threshold is applied to the usable rows, not the raw row count.
if len(X) >= 10:
    # Enough data: hold out 20% for evaluation.
    # Imported here, as in the original script, so it is only needed on this path.
    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    print(f"📚 数据量 {len(X)}，已划分训练集和测试集")
else:
    # Too little data to spare a test set: train on everything.
    X_train, y_train = X, y
    X_test, y_test = None, None
    print(f"⚠️ 数据量太少（{len(X)}条），直接全量训练")

# Random forest regressor; the fixed seed keeps runs reproducible.
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# The held-out split was previously created but never used; report its error.
if X_test is not None:
    test_mae = (y_test - model.predict(X_test)).abs().mean()
    print(f"📈 测试集平均绝对误差: {test_mae:.2f} 小时")

# Persist the fitted model.
joblib.dump(model, model_save_path)
print(f"✅ 模型训练完成，已保存为 {model_save_path}")

# === 3. Predict for the most recent record ===
# One-row frame holding the last row of the table.
new_sample = df.iloc[-1:]

# Select the model inputs for that row and take the single prediction.
X_new = new_sample.loc[:, feature_columns]
predicted_time = model.predict(X_new)[0]

# Record the prediction on that row only, in the prediction column.
df.loc[new_sample.index, "predicted_t_real_hours"] = predicted_time

# === 4. Write the table, prediction included, back to the CSV ===
# Best effort: a failed write is reported but does not stop the script.
try:
    df.to_csv(csv_path, index=False, encoding='utf-8')
    print(f"✅ 最新数据预测完成，已更新到 {csv_path}")
except Exception as save_error:
    print(f"❌ 保存CSV失败: {save_error}")

# === 5. Show the final prediction ===
print(f"\n📊 预测最后一条数据真实解冻时间为：{predicted_time:.2f} 小时")