import os

import joblib
import pandas as pd
from apscheduler.schedulers.blocking import BlockingScheduler
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# === Path configuration ===
csv_path = 'C:\\Users\\Administrator\\Desktop\\defrost\\feedback_data.csv'  # your feedback CSV
model_save_path = "defrost_time_corrector.pkl"  # where the trained model is saved

# === Feature column definitions ===
feature_columns = [
    "w", "rho_coal", "rho_ice", "C_coal", "C_ice", "L",
    "k_coal", "k_ice", "h", "T_air", "T_initial", "T_m",
    "a", "b", "c",
]


def _prepare_training_data(df):
    """Return ``(X, y)`` restricted to the rows that can be used for training.

    ``X`` holds ``feature_columns`` coerced to numeric values. ``y`` is the
    difference ``t_real - t_formula`` expressed in hours and named
    ``t_real_hours``. A row is dropped when any feature is missing,
    non-numeric or infinite, or when either timestamp is missing or cannot
    be parsed.

    The caller must already have verified that every entry of
    ``feature_columns`` is a column of ``df``.
    """
    # Coerce instead of raising: one bad cell should cost one row, not the run.
    features = df[feature_columns].apply(pd.to_numeric, errors="coerce")
    features = features.replace([float("inf"), float("-inf")], float("nan"))

    # read_csv(parse_dates=...) leaves a column as plain objects when a value
    # cannot be parsed, so convert explicitly; bad values become NaT.
    start = pd.to_datetime(df["t_formula"], errors="coerce")
    end = pd.to_datetime(df["t_real"], errors="coerce")
    target = ((end - start).dt.total_seconds() / 3600).rename("t_real_hours")

    usable = features.notna().all(axis=1) & target.notna()
    return features[usable], target[usable]


def _save_model_atomically(model):
    """Write ``model`` to ``model_save_path`` via a temporary file.

    The model is dumped next to the target and then moved into place with
    ``os.replace``, so a process loading the model while this job runs does
    not read a partially written file.
    """
    tmp_path = f"{model_save_path}.tmp"
    try:
        joblib.dump(model, tmp_path)
        os.replace(tmp_path, model_save_path)
    finally:
        # After a successful replace the temp file is gone; this only cleans
        # up the leftovers of a failed dump or a failed replace.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


# Training function run by the scheduled job
def train_and_save_model():
    """Retrain the random-forest model from the feedback CSV and save it.

    Reads ``csv_path``, keeps the rows with complete features and a valid
    target, fits a ``RandomForestRegressor`` and writes it to
    ``model_save_path``. With at least 10 usable rows, 20% are held out and
    the hold-out R² score is printed; with fewer rows, all of them are used
    for training.

    Every failure (unreadable CSV, missing columns, no usable rows, training
    or saving error) is reported on stdout and the function returns without
    raising, so the previously saved model is left untouched and the caller
    keeps running.

    Returns:
        None
    """
    print("🔄 定时任务开始:重新训练模型...")

    # === 1. Read the CSV and preprocess ===
    try:
        df = pd.read_csv(csv_path, parse_dates=["t_formula", "t_real"], encoding='utf-8')
        print(f"✅ 成功读取CSV文件,共{len(df)}条数据")
    except Exception as e:
        print(f"❌ 读取CSV失败: {e}")
        return

    # Make sure these columns are strings (only if they are present)
    for col in ["material_name", "manufactured_goods"]:
        if col in df.columns:
            df[col] = df[col].astype(str)

    # Check that no required feature column is missing
    missing_features = [col for col in feature_columns if col not in df.columns]
    if missing_features:
        print(f"❌ 缺少必要特征列: {missing_features}")
        return

    # Build the feature matrix and the target (real defrost duration, hours)
    try:
        X, y = _prepare_training_data(df)
    except Exception as e:
        print(f"❌ 数据预处理失败: {e}")
        return

    dropped = len(df) - len(X)
    if dropped:
        print(f"⚠️ 已丢弃 {dropped} 条含缺失或无效值的数据")
    if X.empty:
        print("❌ 没有可用于训练的有效数据,跳过本次训练")
        return

    # === 2. Train the model ===
    if len(X) >= 10:
        # Enough data: hold out a test set
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        print(f"📚 数据量 {len(X)},已划分训练集和测试集")
    else:
        # Too little data: train on everything
        X_train, y_train = X, y
        X_test, y_test = None, None
        print(f"⚠️ 数据量太少({len(X)}条),直接全量训练")

    try:
        # Build and fit the random-forest regressor
        model = RandomForestRegressor(n_estimators=100, random_state=42)
        model.fit(X_train, y_train)

        if X_test is not None:
            print(f"📈 测试集 R² 得分: {model.score(X_test, y_test):.3f}")

        # Save the model
        _save_model_atomically(model)
    except Exception as e:
        # Job boundary: report and keep the previously saved model.
        print(f"❌ 模型训练或保存失败: {e}")
        return

    print(f"✅ 模型训练完成,已保存为 {model_save_path}")


# === Start the job scheduler ===
if __name__ == '__main__':
    # Run once immediately
    train_and_save_model()

    # Then retrain every hour
    scheduler = BlockingScheduler()
    scheduler.add_job(train_and_save_model, 'interval', hours=1)

    print("⏰ 启动定时任务调度器:每小时自动训练模型...")
    try:
        scheduler.start()
    except (KeyboardInterrupt, SystemExit):
        # Ctrl+C / interpreter exit is the normal way to stop this script.
        pass