|
@@ -1,6 +1,8 @@
|
|
|
import pandas as pd
|
|
|
import joblib
|
|
|
+from apscheduler.schedulers.blocking import BlockingScheduler
|
|
|
from sklearn.ensemble import RandomForestRegressor
|
|
|
+from sklearn.model_selection import train_test_split
|
|
|
|
|
|
# === 配置路径 ===
|
|
|
csv_path = 'C:\\Users\\Administrator\\Desktop\\defrost\\feedback_data.csv' # 你的csv
|
|
@@ -12,66 +14,69 @@ feature_columns = [
|
|
|
"T_air", "T_initial", "T_m", "a", "b", "c"
|
|
|
]
|
|
|
|
|
|
-# === 1. 读取CSV并预处理 ===
|
|
|
-try:
|
|
|
- df = pd.read_csv(csv_path, parse_dates=["t_formula", "t_real"], encoding='utf-8')
|
|
|
- print(f"✅ 成功读取CSV文件,共{len(df)}条数据")
|
|
|
-except Exception as e:
|
|
|
- print(f"❌ 读取CSV失败: {e}")
|
|
|
- exit(1)
|
|
|
-
|
|
|
-# 确保字段类型正确(如果这两列存在)
|
|
|
-for col in ["material_name", "manufactured_goods"]:
|
|
|
- if col in df.columns:
|
|
|
- df[col] = df[col].astype(str)
|
|
|
-
|
|
|
-# 计算真实解冻时长(小时)
|
|
|
-df["t_real_hours"] = (df["t_real"] - df["t_formula"]).dt.total_seconds() / 3600
|
|
|
-
|
|
|
-# 检查有没有缺失特征
|
|
|
-missing_features = [col for col in feature_columns if col not in df.columns]
|
|
|
-if missing_features:
|
|
|
- print(f"❌ 缺少必要特征列: {missing_features}")
|
|
|
- exit(1)
|
|
|
-
|
|
|
-# === 2. 智能训练模型 ===
|
|
|
-X = df[feature_columns]
|
|
|
-y = df["t_real_hours"]
|
|
|
-
|
|
|
-if len(X) >= 10:
|
|
|
- # 数据够多,做train_test_split
|
|
|
- from sklearn.model_selection import train_test_split
|
|
|
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
|
|
- print(f"📚 数据量 {len(X)},已划分训练集和测试集")
|
|
|
-else:
|
|
|
- # 数据少,直接全量训练
|
|
|
- X_train, y_train = X, y
|
|
|
- X_test, y_test = None, None
|
|
|
- print(f"⚠️ 数据量太少({len(X)}条),直接全量训练")
|
|
|
-
|
|
|
-# 建立随机森林回归模型
|
|
|
-model = RandomForestRegressor(n_estimators=100, random_state=42)
|
|
|
-model.fit(X_train, y_train)
|
|
|
-
|
|
|
-# 保存模型
|
|
|
-joblib.dump(model, model_save_path)
|
|
|
-print(f"✅ 模型训练完成,已保存为 {model_save_path}")
|
|
|
-
|
|
|
-# === 3. 预测最新一条数据 ===
|
|
|
-new_sample = df.tail(1) # 取最后一行
|
|
|
-
|
|
|
-X_new = new_sample[feature_columns]
|
|
|
-predicted_time = model.predict(X_new)[0]
|
|
|
-
|
|
|
-# 把预测值写回DataFrame
|
|
|
-df.loc[new_sample.index, "predicted_t_real_hours"] = predicted_time
|
|
|
-
|
|
|
-# === 4. 保存带预测值的CSV ===
|
|
|
-try:
|
|
|
- df.to_csv(csv_path, encoding='utf-8', index=False)
|
|
|
- print(f"✅ 最新数据预测完成,已更新到 {csv_path}")
|
|
|
-except Exception as e:
|
|
|
- print(f"❌ 保存CSV失败: {e}")
|
|
|
-
|
|
|
-# === 5. 打印最终预测结果 ===
|
|
|
-print(f"\n📊 预测最后一条数据真实解冻时间为:{predicted_time:.2f} 小时")
|
|
|
+# Scheduled job: retrain the defrost-duration model from the feedback CSV.
+def train_and_save_model():
+    """Retrain the random-forest regressor from ``csv_path`` and save it.
+
+    Prints a message and returns early, without saving a model, when the CSV
+    cannot be read, a feature column is missing, or no row has a duration.
+    """
+    print("🔄 定时任务开始:重新训练模型...")
+
+    # === 1. Read the CSV and preprocess ===
+    try:
+        df = pd.read_csv(csv_path, parse_dates=["t_formula", "t_real"], encoding='utf-8')
+        print(f"✅ 成功读取CSV文件,共{len(df)}条数据")
+    except Exception as e:
+        print(f"❌ 读取CSV失败: {e}")
+        return
+
+    # Make sure these columns are strings (if they exist).
+    for col in ["material_name", "manufactured_goods"]:
+        if col in df.columns:
+            df[col] = df[col].astype(str)
+
+    # Stop if any required feature column is missing.
+    missing_features = [col for col in feature_columns if col not in df.columns]
+    if missing_features:
+        print(f"❌ 缺少必要特征列: {missing_features}")
+        return
+
+    # Real defrost duration in hours; rows without one cannot be targets.
+    df["t_real_hours"] = (df["t_real"] - df["t_formula"]).dt.total_seconds() / 3600
+    df = df.dropna(subset=["t_real_hours"])
+    if df.empty:
+        print("❌ 没有可用于训练的数据")
+        return
+
+    # === 2. Train the model ===
+    X = df[feature_columns]
+    y = df["t_real_hours"]
+    if len(X) >= 10:
+        # Enough data: hold out 20% so the fit can be scored below.
+        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+        print(f"📚 数据量 {len(X)},已划分训练集和测试集")
+    else:
+        # Too little data: train on everything.
+        X_train, y_train = X, y
+        X_test, y_test = None, None
+        print(f"⚠️ 数据量太少({len(X)}条),直接全量训练")
+
+    model = RandomForestRegressor(n_estimators=100, random_state=42)
+    model.fit(X_train, y_train)
+    if X_test is not None:
+        # Report R² on the hold-out rows so the split is actually used.
+        print(f"📈 测试集 R² 得分: {model.score(X_test, y_test):.3f}")
+
+    joblib.dump(model, model_save_path)
+    print(f"✅ 模型训练完成,已保存为 {model_save_path}")
+
|
|
|
+# Build the scheduler that drives periodic retraining.
+scheduler = BlockingScheduler()
+
+# Register train_and_save_model to run once every hour.
+scheduler.add_job(func=train_and_save_model, trigger='interval', hours=1)
+
+# Announce, then start the scheduler.
+print("⏰ 启动定时任务调度器,每小时自动训练模型...")
+scheduler.start()
|