In [1]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import DataLoader
from datetime import datetime, timedelta

from data_processor import load_and_check_data, create_sequences, normalize_data
from models import CosmicRayDataset, lstm_model, train_model
from predictions import create_prediction_dates, predict_cosmic_ray_extended
from visualization import plot_comprehensive_results
from evaluation import calculate_metrics_and_stats, save_complete_results, save_metrics_to_txt

# Fix the random seeds so that repeated runs of the notebook are reproducible.
# A single SEED constant keeps the torch and NumPy generators in sync and
# gives later cells one place to read the value from.
SEED = 42
torch.manual_seed(SEED)  # seeds torch's default random number generator
np.random.seed(SEED)     # seeds NumPy's legacy global RandomState

In [2]:
# 1. Load the data.
# load_and_check_data() is a project helper imported from data_processor; its
# result is unpacked into the solar-parameter table and the cosmic-ray table.
# Judging by the log it prints, it also reports date ranges, missing and
# duplicate days, and interpolates the missing cosmic-ray days.
solar_data, cosmic_data = load_and_check_data()

=== Data Alignment Debug ===
Solar data range: 1985-01-01 00:00:00 to 2033-01-01 00:00:00
Cosmic data range: 2011-05-20 00:00:00 to 2019-10-29 00:00:00
Total solar days: 17533
Total cosmic days before interpolation: 2824
Missing solar days: 0
Missing cosmic days: 261
First 5 missing cosmic dates: [Timestamp('2011-06-04 00:00:00'), Timestamp('2012-11-01 00:00:00'), Timestamp('2013-09-10 00:00:00'), Timestamp('2013-09-11 00:00:00'), Timestamp('2013-10-21 00:00:00')]
Duplicate solar dates: 0
Duplicate cosmic dates: 0
Interpolating cosmic data...
Total cosmic days after interpolation: 3085
Final data summary:
Solar data: 17533 days (1985-01-01 00:00:00 - 2033-01-01 00:00:00)
Cosmic data: 3085 days (2011-05-20 00:00:00 - 2019-10-29 00:00:00)


In [3]:
# 2. Build the input/target sequences using a 5-day window (sequence_length=5).
# The capital X follows the usual "feature matrix" naming convention.
# NOTE(review): the original comment describes X as a 2-D array; with a 5-day
# window of per-day feature rows it may actually be 3-D — confirm via X.shape.
# NOTE(review): this call prints very verbose per-date debug output (including
# a full boolean mask Series); consider silencing that inside create_sequences.
X, y, dates = create_sequences(solar_data, cosmic_data, sequence_length=5)


=== 创建 5 天序列（太阳参数+宇宙线流强） ===

处理样例: 输入 2011-05-20 00:00:00 到 2011-05-24 00:00:00, 输出 2011-05-25 00:00:00
  日期 2011-05-20 00:00:00 的数据:
 --------------- solar_mask : 0        False
1        False
2        False
3        False
4        False
         ...  
17528    False
17529    False
17530    False
17531    False
17532    False
Name: date, Length: 17533, dtype: bool -------------------- 
  solar_row: [  4.2  357.    57.18  -1.    35.  ]
  helium_flux: 48.02
  input_rows: [array([  4.2 , 357.  ,  57.18,  -1.  ,  35.  ,  48.02])]
  日期 2011-05-21 00:00:00 的数据:
 --------------- solar_mask : 0        False
1        False
2        False
3        False
4        False
         ...  
17528    False
17529    False
17530    False
17531    False
17532    False
Name: date, Length: 17533, dtype: bool -------------------- 
  solar_row: [  6.8 373.   56.9  -1.   36. ]
  helium_flux: 49.41
  input_rows: [array([  4.2 , 357.  ,  57.18,  -1.  ,  35.  ,  48.02]), array([  6.8 , 373.  ,  56.9 ,  -1.  ,  3

In [4]:
# Display the loaded solar-parameter table for a quick visual check.
# NOTE(review): the rendered output contains an "Unnamed: 0" column, which
# usually means a saved index was read back as a data column — verify in the
# loader that this column is not being fed to the model as a feature.
solar_data

Unnamed: 0,date,HMF,wind_speed,SSN,polarity,HCS_tilt
0,1985-01-01,6.200000,701.000000,0.000000,-1.0,11.350000
1,1985-01-02,5.700000,650.000000,0.000000,-1.0,11.250000
2,1985-01-03,5.500000,551.000000,0.000000,-1.0,11.150000
3,1985-01-04,5.300000,452.000000,0.000000,-1.0,11.060000
4,1985-01-05,6.900000,421.000000,0.000000,-1.0,10.960000
...,...,...,...,...,...,...
17528,2032-12-28,16.842999,420.444606,43.000000,-1.0,16.078885
17529,2032-12-29,11.534508,461.641845,42.132462,-1.0,16.272183
17530,2032-12-30,9.514021,513.857852,32.512169,-1.0,16.458131
17531,2032-12-31,7.201453,499.590628,29.286233,-1.0,16.642753
