# Generate Random Fourier Features (RFF)

This notebook is used for generating RFF from the preprocessed dataset. The generated RFF will be used as input for the model training.

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the preprocessed dataset; the first CSV column becomes the (date-parsed) index.
data = pd.read_csv(
    "../data/processed/processed_data.csv",
    parse_dates=True,
    index_col=0,
)
data


Unnamed: 0_level_0,dfy,infl,svar,de,lty,tms,tbl,dfr,dp,dy,ltr,ep,b/m,ntis,mr,R
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
1929-12-31,9.137888,0.000000,0.996113,-9.573097,18.694918,0.613196,4.226658,-2.406510,-19.297847,-18.788349,-0.713609,-15.245790,4.647688,4.444831,-2.065459,0.680051
1930-01-31,8.744785,-0.902638,0.141551,-9.047697,19.317084,0.133301,4.772245,-4.743267,-19.925824,-19.125106,-0.459937,-16.004079,4.371450,3.996735,0.463390,0.277740
1930-02-28,8.262224,-0.912736,0.172508,-8.422173,19.120484,0.050347,4.769870,-4.435438,-20.320173,-19.749430,1.046632,-16.559002,4.351337,3.982363,1.002548,0.810971
1930-03-31,7.704174,-0.923055,0.152278,-7.642985,19.110364,0.665901,4.168087,-3.191948,-21.040377,-20.137156,0.680915,-17.334023,4.527337,3.567818,0.443443,-0.190234
1930-04-30,7.703659,-0.933578,0.198184,-6.673451,19.519002,0.629079,4.231462,-4.139428,-21.215671,-20.826976,-0.132625,-17.538603,4.694321,3.167620,1.234924,-0.153955
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-07-31,1.692906,2.603820,0.530717,-3.309497,1.031373,0.518760,0.726373,0.096812,-8.744586,-8.600267,1.086164,-7.308629,0.813942,-0.238172,-1.507158,-0.745281
2022-08-31,1.590550,-0.022398,0.557793,-3.260327,1.031579,0.209036,0.857020,-1.674411,-8.634899,-8.764461,-1.693465,-7.228964,0.848243,-0.378696,1.729037,-1.524433
2022-09-30,1.620710,-0.067223,0.822067,-3.211241,1.252524,0.301941,1.020395,-2.518655,-8.412818,-8.654442,-3.080439,-7.020210,0.930366,-0.439382,-0.743272,1.237698
2022-10-31,1.709853,0.408143,1.084012,-3.107040,1.416746,0.201277,1.213255,-1.439746,-8.553506,-8.428358,-0.558489,-7.265950,0.816268,-0.593365,-1.685470,0.792841


A pair of RFFs is generated as:

$$S_{i,t} = [\sin (A_{i,t}), \cos (A_{i,t})]', \qquad \omega_i \sim \text{i.i.d. } N(0,I_{15\times15}) $$

Where:
- $A_{i,t} = \gamma \omega_i' G_t$ is the $(t, i)$ entry of the matrix $A$, which has dimension $T \times P/2$.
- $S_{i,t}$ is the $i$-th pair of RFF at time $t$.
- $\gamma$ is the scaling factor. [VoC]: We set $\gamma = 2$. Our results are generally insensitive to $\gamma$.
- $\omega_i$ is the random vector.
- $G_t$ is the input feature at time $t$.
- $I_{15\times15}$ is the identity matrix of size $15\times15$.

In VoC they generated P=12,000 RFFs (6,000 pairs). 

In [3]:
P = 12000  # total number of RFFs, i.e. P/2 sin/cos pairs
gamma = 2  # scaling factor applied to the random projections
np.random.seed(59148)

# Every column except the last one is used as an input feature
# (in the table shown above, the last column is `R`).
G = data.iloc[:, :-1]

# One N(0, 1) weight vector per pair. The row count follows the number of
# feature columns in G rather than a hard-coded 15, so the product below
# stays valid if the feature set changes.
omegas = np.random.normal(0, 1, (G.shape[1], P // 2))

A = gamma * G @ omegas
A

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,5990,5991,5992,5993,5994,5995,5996,5997,5998,5999
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1929-12-31,-76.025069,51.553225,107.096224,-1.664750,-52.507116,-76.642613,12.939082,208.263801,-104.256291,6.650842,...,-133.551303,-26.558696,-1.001721,-13.307804,-26.776769,-72.673683,49.054726,-21.577520,42.364072,-24.254941
1930-01-31,-76.401171,62.124147,106.865531,-5.104752,-54.839019,-64.249694,17.957056,203.220363,-103.491001,0.425708,...,-139.111636,-40.615087,9.091220,-4.263736,-41.488080,-64.599262,59.819380,-19.695814,44.673851,-32.443135
1930-02-28,-71.422843,70.331588,102.497030,-7.183819,-57.913579,-62.749654,22.856436,207.589835,-103.694807,1.411263,...,-142.799771,-45.789078,7.543053,-9.097442,-40.605931,-63.353708,64.004877,-13.954080,42.052843,-38.879400
1930-03-31,-70.875206,70.480495,105.783608,-8.614270,-66.022678,-67.593277,24.628498,216.134324,-98.138321,-0.972042,...,-150.642744,-39.803998,8.462633,-15.616347,-39.842503,-67.271326,66.554033,-6.438972,41.171592,-39.820442
1930-04-30,-70.408566,73.988334,106.387203,-14.364443,-72.432901,-64.790610,28.438940,214.259887,-93.963973,-4.864813,...,-154.788007,-45.290939,14.312790,-14.988842,-46.351039,-65.938960,70.854166,-2.988126,44.580515,-38.775039
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-07-31,-15.067692,30.434859,36.395388,10.998972,-19.574810,-23.971757,31.167428,108.116572,-61.384995,-11.335094,...,-40.513537,-5.685132,-22.049006,4.514740,-2.867514,-20.240660,9.133981,26.904058,-12.492422,-16.491724
2022-08-31,-17.599318,30.544495,39.475086,12.441712,-27.013674,-17.042512,23.026924,100.147585,-51.929608,-23.711703,...,-46.975957,-17.020853,10.237193,17.538033,-30.920611,-11.643572,22.885775,28.006394,-12.576549,-18.367571
2022-09-30,-23.455243,26.293856,44.784438,11.725742,-27.042850,-22.743695,24.582046,103.248222,-46.583355,-25.516485,...,-47.820548,-7.414487,11.261369,15.455104,-28.405484,-15.084225,8.275550,26.550181,-6.483149,-13.488895
2022-10-31,-18.768517,27.326161,41.488806,13.965853,-22.491327,-22.310787,26.572296,107.769640,-47.592353,-17.910261,...,-47.061259,-4.925521,-4.698520,5.445550,-15.505253,-17.603327,4.038286,25.393221,-8.345631,-17.209315


In [4]:
# Sines take the even column labels 0, 2, 4, ...
A_sin = np.sin(A).rename(lambda pair: int(2 * pair), axis="columns")
# Cosines take the odd column labels 1, 3, 5, ...
A_cos = np.cos(A).rename(lambda pair: int(2 * pair + 1), axis="columns")
# Side-by-side join: all sine columns first, then all cosine columns.
A_combined = pd.concat((A_sin, A_cos), axis="columns")
A_combined

Unnamed: 0_level_0,0,2,4,6,8,10,12,14,16,18,...,11981,11983,11985,11987,11989,11991,11993,11995,11997,11999
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1929-12-31,-0.586592,0.960207,0.278348,-0.995590,-0.783301,-0.947200,0.364142,0.794805,0.551143,0.359430,...,-0.033609,0.144336,0.538853,0.737501,-0.073166,-0.914287,0.352312,-0.915668,-0.047411,0.638845
1930-01-31,-0.843060,-0.650093,0.051358,0.924008,0.990375,-0.988325,-0.778643,0.832253,-0.180560,0.412965,...,0.635949,-0.974656,-0.944883,-0.433753,-0.797669,-0.195348,-0.991676,0.662790,0.770256,0.517210
1930-02-28,-0.740483,0.937908,0.922893,-0.783720,-0.978881,0.082106,-0.761281,0.242285,0.022248,0.987302,...,-0.142211,-0.233801,0.305943,-0.946902,-0.972567,0.866896,0.387366,0.182066,-0.351018,0.380658
1930-03-31,-0.982123,0.978992,-0.857578,-0.724637,0.049213,0.998798,-0.483145,0.593635,0.680869,-0.826038,...,0.988211,-0.509058,-0.571761,-0.995806,-0.541815,-0.269540,-0.836141,0.987890,-0.945754,-0.523143
1930-04-30,-0.961807,-0.987082,-0.414094,-0.974284,0.175359,-0.925683,-0.163864,0.590426,0.280012,0.988406,...,-0.660008,0.259162,-0.174721,-0.752385,-0.715923,-0.999405,-0.167538,-0.988247,0.826341,0.474917
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-07-31,-0.597413,-0.831091,-0.964548,-0.999994,-0.663326,0.917196,-0.245949,0.964203,0.992330,0.942915,...,-0.946956,0.826433,-0.998327,-0.196365,-0.962675,0.178726,-0.958016,-0.199179,0.997267,-0.708264
2022-08-31,0.949060,-0.765251,0.979025,-0.124336,-0.952292,0.972223,-0.860258,-0.374057,-0.995648,0.988807,...,-0.989078,-0.255057,-0.687747,0.256378,0.879819,0.603592,-0.625810,-0.964318,0.999948,0.886077
2022-09-30,0.994313,0.917248,0.718846,-0.745063,-0.942984,0.683500,-0.523280,0.411715,-0.514594,-0.374395,...,-0.766991,0.425482,0.262676,-0.968201,-0.991412,-0.811701,-0.409192,0.152756,0.980074,0.603810
2022-10-31,0.080950,0.812351,-0.603674,0.985362,0.479582,0.314223,0.991400,0.816596,0.451516,0.807142,...,-0.998039,0.211522,-0.013868,0.669222,-0.979524,0.318899,-0.624197,0.966266,-0.472080,-0.069389


In [5]:
# Put the columns back into label order 0..P-1 so that every sine column is
# immediately followed by its cosine partner. `reindex` already returns a new
# object, so the deprecated `copy` keyword is not passed.
S = A_combined.reindex(columns=np.arange(P))
S

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,11990,11991,11992,11993,11994,11995,11996,11997,11998,11999
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1929-12-31,-0.586592,0.809882,0.960207,0.279289,0.278348,0.960480,-0.995590,-0.093816,-0.783301,-0.621642,...,0.405067,-0.914287,-0.935883,0.352312,-0.401934,-0.915668,-0.998875,-0.047411,0.769336,0.638845
1930-01-31,-0.843060,0.537820,-0.650093,0.759855,0.051358,0.998680,0.924008,0.382373,0.990375,-0.138407,...,-0.980734,-0.195348,-0.128761,-0.991676,-0.748806,0.662790,0.637735,0.770256,-0.855858,0.517210
1930-02-28,-0.740483,-0.672075,0.937908,0.346884,0.922893,-0.385056,-0.783720,0.621114,-0.978881,0.204433,...,-0.498489,0.866896,0.921926,0.387366,-0.983286,0.182066,-0.936369,-0.351018,-0.924716,0.380658
1930-03-31,-0.982123,-0.188241,0.978992,0.203899,-0.857578,0.514354,-0.724637,-0.689131,0.049213,-0.998788,...,0.962989,-0.269540,-0.548515,-0.836141,-0.155157,0.987890,-0.324882,-0.945754,-0.852245,-0.523143
1930-04-30,-0.961807,0.273730,-0.987082,0.160213,-0.414094,0.910234,-0.974284,-0.225324,0.175359,-0.984505,...,-0.034479,-0.999405,0.985866,-0.167538,-0.152865,-0.988247,0.563171,0.826341,-0.880031,0.474917
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-07-31,-0.597413,-0.801934,-0.831091,0.556136,-0.964548,0.263909,-0.999994,0.003398,-0.663326,0.748331,...,-0.983899,0.178726,0.286716,-0.958016,0.979963,-0.199179,0.073882,0.997267,0.705948,-0.708264
2022-08-31,0.949060,0.315096,-0.765251,0.643732,0.979025,-0.203741,-0.124336,0.992240,-0.952292,-0.305188,...,0.797294,0.603592,-0.779976,-0.625810,0.264745,-0.964318,-0.010178,0.999948,0.463539,0.886077
2022-09-30,0.994313,-0.106500,0.917248,0.398317,0.718846,0.695169,-0.745063,0.666994,-0.942984,-0.332839,...,-0.584073,-0.811701,0.912448,-0.409192,0.988264,0.152756,-0.198633,0.980074,-0.797129,0.603810
2022-10-31,0.080950,0.996718,0.812351,-0.583168,-0.603674,-0.797231,0.985362,0.170477,0.479582,-0.877497,...,0.947789,0.318899,-0.781267,-0.624197,0.257545,0.966266,-0.881556,-0.472080,0.997590,-0.069389


In [7]:
def make_RFF(data, P, gamma=2, seed=59148):
    """Build P Random Fourier Features (P/2 sin/cos pairs) from a feature table.

    Every column of ``data`` except the last one is treated as an input
    feature. For each pair ``i`` a weight vector is drawn from N(0, 1), the
    projection ``A = gamma * G @ omegas`` is computed, and ``sin(A[:, i])`` /
    ``cos(A[:, i])`` are stored in output columns ``2*i`` and ``2*i + 1``.

    Parameters
    ----------
    data : pandas.DataFrame
        Input table; all columns but the last are used as features.
    P : int
        Total number of output columns. Must be a non-negative even integer.
    gamma : float, default 2
        Scaling factor applied to the random projections.
    seed : int, default 59148
        Seed passed to ``np.random.seed``. Note that this reseeds NumPy's
        global legacy random state.

    Returns
    -------
    pandas.DataFrame
        Frame with the index of ``data`` and columns labelled ``0 .. P-1``.

    Raises
    ------
    ValueError
        If ``P`` is not a non-negative even integer. An odd ``P`` would
        otherwise leave the last column filled with NaN.
    """
    n_out = int(P)
    if n_out != P or n_out < 0 or n_out % 2:
        raise ValueError(f"P must be a non-negative even integer, got {P!r}")
    n_pairs = n_out // 2

    G = data.iloc[:, :-1]

    np.random.seed(seed)
    # Size the weight matrix from the actual feature count instead of assuming 15.
    omegas = np.random.normal(0, 1, (G.shape[1], n_pairs))

    A = (gamma * G) @ omegas
    A_sin = np.sin(A).rename(columns=lambda i: 2 * i)      # even labels
    A_cos = np.cos(A).rename(columns=lambda i: 2 * i + 1)  # odd labels
    A_combined = pd.concat([A_sin, A_cos], axis=1)
    # Reorder to 0..P-1 so each sine column is followed by its cosine partner.
    S = A_combined.reindex(columns=np.arange(n_out))
    return S