# Generate Random Fourier Features (RFF)

Here we walk through the steps for generating Random Fourier Features (RFF) for a given dataset. The steps are then implemented in the `make_RFF` function.

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the processed dataset; the first CSV column is used as the index
# and parsed as dates.
data = pd.read_csv(
    "../data/processed/processed_data.csv",
    parse_dates=True,
    index_col=0,
)
data

Unnamed: 0_level_0,dfy,infl,svar,de,lty,tms,tbl,dfr,dp,dy,ltr,ep,b/m,ntis,mr,R
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
1929-12-31,9.137888,0.000000,0.996113,-9.573097,18.694918,0.613196,4.226658,2.206750,-19.297847,-18.788349,-0.713609,-15.245790,4.647688,4.444831,-2.065459,0.694734
1930-01-31,8.744785,-0.902638,0.141551,-9.047697,19.317084,0.133301,4.772245,0.913696,-19.925824,-19.125106,-0.459937,-16.004079,4.371450,3.996735,0.463390,0.274144
1930-02-28,8.262224,-0.912736,0.172508,-8.422173,19.120484,0.050347,4.769870,-0.453574,-20.320173,-19.749430,1.046632,-16.559002,4.351337,3.982363,1.002548,0.797222
1930-03-31,7.704174,-0.923055,0.152278,-7.642985,19.110364,0.665901,4.168087,0.442426,-21.040377,-20.137156,0.680915,-17.334023,4.527337,3.567818,0.443443,-0.185709
1930-04-30,7.703659,-0.933578,0.198184,-6.673451,19.519002,0.629079,4.231462,0.808742,-21.215671,-20.826976,-0.132625,-17.538603,4.694321,3.167620,1.234924,-0.160907
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-07-31,1.692906,2.603820,0.530717,-3.309497,1.031373,0.518760,0.726373,0.384379,-8.744586,-8.600267,1.086164,-7.308629,0.813942,-0.238172,-1.507158,-0.740901
2022-08-31,1.590550,-0.022398,0.557793,-3.260327,1.031579,0.209036,0.857020,0.907315,-8.634899,-8.764461,-1.693465,-7.228964,0.848243,-0.378696,1.729037,-1.516081
2022-09-30,1.620710,-0.067223,0.822067,-3.211241,1.252524,0.301941,1.020395,1.728163,-8.412818,-8.654442,-3.080439,-7.020210,0.930366,-0.439382,-0.743272,1.250263
2022-10-31,1.709853,0.408143,1.084012,-3.107040,1.416746,0.201277,1.213255,0.255439,-8.553506,-8.428358,-0.558489,-7.265950,0.816268,-0.593365,-1.685470,0.800499


A pair of RFFs is generated as:

$$S_{i,t} = [\sin (A_{i,t}), \cos (A_{i,t})]', \qquad \omega_i \sim \text{i.i.d. } N(0, I_{15\times15}) $$

Where:
- $A_{i,t} = \gamma \omega_i' G_t$ is the $(t, i)$ entry of the matrix $A$, which has dimension $T \times P/2$.
- $S_{i,t}$ is the $i$-th pair of RFF at time $t$.
- $\gamma$ is the scaling factor. [VoC]: We set $\gamma = 2$. Our results are generally insensitive to $\gamma$.
- $\omega_i$ is the random vector.
- $G_t$ is the vector of the 15 input features at time $t$.
- $I_{15\times15}$ is the identity matrix of size $15\times15$.

In VoC they generated P=12,000 RFFs (6,000 pairs). 

In [3]:
P = 12000  # total number of RFFs, i.e. P/2 sine/cosine pairs
gamma = 2  # scaling factor applied to the random projections
np.random.seed(59148)

# Input features: every column of `data` except the last one.
G = data.iloc[:, :-1]

# One random weight vector (column) per pair; the number of rows follows the
# number of input features instead of being hard-coded.
omegas = np.random.normal(0, 1, (G.shape[1], P // 2))

# Entry (t, i) is gamma * omega_i' G_t.
A = gamma * G @ omegas
A

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,5990,5991,5992,5993,5994,5995,5996,5997,5998,5999
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1929-12-31,-72.345339,46.581247,107.532020,3.416479,-50.376291,-87.518686,4.443008,212.437006,-102.305903,13.623456,...,-139.694833,-18.538833,-4.358554,-27.151375,-14.372072,-79.366861,57.562738,-20.621100,36.424672,-18.328789
1930-01-31,-71.888939,56.027308,107.399922,1.126053,-52.226115,-77.586368,7.538832,208.337713,-101.099358,8.975805,...,-146.645077,-30.780811,4.974938,-21.239274,-26.276946,-72.806704,70.252242,-18.523014,37.390723,-25.176252
1930-02-28,-68.246740,66.040101,102.873181,-2.798034,-56.074390,-72.137169,15.523181,211.191873,-102.011360,7.429568,...,-148.102464,-38.866857,4.645655,-21.046307,-29.899008,-69.130822,71.348436,-13.128560,36.926341,-33.764334
1930-03-31,-67.976276,66.563519,106.126933,-4.611225,-64.343992,-76.161561,17.935202,219.422019,-96.601785,4.521056,...,-155.482680,-33.485866,5.818085,-26.522457,-30.069953,-72.544283,73.256733,-5.685494,36.492471,-35.151759
1930-04-30,-66.461697,68.655403,106.854637,-8.914330,-70.147383,-76.456257,19.326074,218.736056,-91.871992,2.613994,...,-161.377540,-36.688855,10.712260,-29.837420,-33.045795,-73.118045,79.979837,-1.962273,38.209931,-32.418665
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-07-31,-14.838317,30.124932,36.422553,11.315710,-19.441985,-24.649716,30.637826,108.376708,-61.263418,-10.900457,...,-40.896493,-5.185215,-22.258254,3.651803,-2.094269,-20.657879,9.664327,26.963676,-12.862653,-16.122318
2022-08-31,-15.540024,27.762019,39.718971,15.285328,-25.821196,-23.129107,18.272253,102.483042,-50.838110,-19.809608,...,-50.414070,-12.532684,8.358604,9.790733,-23.978550,-15.389287,27.647127,28.541637,-15.900425,-15.051109
2022-09-30,-20.067802,21.716813,45.185618,16.403357,-25.081281,-32.755857,16.760835,107.089940,-44.787891,-19.097722,...,-53.476082,-0.031660,8.171178,2.711160,-16.986121,-21.245749,16.107750,27.430631,-11.950768,-8.033472
2022-10-31,-17.416366,25.499162,41.648944,15.832998,-21.708336,-26.307300,23.450336,109.303122,-46.875665,-15.348110,...,-49.318756,-1.978549,-5.932019,0.358603,-10.947033,-20.062798,7.164632,25.744667,-10.528117,-15.031696


In [4]:
# Relabel the columns so that pair i gets label 2*i for its sine and
# 2*i + 1 for its cosine; the two halves are then placed side by side.
sin_labels = {i: int(i * 2) for i in A.columns}
cos_labels = {i: int(i * 2 + 1) for i in A.columns}

A_sin = np.sin(A).rename(columns=sin_labels)
A_cos = np.cos(A).rename(columns=cos_labels)
A_combined = pd.concat((A_sin, A_cos), axis=1)
A_combined

Unnamed: 0_level_0,0,2,4,6,8,10,12,14,16,18,...,11981,11983,11985,11987,11989,11991,11993,11995,11997,11999
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1929-12-31,0.088591,0.516401,0.657782,-0.271437,-0.110582,0.431278,-0.963936,-0.928849,-0.979235,0.870927,...,0.105842,0.952113,-0.346498,-0.433017,-0.232751,-0.677053,0.528410,-0.199402,0.292038,0.867438
1930-01-31,-0.359463,-0.498059,0.552842,0.902721,-0.924971,-0.815416,0.950750,0.837449,-0.538279,0.434040,...,-0.532049,0.805003,0.259543,-0.730410,0.413770,-0.852486,0.420134,0.947157,0.952824,0.999054
1930-02-28,0.763230,-0.066606,0.716922,-0.336840,0.456696,-0.119178,0.183733,-0.648160,-0.995917,0.911280,...,-0.901484,0.392227,-0.066685,-0.585872,0.053852,0.999875,-0.615170,0.846090,0.715979,-0.701579
1930-03-31,0.908116,-0.556422,-0.634389,0.994887,-0.998280,-0.691337,-0.792168,-0.470156,-0.708653,-0.981752,...,-0.026153,-0.478673,0.893777,0.180093,0.222934,-0.958913,-0.540216,0.826637,0.356160,-0.828575
1930-04-30,0.469082,-0.443621,0.040475,-0.488568,-0.858504,-0.871392,0.458688,-0.922855,0.693118,0.503460,...,-0.402715,0.531643,-0.279539,-0.007710,-0.059037,-0.651365,-0.130404,-0.381554,0.872344,0.537996
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-07-31,-0.764101,-0.961110,-0.957024,-0.949193,-0.558378,0.464461,-0.701928,0.999966,0.999997,0.995480,...,-0.998444,0.455404,-0.964539,-0.872642,-0.499891,-0.235299,-0.971445,-0.257215,0.956428,-0.915376
2022-08-31,-0.167151,0.490197,0.900854,0.410165,-0.635345,0.907779,-0.545766,0.928189,-0.541842,-0.819221,...,0.988981,0.999433,-0.483476,-0.933782,0.404658,-0.949651,-0.809670,-0.964487,-0.981536,-0.791917
2022-09-30,-0.938495,0.270907,0.933237,-0.640688,0.051438,-0.973469,-0.868848,0.272306,-0.721242,-0.245626,...,-0.997620,0.999499,-0.311904,-0.908785,-0.288479,-0.734817,-0.921144,-0.664701,0.816425,-0.178528
2022-10-31,0.990547,0.358276,-0.723072,-0.124709,-0.279058,-0.922520,-0.993778,0.607271,-0.245683,-0.352137,...,0.584343,-0.396547,0.938972,0.936388,-0.048522,0.349985,0.636035,0.818543,-0.450617,-0.779914


In [5]:
# Order the columns by label (0, 1, ..., P-1) so that each sine column is
# immediately followed by the cosine column of the same pair.
column_order = np.arange(P)
S = A_combined.reindex(columns=column_order, copy=True)
S

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,11990,11991,11992,11993,11994,11995,11996,11997,11998,11999
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1929-12-31,0.088591,-0.996068,0.516401,-0.856347,0.657782,0.753208,-0.271437,-0.962456,-0.110582,0.993867,...,0.735934,-0.677053,0.848989,0.528410,-0.979918,-0.199402,-0.956407,0.292038,0.497545,0.867438
1930-01-31,-0.359463,-0.933159,-0.498059,0.867143,0.552842,0.833286,0.902721,0.430226,-0.924971,-0.380037,...,0.522750,-0.852486,0.907462,0.420134,0.320770,0.947157,-0.303524,0.952824,-0.043497,0.999054
1930-02-28,0.763230,0.646126,-0.066606,-0.997779,0.716922,-0.697153,-0.336840,-0.941562,0.456696,0.889623,...,-0.015783,0.999875,0.788395,-0.615170,-0.533040,0.846090,-0.698122,0.715979,-0.712592,-0.701579
1930-03-31,0.908116,0.418719,-0.556422,-0.830900,-0.634389,0.773014,0.994887,-0.100991,-0.998280,0.058624,...,0.283701,-0.958913,-0.841526,-0.540216,0.562735,0.826637,-0.934425,0.356160,0.559879,-0.828575
1930-04-30,0.469082,-0.883154,-0.443621,0.896214,0.040475,0.999181,-0.488568,-0.872526,-0.858504,0.512807,...,0.758765,-0.651365,-0.991461,-0.130404,-0.924347,-0.381554,0.488892,0.872344,-0.842947,0.537996
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-07-31,-0.764101,-0.645097,-0.961110,0.276165,-0.957024,0.290010,-0.949193,0.314695,-0.558378,0.829587,...,-0.971923,-0.235299,-0.237264,-0.971445,0.966354,-0.257215,-0.291967,0.956428,0.402599,-0.915376
2022-08-31,-0.167151,-0.985931,0.490197,-0.871612,0.900854,-0.434122,0.410165,-0.912011,-0.635345,0.772229,...,-0.313310,-0.949651,0.586885,-0.809670,-0.264131,-0.964487,0.191276,-0.981536,-0.610628,-0.791917
2022-09-30,-0.938495,0.345292,0.270907,-0.962605,0.933237,0.359260,-0.640688,-0.767802,0.051438,0.998676,...,-0.678265,-0.734817,-0.389222,-0.921144,0.747110,-0.664701,0.577451,0.816425,-0.983935,-0.178528
2022-10-31,0.990547,0.137173,0.358276,0.933616,-0.723072,-0.690773,-0.124709,-0.992193,-0.279058,-0.960274,...,-0.936755,0.349985,0.771660,0.636035,0.574445,0.818543,0.892717,-0.450617,-0.625886,-0.779914


In [6]:
def make_RFF(data, P, gamma=2, seed=59148):
    """Generate ``P`` Random Fourier Features (RFF) from a DataFrame.

    Every column of ``data`` except the last one is used as an input
    feature. For each of the ``P / 2`` pairs a random weight vector
    ``omega_i`` is drawn from a standard normal distribution, and the pair
    ``[sin(gamma * omega_i' G_t), cos(gamma * omega_i' G_t)]`` is computed
    for every row ``t``.

    Parameters
    ----------
    data : pandas.DataFrame
        Input frame; all columns but the last are used as features.
    P : int
        Total number of features to generate. Must be even, because the
        features come in sine/cosine pairs.
    gamma : float, default 2
        Scaling factor applied to the random projections.
    seed : int, default 59148
        Seed passed to ``np.random.seed`` before drawing the weights. Note
        that this reseeds NumPy's global random state.

    Returns
    -------
    pandas.DataFrame
        Frame with the same index as ``data`` and columns ``0 .. P-1``.
        Column ``2*i`` holds the sine and column ``2*i + 1`` the cosine of
        pair ``i``.

    Raises
    ------
    ValueError
        If ``P`` is odd. The pairs could then only fill ``P - 1`` columns
        and the last column would be left entirely NaN.
    """
    if P % 2 != 0:
        raise ValueError(
            f"P must be even (one sine and one cosine per pair), got {P}"
        )
    n_pairs = int(P // 2)

    G = data.iloc[:, :-1]

    # Size the weight matrix from the data rather than assuming 15 features.
    np.random.seed(seed)
    omegas = np.random.normal(0, 1, (G.shape[1], n_pairs))

    # Entry (t, i) is gamma * omega_i' G_t.
    A = ((gamma * G) @ omegas).to_numpy()

    # Interleave directly: even columns are sines, odd columns are cosines.
    features = np.empty((A.shape[0], 2 * n_pairs))
    features[:, 0::2] = np.sin(A)
    features[:, 1::2] = np.cos(A)
    return pd.DataFrame(features, index=G.index, columns=np.arange(2 * n_pairs))