In [8]:
import pandas as pd
import numpy as np

In [18]:
data = pd.read_table('wind.txt', sep='\s+')

In [19]:
def fix_year(year):
    if year > 1989:
        return year - 100
    return year

data['Date'] = pd.to_datetime(data[['Yr', 'Mo', 'Dy']].apply(lambda x: fix_year(x['Yr']), axis=1).astype(str) + '-' +
                               data['Mo'].astype(str) + '-' +
                               data['Dy'].astype(str), format='%y-%m-%d')

data.set_index('Date', inplace=True)

In [20]:
# Step 5

data.index = pd.to_datetime(data.index)
data.index = data.index.astype('datetime64[ns]')

In [31]:
# Step 6

missing_values_per_location = data.isnull().sum()
print("Missing Values per Location:")
print(missing_values_per_location)

Missing Values per Location:
Yr     0
Mo     0
Dy     0
RPT    6
VAL    3
ROS    2
KIL    5
SHA    2
BIR    0
DUB    3
CLA    2
MUL    3
CLO    1
BEL    0
MAL    4
dtype: int64


In [32]:
# Step 7

non_missing_count = data.notnull().sum().sum()
print("\nTotal Non-Missing Values:", non_missing_count)


Total Non-Missing Values: 98579


In [33]:
# Step 8

mean_windspeed = data.mean().mean()
print("\nMean Windspeeds over all Locations and Times:", mean_windspeed)


Mean Windspeeds over all Locations and Times: 14.299199599246569


In [34]:
# Step 9

loc_stats = pd.DataFrame({
    'Min': data.min(),
    'Max': data.max(),
    'Mean': data.mean(),
    'Std': data.std()
})

print("\nLocation Statistics:")
print(loc_stats)


Location Statistics:
       Min    Max       Mean       Std
Yr   61.00  78.00  69.500304  5.188131
Mo    1.00  12.00   6.523274  3.448871
Dy    1.00  31.00  15.728628  8.800335
RPT   0.67  35.80  12.362987  5.618413
VAL   0.21  33.37  10.644314  5.267356
ROS   1.50  33.84  11.660526  5.008450
KIL   0.00  28.46   6.306468  3.605811
SHA   0.13  37.54  10.455834  4.936125
BIR   0.00  26.16   7.092254  3.968683
DUB   0.00  30.37   9.797343  4.977555
CLA   0.00  31.08   8.495053  4.499449
MUL   0.00  25.88   8.493590  4.166872
CLO   0.04  28.21   8.707332  4.503954
BEL   0.13  42.38  13.121007  5.835037
MAL   0.67  42.54  15.599079  6.699794


In [35]:
# Step 10

day_stats = pd.DataFrame({
    'Min': data.min(axis=1),
    'Max': data.max(axis=1),
    'Mean': data.mean(axis=1),
    'Std': data.std(axis=1)
})

print("\nDay Statistics:")
print(day_stats)


Day Statistics:
             Min   Max       Mean        Std
Date                                        
2061-01-01  1.00  61.0  14.728571  14.220976
2061-01-02  1.00  61.0  13.478571  14.406022
2061-01-03  1.00  61.0  13.790000  14.398500
2061-01-04  1.00  61.0   9.695333  14.555991
2061-01-05  1.00  61.0  12.970667  13.745564
...          ...   ...        ...        ...
1978-12-27  8.08  78.0  21.166667  17.459293
1978-12-28  5.00  78.0  19.986667  18.529258
1978-12-29  8.71  78.0  19.845333  17.292730
1978-12-30  9.13  78.0  20.294000  17.158682
1978-12-31  9.59  78.0  20.388667  17.233853

[6574 rows x 4 columns]


In [36]:
# Step 11

january_avg_per_location = data[data.index.month == 1].mean()

print("\nAverage Windspeed in January for Each Location:")
print(january_avg_per_location)


Average Windspeed in January for Each Location:
Yr     69.500000
Mo      1.000000
Dy     16.000000
RPT    14.847325
VAL    12.914560
ROS    13.299624
KIL     7.199498
SHA    11.667734
BIR     8.054839
DUB    11.819355
CLA     9.512047
MUL     9.543208
CLO    10.053566
BEL    14.550520
MAL    18.028763
dtype: float64


In [37]:
# Step 12

yearly_data = data.resample('Y').mean()

print("\nYearly Data:")
print(yearly_data)


Yearly Data:
              Yr        Mo         Dy        RPT        VAL        ROS  \
Date                                                                     
1969-12-31  69.0  6.526027  15.720548  11.166356   9.723699  10.902000   
1970-12-31  70.0  6.526027  15.720548  12.600329  10.726932  11.730247   
1971-12-31  71.0  6.526027  15.720548  11.273123   9.095178  11.088329   
1972-12-31  72.0  6.513661  15.756831  12.463962  10.561311  12.058333   
1973-12-31  73.0  6.526027  15.720548  11.828466  10.680493  10.680493   
...          ...       ...        ...        ...        ...        ...   
2064-12-31  64.0  6.513661  15.756831  12.363661  10.920164  12.104372   
2065-12-31  65.0  6.526027  15.720548  12.451370  11.075534  11.848767   
2066-12-31  66.0  6.526027  15.720548  13.461973  11.557205  12.020630   
2067-12-31  67.0  6.526027  15.720548  12.737151  10.990986  11.739397   
2068-12-31  68.0  6.513661  15.756831  11.835628  10.468197  11.409754   

                 KIL   

In [38]:
# Step 13

monthly_data = data.resample('M').mean()

print("\nMonthly Data:")
print(monthly_data)


Monthly Data:
              Yr    Mo    Dy        RPT        VAL        ROS       KIL  \
Date                                                                      
1969-01-31  69.0   1.0  16.0  12.824839  12.113871  11.403871  6.708710   
1969-02-28  69.0   2.0  14.5  13.504643  10.426786  14.240714  7.326786   
1969-03-31  69.0   3.0  16.0  10.398710   8.533226  12.277419  6.125161   
1969-04-30  69.0   4.0  15.5  12.591333   9.769333  10.828000  6.937333   
1969-05-31  69.0   5.0  16.0   9.419032   7.941935   9.298387  5.137097   
...          ...   ...   ...        ...        ...        ...       ...   
2068-08-31  68.0   8.0  16.0   9.231935   6.870323  13.075161  6.074839   
2068-09-30  68.0   9.0  15.5  12.137000  11.160667  12.008000  6.886000   
2068-10-31  68.0  10.0  16.0  12.521290  11.755484  11.235161  6.433871   
2068-11-30  68.0  11.0  15.5  14.604000  14.257333  14.422333  6.828000   
2068-12-31  68.0  12.0  16.0  13.090645  11.132903  13.237742  6.070000   

         

In [39]:
# Step 14

weekly_data = data.resample('W').mean()

print("\nWeekly Data:")
print(weekly_data)


Weekly Data:
              Yr         Mo         Dy        RPT        VAL        ROS  \
Date                                                                      
1969-01-05  69.0   1.000000   3.000000   5.884000   3.958000   6.758000   
1969-01-12  69.0   1.000000   9.000000  14.708571  12.521429  15.135714   
1969-01-19  69.0   1.000000  16.000000  15.030000  16.452857  10.778571   
1969-01-26  69.0   1.000000  23.000000  14.438571  14.267143  13.017143   
1969-02-02  69.0   1.285714  21.142857  13.510000  11.917143  10.702857   
...          ...        ...        ...        ...        ...        ...   
2068-12-09  68.0  12.000000   6.000000   9.251429  10.535714  12.772857   
2068-12-16  68.0  12.000000  13.000000  12.047143  10.464286  10.857143   
2068-12-23  68.0  12.000000  20.000000  18.220000  15.165714  15.737143   
2068-12-30  68.0  12.000000  27.000000  13.251429   9.327143  14.167143   
2069-01-06  68.0  12.000000  31.000000   9.130000   2.130000   7.380000   

          

In [40]:
# Step 15

weekly_stats = weekly_data.iloc[:52].agg(['min', 'max', 'mean', 'std'])

print("\nStatistics for the First 52 Weeks:")
print(weekly_stats)


Statistics for the First 52 Weeks:
        Yr         Mo         Dy        RPT        VAL        ROS       KIL  \
min   69.0   1.000000   3.000000   4.648571   3.571429   6.758000  2.752000   
max   69.0  12.000000  28.000000  15.875714  16.452857  20.135714  9.310000   
mean  69.0   6.450549  15.532967  11.093841   9.653918  10.775374  5.739874   
std    0.0   3.469110   7.144768   2.834148   3.004006   2.404362  1.579709   

            SHA        BIR        DUB        CLA        MUL        CLO  \
min    5.274286   2.075714   3.608571   2.387143   2.624286   2.967143   
max   14.368571  12.264286  14.870000  13.041429  12.238571  12.328571   
mean   9.832835   6.142511   8.509978   7.663615   7.881016   7.737747   
std    2.368833   1.979000   2.365333   2.313503   2.129633   2.080378   

            BEL        MAL  
min    6.477143   8.025714  
max   19.171429  25.631429  
mean  12.619093  15.701610  
std    3.082767   4.262298  
