import os
import os.path
import numpy as np
import datetime

import sys
sys.path.append("..")
import grading

try:
    import matplotlib.pyplot as plt
    %matplotlib inline
except:
    pass

try:
    import pandas as pd
    print("  pandas: %s"% pd.__version__)
except:
    print("Missing pandas package")

  pandas: 0.19.2


### ONLY FOR GRADING. DO NOT EDIT ### 
submissions=dict()
assignment_key="BBz-XobeEeegARIApDSa9g" 
all_parts=["nvDA9", "ykDlW", "rpYVm","oWy6l","MWWt7","3VyJD"]
### ONLY FOR GRADING. DO NOT EDIT ###


COURSERA_TOKEN = " "  # the key provided to the Student under his/her email on submission page
COURSERA_EMAIL =  " "  # the email


# load dataset
asset_prices = pd.read_csv(os.getcwd() + '/data/spx_holdings_and_spx_closeprice.csv',
                     date_parser=lambda dt: pd.to_datetime(dt, format='%Y-%m-%d'),
                     index_col = 0).dropna()
n_stocks_show = 12
print('Asset prices shape', asset_prices.shape)
asset_prices.iloc[:, :n_stocks_show].head()

Asset prices shape (3493, 419)


print('Last column contains SPX index prices:')
asset_prices.iloc[:, -10:].head()

Last column contains SPX index prices:


asset_returns = pd.DataFrame(data=np.zeros(shape=(len(asset_prices.index), asset_prices.shape[1])), 
                             columns=asset_prices.columns.values,
                             index=asset_prices.index)
normed_returns = asset_returns
### START CODE HERE ### (≈ 4 lines of code)
# normed_returns is pandas.DataFrame that should contain normalized returns
asset_returns = asset_prices.pct_change().dropna()
normed_returns = (asset_returns - asset_returns.mean()) / asset_returns.std()
### END CODE HERE ###


normed_returns.iloc[-5:, -10:].head()


### GRADED PART (DO NOT EDIT) ###
part_1=list(normed_returns.iloc[0,: 100].as_matrix().squeeze())
try:
    part1 = " ".join(map(repr, part_1))
except TypeError:
    part1 = repr(part_1)
submissions[all_parts[0]]=part1
grading.submit(COURSERA_EMAIL, COURSERA_TOKEN, assignment_key,all_parts[:1],all_parts,submissions)
normed_returns.iloc[0,: 100].as_matrix().squeeze()
### GRADED PART (DO NOT EDIT) ###

Submission successful, please check on the coursera grader page for the status

array([-0.19005437, -0.51371017, -2.71470869, -0.04977943,  2.18293305,
       -2.68413088, -0.21246093, -0.76699639, -1.5407309 , -1.80394666,
       -1.37299129, -0.99416907,  0.16136183,  0.72980366,  0.63485621,
       -0.72131907, -0.01302927, -0.80797756,  0.39923062, -0.75893259,
       -1.43444651, -1.12783867, -1.29385343, -0.44802859, -2.13973399,
        0.58949813, -0.87826364,  0.31428572, -1.08060243, -0.31367868,
        0.11819333, -1.8686777 , -1.87275168, -0.22608376, -0.04189121,
       -0.02136145, -0.60458719, -1.43087396, -1.16679677, -1.65594274,
       -0.50493241, -1.5196492 , -0.36359946, -0.58859176, -0.73289901,
        0.87654672, -3.12410596, -1.33977245, -1.33866029, -0.53051976,
       -1.28309222, -2.2171311 ,  1.75785074,  0.22815795, -0.48093428,
       -0.21160476, -1.39163378, -1.8907977 , -1.26523275, -0.90790361,
        1.20007622, -1.13783598, -1.06735573, -1.49029484,  1.65191927,
       -0.94841616,  3.36936561, -0.82344479,  1.76591258,  0.0414378 ,
       -2.73686257, -0.93544592,  0.02499427, -0.52726361, -0.34692757,
       -3.31744267, -1.10532688, -0.797565  , -0.45450193,  1.58036671,
       -1.05535759, -0.19732619, -0.85221605, -3.09447476, -2.41199636,
       -0.9392503 , -1.88367011, -2.73709342, -2.97077299, -0.52321504,
       -0.7113052 ,  2.02582123, -1.26160414, -3.24554378, -1.04361909,
       -0.21374985,  0.86653839, -0.53475603,  0.92652973, -0.51024788])


train_end = datetime.datetime(2012, 3, 26) 

df_train = None
df_test = None
df_raw_train = None
df_raw_test = None

df_train = normed_returns[normed_returns.index <= train_end].copy()
df_test = normed_returns[normed_returns.index > train_end].copy()

df_raw_train = asset_returns[asset_returns.index <= train_end].copy()
df_raw_test = asset_returns[asset_returns.index > train_end].copy()

print('Train dataset:', df_train.shape)
print('Test dataset:', df_test.shape)

Train dataset: (3055, 419)
Test dataset: (437, 419)


from sklearn.decomposition import PCA
import seaborn as sns

stock_tickers = normed_returns.columns.values[:-1]
assert 'SPX' not in stock_tickers, "By accident included SPX index"

n_tickers = len(stock_tickers)
pca = None
cov_matrix = pd.DataFrame(data=np.ones(shape=(n_tickers, n_tickers)), columns=stock_tickers)
cov_matrix_raw = cov_matrix

if df_train is not None and df_raw_train is not None:
    stock_tickers = asset_returns.columns.values[:-1]
    assert 'SPX' not in stock_tickers, "By accident included SPX index"

    ### START CODE HERE ### (≈ 2-3 lines of code)
    cov_matrix = df_train.loc[:, df_train.columns != 'SPX'].cov()    
    # computing PCA on S&P 500 stocks
    pca = PCA().fit(cov_matrix)
    # not normed covariance matrix
    cov_matrix_raw = df_raw_train.loc[:, df_raw_train.columns != 'SPX'].cov()  
    ### END CODE HERE ###
    
    cov_raw_df = pd.DataFrame({'Variance': np.diag(cov_matrix_raw)}, index=stock_tickers)    
    # cumulative variance explained
    var_threshold = 0.8
    var_explained = np.cumsum(pca.explained_variance_ratio_)
    num_comp = np.where(np.logical_not(var_explained < var_threshold))[0][0] + 1  # +1 due to zero based-arrays
    print('%d components explain %.2f%% of variance' %(num_comp, 100* var_threshold))

4 components explain 80.00% of variance


### GRADED PART (DO NOT EDIT) ###
part_2 = np.diag(cov_matrix[: 100])
try:
    part2 = " ".join(map(repr, part_2))
except TypeError:
    part2 = repr(part_2)
submissions[all_parts[1]]=part2
grading.submit(COURSERA_EMAIL, COURSERA_TOKEN, assignment_key,all_parts[:2],all_parts,submissions)
### GRADED PART (DO NOT EDIT) ###
np.diag(cov_matrix[: 100])

Submission successful, please check on the coursera grader page for the status

array([ 1.10446611,  1.09424087,  1.08190134,  1.10517006,  1.06941473,
        1.10597862,  1.11869287,  1.0839399 ,  1.09803084,  1.06590728,
        1.07798702,  1.107393  ,  1.12418337,  1.10412774,  1.07721126,
        1.11952577,  1.11507312,  1.10687469,  1.04827028,  1.10800935,
        1.10480045,  1.04297489,  1.07466613,  1.12510255,  1.10831513,
        1.09118222,  1.08418296,  1.02668336,  1.09808835,  1.08506552,
        1.08022595,  1.08116796,  1.09591114,  0.99807688,  1.11068716,
        1.01433366,  1.10360906,  1.06598755,  1.11003861,  1.0879927 ,
        1.08236593,  1.093903  ,  1.08489115,  1.1050359 ,  0.99850151,
        1.08347058,  1.1019318 ,  1.08932552,  1.08876911,  1.09560839,
        1.1027858 ,  1.09150807,  1.07067427,  1.1119615 ,  1.07304668,
        1.10625388,  1.10454709,  1.11531806,  1.06707655,  1.08925028,
        1.07207857,  1.08151718,  1.11539438,  1.09563297,  1.09915349,
        1.10098573,  1.09770417,  1.05315411,  1.08235287,  1.10420203,
        1.10765821,  1.08524638,  1.02531398,  1.10595498,  1.10337109,
        1.10913785,  1.08713617,  1.11825335,  1.11819787,  1.08122381,
        1.11686164,  1.0559472 ,  1.09614651,  1.10212167,  1.06172191,
        1.09017849,  1.09338258,  1.11186398,  1.04779305,  1.0920264 ,
        1.09189706,  1.10245445,  1.09369637,  1.09399401,  1.09920198,
        0.92356831,  1.0993287 ,  1.05898641,  1.08077773,  1.09900737])


if pca is not None:
    bar_width = 0.9
    n_asset = int((1 / 10) * normed_returns.shape[1])
    x_indx = np.arange(n_asset)
    fig, ax = plt.subplots()
    fig.set_size_inches(12, 4)
    # Eigenvalues are measured as percentage of explained variance.
    rects = ax.bar(x_indx, pca.explained_variance_ratio_[:n_asset], bar_width, color='deepskyblue')
    ax.set_xticks(x_indx + bar_width / 2)
    ax.set_xticklabels(list(range(n_asset)), rotation=45)
    ax.set_title('Percent variance explained')
    ax.legend((rects[0],), ('Percent variance explained by principal components',))


if pca is not None:
    projected = pca.fit_transform(cov_matrix)


# the first two eigen-portfolio weights# the fi 
# first component
# get the Principal components
pc_w = np.zeros(len(stock_tickers))
eigen_prtf1 = pd.DataFrame(data ={'weights': pc_w.squeeze()*100}, index = stock_tickers)
if pca is not None:
    pcs = pca.components_

    ### START CODE HERE ### (≈ 1-2 lines of code)
    # normalized to 1 
    pc_w = pcs[:, 0] / sum(pcs[:, 0])
    
    ### END CODE HERE ###
    
    eigen_prtf1 = pd.DataFrame(data ={'weights': pc_w.squeeze()*100}, index = stock_tickers)
    eigen_prtf1.sort_values(by=['weights'], ascending=False, inplace=True)
    print('Sum of weights of first eigen-portfolio: %.2f' % np.sum(eigen_prtf1))
    eigen_prtf1.plot(title='First eigen-portfolio weights', 
                     figsize=(12,6), 
                     xticks=range(0, len(stock_tickers),10), 
                     rot=45, 
                     linewidth=3)

Sum of weights of first eigen-portfolio: 100.00


### GRADED PART (DO NOT EDIT) ###
part_3 = list(eigen_prtf1.squeeze().values)
try:
    part3 = " ".join(map(repr, part_3))
except TypeError:
    part3 = repr(part_3)
submissions[all_parts[2]]=part3
grading.submit(COURSERA_EMAIL, COURSERA_TOKEN, assignment_key,all_parts[:3],all_parts,submissions)
eigen_prtf1.squeeze().values
### GRADED PART (DO NOT EDIT) ###

Submission successful, please check on the coursera grader page for the status

array([ 32.15627636,  30.933928  ,  25.50776408,  22.16747872,
        18.00892145,  17.79316587,  17.76782255,  16.81928891,
        16.18087374,  15.83795897,  15.80665872,  15.43552731,
        15.08220124,  14.72631873,  14.69273653,  14.01762469,
        13.94532135,  13.82765547,  13.79929467,  13.50835772,
        13.22735361,  13.07723085,  12.6101472 ,  12.49384414,
        12.49071201,  12.13649924,  11.93252738,  11.25794592,
        11.18841954,  11.09494289,  10.92729081,  10.82338093,
        10.81882089,  10.64506427,  10.42304627,  10.32449474,
        10.25693633,   9.99434957,   9.97161064,   9.96682527,
         9.93480531,   9.86884924,   9.80240134,   9.71265744,
         9.6591354 ,   9.62279386,   9.55800793,   9.33991524,
         9.31636337,   9.29680255,   9.17862032,   9.15001613,
         8.96845462,   8.91793278,   8.91342386,   8.89668537,
         8.8801725 ,   8.75420601,   8.7305932 ,   8.66536223,
         8.59827941,   8.56445122,   8.49320556,   8.34214463,
         8.31702119,   8.30595091,   8.2435913 ,   8.01729508,
         8.01116225,   7.95202673,   7.92653242,   7.89411659,
         7.82607966,   7.77944802,   7.73992992,   7.7104847 ,
         7.67636914,   7.65524306,   7.4606705 ,   7.1692165 ,
         7.16514262,   7.07709956,   6.88773062,   6.7971531 ,
         6.57188438,   6.51131209,   6.35928881,   6.34822256,
         6.29609156,   6.26489402,   5.9178115 ,   5.88005587,
         5.83164358,   5.75464697,   5.751713  ,   5.6780948 ,
         5.66534148,   5.63171516,   5.58108367,   5.52824121,
         5.48679532,   5.4295469 ,   5.38997364,   5.38548353,
         5.38021992,   5.33471658,   5.32436336,   5.12059851,
         5.11212257,   5.08363416,   4.94687645,   4.9419909 ,
         4.89848515,   4.87187476,   4.69646802,   4.61296522,
         4.56002204,   4.54205748,   4.52356808,   4.50206086,
         4.44237392,   4.39524874,   4.18702601,   4.11514071,
         4.10328863,   4.0795515 ,   3.98378154,   3.98242273,
         3.95138962,   3.91404548,   3.88759381,   3.8452876 ,
         3.806866  ,   3.78288618,   3.70341557,   3.6867927 ,
         3.66238738,   3.64811561,   3.61886522,   3.61576454,
         3.60244652,   3.53737689,   3.53415434,   3.45098448,
         3.41455925,   3.36513668,   3.3625489 ,   3.3191274 ,
         3.24158055,   3.19570771,   3.18552748,   3.12815354,
         3.05870406,   3.04515239,   3.02934119,   2.98816968,
         2.81980822,   2.77921913,   2.74139179,   2.6059568 ,
         2.52151711,   2.50986115,   2.4747736 ,   2.45968604,
         2.44460196,   2.33681242,   2.3221179 ,   2.29982764,
         2.22892247,   2.21395704,   2.19525634,   2.13304931,
         2.05499157,   1.99194993,   1.91187091,   1.812632  ,
         1.74310967,   1.71497934,   1.70530273,   1.5769675 ,
         1.54524789,   1.51493611,   1.42286668,   1.42179869,
         1.35881444,   1.30386817,   1.26820852,   1.22983483,
         1.18231373,   1.17456906,   1.11103599,   1.09741697,
         1.05291044,   1.03669352,   1.00895468,   1.00588621,
         0.97293487,   0.81706987,   0.81123455,   0.7368411 ,
         0.72750008,   0.5982855 ,   0.52125733,   0.51458962,
         0.4600874 ,   0.3615934 ,   0.34609816,   0.2924146 ,
         0.26727967,   0.26190696,   0.17730959,   0.15125563,
         0.10723449,   0.08654033,   0.0597015 ,  -0.0557358 ,
        -0.08692647,  -0.11206998,  -0.15121343,  -0.18790832,
        -0.21548668,  -0.21631904,  -0.23281801,  -0.24094782,
        -0.32807204,  -0.46561326,  -0.5320355 ,  -0.62555696,
        -0.64875481,  -0.67846195,  -0.68742399,  -0.7202875 ,
        -0.74365652,  -0.80234017,  -0.81926408,  -1.04241588,
        -1.05620274,  -1.21894655,  -1.27563693,  -1.29541343,
        -1.29812308,  -1.34256956,  -1.38818191,  -1.41697038,
        -1.51100807,  -1.59597119,  -1.60330609,  -1.65659672,
        -1.75410943,  -1.75917702,  -2.04606048,  -2.0465409 ,
        -2.10907307,  -2.14474455,  -2.22803484,  -2.22943001,
        -2.29888029,  -2.31460232,  -2.35367212,  -2.42836882,
        -2.49384416,  -2.51557327,  -2.55189638,  -2.57184377,
        -2.57892112,  -2.59479446,  -2.65391145,  -2.65594557,
        -2.67815035,  -2.67819484,  -2.79741082,  -2.8160212 ,
        -2.86855778,  -2.87291304,  -2.87832079,  -2.88938517,
        -2.94101885,  -2.95450215,  -2.95630914,  -2.97900869,
        -3.09023062,  -3.12574211,  -3.2053992 ,  -3.40985894,
        -3.43672823,  -3.48775542,  -3.50664183,  -3.57241786,
        -3.57407899,  -3.63658212,  -3.66220695,  -3.6978119 ,
        -3.71078064,  -3.7540065 ,  -3.7871498 ,  -3.78886105,
        -3.85146459,  -3.87653019,  -3.92668502,  -4.0896392 ,
        -4.24201923,  -4.24697029,  -4.25243107,  -4.49493243,
        -4.50392189,  -4.53222319,  -4.53712082,  -4.55964191,
        -4.57491027,  -4.67137914,  -4.71198908,  -4.71302006,
        -4.73363279,  -4.75173183,  -4.78598877,  -4.87544271,
        -4.94834241,  -4.96539629,  -4.99123477,  -4.99908917,
        -5.07711316,  -5.09577626,  -5.17424435,  -5.1843572 ,
        -5.18764847,  -5.22835566,  -5.23852723,  -5.30139067,
        -5.48553464,  -5.53526614,  -5.55627871,  -5.64957709,
        -5.69071538,  -5.69387325,  -5.83713928,  -5.92627383,
        -6.07701325,  -6.18955893,  -6.24396636,  -6.2490045 ,
        -6.33103449,  -6.35300848,  -6.78733658,  -6.83534616,
        -6.93671333,  -7.0955804 ,  -7.25835221,  -7.30938741,
        -7.45799232,  -7.58359386,  -7.63607691,  -7.71934206,
        -7.88876604,  -7.89500746,  -8.06174906,  -8.0678495 ,
        -8.14725187,  -8.34405988,  -8.44670927,  -8.82389528,
        -8.97050342,  -9.12295329,  -9.13890405,  -9.17737275,
        -9.18297553,  -9.24627595,  -9.2867669 ,  -9.39608929,
        -9.46310214,  -9.5180178 ,  -9.54215838,  -9.81277016,
        -9.81750221,  -9.81955017,  -9.90111576, -10.17353046,
       -10.29779865, -10.66189804, -10.68262901, -10.9296642 ,
       -10.94297036, -11.04055867, -11.04260035, -11.32856023,
       -11.61560743, -11.81154341, -11.86495788, -11.86567688,
       -11.98486517, -12.16750417, -12.41881119, -12.44057146,
       -12.63399896, -12.68060184, -12.68608292, -12.86036696,
       -12.8764651 , -12.96922261, -13.06289534, -13.07823812,
       -13.87902318, -14.31115145, -14.34953669, -14.61440766,
       -14.65627554, -14.99342457, -15.02195634, -15.37738891,
       -15.6738353 , -15.79055611, -15.93681142, -16.22246156,
       -16.77791626, -17.31352144, -17.60067165, -17.79578107,
       -21.00138056, -21.04210789])


pc_w = np.zeros(len(stock_tickers))
eigen_prtf2 = pd.DataFrame(data ={'weights': pc_w.squeeze()*100}, index = stock_tickers)

if pca is not None:
    pcs = pca.components_
    
    ### START CODE HERE ### (≈ 1-2 lines of code)
    # normalized to 1 
    pc_w = pcs[:, 1] / sum(pcs[:, 1])
    
    ### END CODE HERE ###

    eigen_prtf2 = pd.DataFrame(data ={'weights': pc_w.squeeze()*100}, index = stock_tickers)
    eigen_prtf2.sort_values(by=['weights'], ascending=False, inplace=True)
    print('Sum of weights of second eigen-portfolio: %.2f' % np.sum(eigen_prtf2))
    eigen_prtf2.plot(title='Second eigen-portfolio weights',
                     figsize=(12,6), 
                     xticks=range(0, len(stock_tickers),10), 
                     rot=45, 
                     linewidth=3)

Sum of weights of second eigen-portfolio: 100.00


### GRADED PART (DO NOT EDIT) ###
part_4 = list(eigen_prtf2.as_matrix().squeeze())
try:
    part4 = " ".join(map(repr, part_4))
except TypeError:
    part4 = repr(part_4)
submissions[all_parts[3]]=part4
grading.submit(COURSERA_EMAIL, COURSERA_TOKEN, assignment_key,all_parts[:4],all_parts,submissions)
eigen_prtf2.as_matrix().squeeze()
### GRADED PART (DO NOT EDIT) ###

Submission successful, please check on the coursera grader page for the status

array([ 27.53031336,  27.44303101,  26.92015668,  25.4310494 ,
        25.03044897,  24.12127012,  23.62909928,  23.13646227,
        21.73518084,  21.3899741 ,  21.10378786,  20.86975774,
        20.56977124,  20.26750658,  19.9710755 ,  19.41651496,
        18.77730475,  18.51765116,  18.49095368,  18.25276419,
        16.39168846,  16.25426255,  15.97969732,  15.96457002,
        15.63103436,  15.40186792,  15.34420783,  15.25021659,
        14.77661408,  14.73123119,  14.70789736,  14.63359636,
        14.31750245,  14.21219482,  14.08577939,  14.08395854,
        14.01547523,  13.91722912,  13.63007081,  13.53974902,
        13.13074358,  13.07617812,  13.07322375,  12.96274837,
        12.77003598,  12.73471508,  12.5667775 ,  12.47796756,
        12.29056859,  12.04541745,  11.96601592,  11.93996984,
        11.79293162,  11.50831821,  11.45618398,  11.45151495,
        11.20146656,  11.14579526,  11.1357662 ,  11.0945854 ,
        10.99734798,  10.91045933,  10.78097499,  10.74314901,
        10.72078183,  10.67801201,  10.50463204,  10.46149178,
        10.43159287,  10.26087759,  10.08394312,   9.9609939 ,
         9.86182923,   9.76396495,   9.63650882,   9.63004769,
         9.50466844,   9.49864483,   9.4957028 ,   9.4703881 ,
         9.3941701 ,   9.31171419,   9.14361585,   9.08169237,
         8.68245288,   8.65412772,   8.62048511,   8.52003903,
         8.37839441,   7.99133823,   7.79946679,   7.57260774,
         7.35288001,   7.33939776,   7.30067078,   6.93373317,
         6.86419914,   6.83250721,   6.82884836,   6.80372355,
         6.76185388,   6.75965892,   6.69159231,   6.68643616,
         6.64537488,   6.5516704 ,   6.53342164,   6.43976482,
         6.34857817,   6.32610806,   6.15118462,   6.12894593,
         6.01615886,   5.90917836,   5.66915694,   5.62308417,
         5.62034367,   5.58230684,   5.54576126,   5.42484044,
         5.41215222,   5.34383525,   5.25534923,   5.24655677,
         5.12282146,   5.12250845,   5.11237952,   5.11150717,
         5.01559552,   4.94211285,   4.91390062,   4.81946811,
         4.81749083,   4.7459741 ,   4.67075723,   4.61996263,
         4.45603814,   4.29761458,   4.27250743,   4.22510039,
         4.18212156,   4.18007646,   4.15940821,   4.03964224,
         3.92161175,   3.87527802,   3.87136317,   3.77679406,
         3.62925732,   3.55483188,   3.51472534,   3.42643802,
         3.37957108,   3.36669628,   3.17837066,   3.1545631 ,
         3.14759078,   3.14110355,   3.10089143,   3.04229264,
         2.8908103 ,   2.86709806,   2.83207955,   2.7405512 ,
         2.55098267,   2.54731302,   2.50924445,   2.38649765,
         2.34661498,   2.33828808,   2.3316228 ,   2.23586861,
         2.22452326,   2.2138903 ,   2.1540927 ,   2.0636746 ,
         1.79629483,   1.78096309,   1.77540758,   1.76643907,
         1.7487678 ,   1.74241004,   1.70905534,   1.66062031,
         1.60810452,   1.60708039,   1.49939442,   1.44505791,
         1.37853864,   1.3427175 ,   1.31523359,   1.17971819,
         1.0689228 ,   0.94968933,   0.91405518,   0.90345156,
         0.87726586,   0.84206071,   0.76114543,   0.66349731,
         0.64490199,   0.6056523 ,   0.59829791,   0.56974935,
         0.56062364,   0.51316831,   0.5072826 ,   0.49061942,
         0.44739042,   0.44575907,   0.35986181,   0.2244308 ,
         0.15289962,   0.10523638,   0.08442007,  -0.06922596,
        -0.08264087,  -0.13604063,  -0.13848862,  -0.14340122,
        -0.17758784,  -0.20937016,  -0.24110603,  -0.32821667,
        -0.39524778,  -0.48437771,  -0.54934386,  -0.57920837,
        -0.59195883,  -0.60826219,  -0.71402152,  -0.77244242,
        -0.83678351,  -0.99919211,  -1.19684113,  -1.25542595,
        -1.27328751,  -1.28626492,  -1.30765915,  -1.34999235,
        -1.38768349,  -1.44995527,  -1.49616718,  -1.59863266,
        -1.6151748 ,  -1.64604519,  -1.65687562,  -1.79219817,
        -1.82935763,  -1.87309535,  -1.90995093,  -1.93195537,
        -1.93406615,  -1.9950288 ,  -2.05814617,  -2.14045012,
        -2.17157674,  -2.18586761,  -2.25096975,  -2.31295378,
        -2.3848644 ,  -2.41325419,  -2.43382476,  -2.47888688,
        -2.72898462,  -2.74958125,  -2.86606204,  -2.92175855,
        -3.17222143,  -3.54024415,  -3.65383342,  -3.74044297,
        -3.77890186,  -3.7906216 ,  -3.82178724,  -3.83076534,
        -3.8808885 ,  -3.89014202,  -3.98923981,  -4.04239777,
        -4.06875243,  -4.22500533,  -4.31113533,  -4.44475693,
        -4.48514079,  -4.62357889,  -4.68715274,  -4.69055963,
        -4.71498031,  -4.7388593 ,  -4.78665783,  -4.79506315,
        -4.84080404,  -4.9463238 ,  -4.96863853,  -4.97880824,
        -4.98467908,  -5.13193765,  -5.13841775,  -5.22479417,
        -5.3299395 ,  -5.3509973 ,  -5.39164525,  -5.40541501,
        -5.44419132,  -5.56293087,  -5.6089496 ,  -5.711541  ,
        -5.77341293,  -5.79981169,  -5.96099501,  -5.99663691,
        -6.17214175,  -6.17715124,  -6.27317934,  -6.27963382,
        -6.30551099,  -6.33362791,  -6.33531977,  -6.35011287,
        -6.46015663,  -6.49909982,  -6.59631923,  -6.60860519,
        -6.9912739 ,  -7.02155377,  -7.11244457,  -7.33323449,
        -7.48903655,  -7.58651288,  -7.6091664 ,  -7.70481273,
        -7.74936346,  -7.78657882,  -7.80950439,  -7.87491615,
        -7.87498827,  -7.95252638,  -8.09514874,  -8.13069207,
        -8.18003935,  -8.21838164,  -8.25111808,  -8.49197299,
        -8.51751981,  -8.52169152,  -8.58301075,  -8.5875456 ,
        -8.9974824 ,  -9.03994749,  -9.09555791,  -9.15749336,
        -9.17420706,  -9.21076096,  -9.26583559,  -9.63636144,
        -9.66700172,  -9.75542795,  -9.8225125 ,  -9.87907527,
        -9.95561572, -10.13038866, -10.15211806, -10.15909807,
       -10.33947034, -10.51475891, -10.53577983, -10.66690814,
       -10.7585451 , -10.96655041, -11.02521309, -11.16919069,
       -11.19154488, -11.34578475, -11.62614273, -12.14767145,
       -12.23541092, -12.46824707, -12.51057085, -12.87699868,
       -13.2331274 , -13.35529279, -13.40052933, -13.70009205,
       -13.71781767, -14.02202044, -14.39984146, -14.58819737,
       -15.32871122, -15.59575095, -15.65026676, -15.87551601,
       -16.02614228, -16.10217363, -17.08467598, -17.26678038,
       -17.36490516, -17.78117739, -18.01583006, -18.38291172,
       -18.40663005, -18.76725963, -19.43166057, -20.05927005,
       -20.83294877, -21.14534698, -21.32065422, -21.4458864 ,
       -23.0226232 , -23.11954752, -24.63871717, -24.93168371,
       -25.46098173, -25.56237365, -27.06112175, -27.0785548 ,
       -28.39508025, -30.44951689])


def sharpe_ratio(ts_returns, periods_per_year=252):
    """
    sharpe_ratio - Calculates annualized return, annualized vol, and annualized sharpe ratio, 
                    where sharpe ratio is defined as annualized return divided by annualized volatility 
                    
    Arguments:
    ts_returns - pd.Series of returns of a single eigen portfolio
    
    Return:
    a tuple of three doubles: annualized return, volatility, and sharpe ratio
    """
    
    annualized_return = 0.
    annualized_vol = 0.
    annualized_sharpe = 0.
    
    ### START CODE HERE ### (≈ 4-5 lines of code)
    ### ...
    n_years = ts_returns.shape[0] / periods_per_year
    annualized_return = np.power(np.prod(1 + ts_returns),(1 / n_years)) - 1
    annualized_vol = ts_returns.std() * np.sqrt(periods_per_year)
    annualized_sharpe = annualized_return / annualized_vol
    ### END CODE HERE ###
    
    return annualized_return, annualized_vol, annualized_sharpe


if df_raw_test is not None:
    eigen_prtf1_returns = np.dot(df_raw_test.loc[:, eigen_prtf1.index], eigen_prtf1 / 100)
    eigen_prtf1_returns = pd.Series(eigen_prtf1_returns.squeeze(), index=df_test.index)
    er, vol, sharpe = sharpe_ratio(eigen_prtf1_returns)
    print('First eigen-portfolio:\nReturn = %.2f%%\nVolatility = %.2f%%\nSharpe = %.2f' % (er*100, vol*100, sharpe))
    year_frac = (eigen_prtf1_returns.index[-1] - eigen_prtf1_returns.index[0]).days / 252

    df_plot = pd.DataFrame({'PC1': eigen_prtf1_returns, 'SPX': df_raw_test.loc[:, 'SPX']}, index=df_test.index)
    np.cumprod(df_plot + 1).plot(title='Returns of the market-cap weighted index vs. First eigen-portfolio', 
                             figsize=(12,6), linewidth=3)

First eigen-portfolio:
Return = 41.39%
Volatility = 31.50%
Sharpe = 1.31


if df_raw_test is not None:
    eigen_prtf2_returns = np.dot(df_raw_test.loc[:, eigen_prtf2.index], eigen_prtf2 / 100)
    eigen_prtf2_returns = pd.Series(eigen_prtf2_returns.squeeze(), index=df_test.index)
    er, vol, sharpe = sharpe_ratio(eigen_prtf2_returns)
    print('Second eigen-portfolio:\nReturn = %.2f%%\nVolatility = %.2f%%\nSharpe = %.2f' % (er*100, vol*100, sharpe))

Second eigen-portfolio:
Return = 15.76%
Volatility = 42.84%
Sharpe = 0.37


n_portfolios = 120
annualized_ret = np.array([0.] * n_portfolios)
sharpe_metric = np.array([0.] * n_portfolios)
annualized_vol = np.array([0.] * n_portfolios)
idx_highest_sharpe = 0 # index into sharpe_metric which identifies a portfolio with rhe highest Sharpe ratio
    
if pca is not None:
    for ix in range(n_portfolios):
        
        ### START CODE HERE ### (≈ 4-5 lines of code)
        pc_w = pcs[:, ix] / sum(pcs[:, ix])
        eigen_prtfix = pd.DataFrame(data ={'weights': pc_w.squeeze()*100}, index = stock_tickers)
        eigen_prtfix.sort_values(by=['weights'], ascending=False, inplace=True)
        
        eigen_prtix_returns = np.dot(df_raw_test.loc[:, eigen_prtfix.index], eigen_prtfix / 100)
        eigen_prtix_returns = pd.Series(eigen_prtix_returns.squeeze(), index=df_test.index)
        er, vol, sharpe = sharpe_ratio(eigen_prtix_returns)
        annualized_ret[ix] = er
        annualized_vol[ix] = vol
        sharpe_metric[ix] = sharpe
    
        ### END CODE HERE ###
    
    
    # find portfolio with the highest Sharpe ratio
    ### START CODE HERE ### (≈ 2-3 lines of code)
    ### ...
    idx_highest_sharpe = np.nanargmax(sharpe_metric)
    ### END CODE HERE ###
    
    print('Eigen portfolio #%d with the highest Sharpe. Return %.2f%%, vol = %.2f%%, Sharpe = %.2f' % 
          (idx_highest_sharpe,
           annualized_ret[idx_highest_sharpe]*100, 
           annualized_vol[idx_highest_sharpe]*100, 
           sharpe_metric[idx_highest_sharpe]))

    fig, ax = plt.subplots()
    fig.set_size_inches(12, 4)
    ax.plot(sharpe_metric, linewidth=3)
    ax.set_title('Sharpe ratio of eigen-portfolios')
    ax.set_ylabel('Sharpe ratio')
    ax.set_xlabel('Portfolios')

/opt/conda/lib/python3.6/site-packages/ipykernel/__main__.py:21: RuntimeWarning: invalid value encountered in power

Eigen portfolio #42 with the highest Sharpe. Return 61.14%, vol = 22.80%, Sharpe = 2.68


results = pd.DataFrame(data={'Return': annualized_ret, 'Vol': annualized_vol, 'Sharpe': sharpe_metric})
results.dropna(inplace=True)
results.sort_values(by=['Sharpe'], ascending=False, inplace=True)
results.head(10)


### GRADED PART (DO NOT EDIT) ###
part_5 = list(results.iloc[:, 1].values.squeeze())
try:
    part5 = " ".join(map(repr, part_5))
except TypeError:
    part5 = repr(part_5)
submissions[all_parts[4]]=part5
grading.submit(COURSERA_EMAIL, COURSERA_TOKEN, assignment_key,all_parts[:5],all_parts,submissions)
results.iloc[:, 1].values.squeeze()
### GRADED PART (DO NOT EDIT) ###

Submission successful, please check on the coursera grader page for the status

array([ 2.68135386,  2.43134379,  2.39872442,  2.33792864,  2.30660114,
        2.22158907,  2.2167735 ,  2.19851244,  1.81300846,  1.79342192,
        1.76312136,  1.61426685,  1.31394555,  1.30788062,  1.26660256,
        1.25888541,  1.18687021,  1.17034684,  1.1646922 ,  1.06187713,
        1.05581668,  1.05503146,  1.04881373,  1.02165359,  1.01830316,
        1.00829296,  0.99498987,  0.9752889 ,  0.958144  ,  0.92726752,
        0.92483757,  0.90418802,  0.89253674,  0.88640631,  0.86855843,
        0.8428844 ,  0.79154999,  0.7280406 ,  0.68482477,  0.66834314,
        0.64094412,  0.62471951,  0.6200365 ,  0.60334151,  0.60021067,
        0.5947495 ,  0.5909932 ,  0.48427946,  0.47798503,  0.47444463,
        0.47279705,  0.46333553,  0.46119478,  0.4259212 ,  0.42363749,
        0.4154423 ,  0.41218842,  0.36779464,  0.30061885,  0.29467637,
        0.27703449,  0.25218081,  0.24689467,  0.23703113,  0.23105779,
        0.22907762,  0.20739231,  0.14234158,  0.1409318 ,  0.14041686,
        0.12992353,  0.11648127,  0.10386079,  0.06260278,  0.05659276,
        0.04381381,  0.03635583,  0.02334462,  0.01970139, -0.02079731,
       -0.02326461, -0.05238554, -0.07089928, -0.07540045, -0.08174287,
       -0.09489727, -0.09821993, -0.10092825, -0.14388953, -0.20465891,
       -0.21335143, -0.21535573, -0.21826577, -0.26268148, -0.28928421,
       -0.29044099, -0.33644553, -0.3399915 , -0.34757978, -0.34789059,
       -0.36090558, -0.41387712, -0.42602414, -0.42603886, -0.42931113,
       -0.50531371, -0.50667481, -0.5444063 , -0.56360796, -0.68829347,
       -0.69692501, -0.7294767 , -0.73094093, -0.82976161, -0.89658555,
       -1.09025839, -1.21303625])


### GRADED PART (DO NOT EDIT) ###
part6 = str(idx_highest_sharpe)
submissions[all_parts[5]]=part6
grading.submit(COURSERA_EMAIL, COURSERA_TOKEN, assignment_key,all_parts[:6],all_parts,submissions)
idx_highest_sharpe
### GRADED PART (DO NOT EDIT) ###

Submission successful, please check on the coursera grader page for the status

42

	A	AA	AAPL	ABC	ABT	ADBE	ADI	ADM	ADP	ADSK	AEE	AEP
2000-01-27	46.1112	78.9443	3.9286	4.5485	13.7898	15.6719	48.0313	10.8844	39.5477	8.1250	32.9375	33.5625
2000-01-28	45.8585	77.8245	3.6295	4.5485	14.2653	14.3906	47.7500	10.7143	38.5627	7.7188	32.3125	33.0000
2000-01-31	44.5952	78.0345	3.7054	4.3968	14.5730	13.7656	46.7500	10.6576	37.3807	7.6406	32.5625	33.5000
2000-02-01	47.8377	80.7640	3.5804	4.5333	14.7128	13.9688	49.0000	10.8844	37.9717	7.9219	32.5625	33.6875
2000-02-02	51.5434	83.4934	3.5290	4.5788	14.7968	15.3281	48.1250	10.6576	35.9032	7.9688	32.5625	33.6250

	STJ	SVU	SWY	TEG	TER	TGNA	THC	X	MAR.1	SPX
2000-01-27	5.5918	86.6178	26.3983	11.3873	65.8677	22.1921	60.9705	20.7086	12.2457	1398.56
2000-01-28	5.4520	82.4218	27.4137	11.2230	60.3487	21.7558	62.3032	20.1183	12.0742	1360.16
2000-01-31	5.5499	86.3181	28.2444	11.0862	62.1484	22.0533	60.6373	19.5772	12.1722	1394.46
2000-02-01	5.4240	83.0212	28.7982	11.1683	67.3674	22.2120	60.4708	19.5772	12.5151	1409.28
2000-02-02	5.3541	81.5226	28.6136	11.1956	68.9271	22.6483	62.4698	19.5281	12.3192	1409.12

	STJ	SVU	SWY	TEG	TER	TGNA	THC	X	MAR.1	SPX
2013-12-16	0.852722	0.965219	-1.168885	0.884751	0.095865	0.656639	0.180014	-0.238498	0.465047	0.467931
2013-12-17	0.275173	0.517307	-0.086106	-0.306213	0.589689	-0.118610	-0.549523	0.025268	-0.260013	-0.247921
2013-12-18	0.864485	0.509435	0.600714	1.210605	-0.190024	0.925461	0.756998	0.058428	0.952458	1.252703
2013-12-19	0.210069	0.399574	-0.100159	-0.757419	-0.208023	0.304913	-0.772205	1.544228	-0.167775	-0.056358
2013-12-20	0.827306	0.748420	0.372443	1.048113	0.264046	0.436874	0.320641	-0.740854	0.373717	0.353859

	Return	Sharpe	Vol
42	0.611437	2.681354	0.228033
24	1.032178	2.431344	0.424530
104	0.512464	2.398724	0.213640
97	1.425562	2.337929	0.609754
9	0.753548	2.306601	0.326692
94	0.502025	2.221589	0.225976
93	0.601081	2.216774	0.271151
2	0.453437	2.198512	0.206247
102	0.274142	1.813008	0.151208
118	0.874381	1.793422	0.487549

Eigen-portfolio construction using Principal Component Analysis (PCA)¶

PCA via sklearn.decomposition using S&P 500 Index stock data¶

About iPython Notebooks¶

Part 1 (Asset Returns Calculation)¶

Part 2 (PCA fitting)¶

Part 3 (Eigen-portfolios construction)¶

Part 4 (Compute performance of several eigen portfolios)¶