In [1]:
import pandas as pd
import numpy as np
from quantopian.pipeline import Pipeline
from quantopian.research import run_pipeline
from quantopian.pipeline.factors import Returns
from quantopian.research import prices, symbols
from quantopian.research import returns, symbols
from quantopian.pipeline.data.factset import Fundamentals
from quantopian.pipeline.data import EquityPricing
from quantopian.pipeline.factors import SimpleMovingAverage
from quantopian.pipeline.data.factset import EquityMetadata
from quantopian.pipeline.filters import StaticAssets
import alphalens as al
from quantopian.pipeline.filters import QTradableStocksUS
from quantopian.pipeline.data.sentdex import sentiment
from quantopian.pipeline.domain import US_EQUITIES
from quantopian.pipeline.data.psychsignal import stocktwits
from quantopian.pipeline.domain import US_EQUITIES
from quantopian.pipeline.data import USEquityPricing
from quantopian.pipeline.domain import CN_EQUITIES

PsychSignal data prior to May 2020 is still available for use on Quantopian;
however, the dataset has stopped updating. For more information see:
https://www.quantopian.com/posts/psychsignal-trader-mood-update



#### Dataset
Coverage: All supported countries on Quantopian.    
Data Frequency: Daily   
Update Frequency: Daily (updated overnight after each trading day).   
Timespan: 2004-01-01 to present.   
This notebook takes the US and China as examples.

In [2]:
from quantopian.pipeline import CustomFilter
class SidInList(CustomFilter):
    """
    Pipeline filter that is True only for assets whose SID appears in the
    ``sid_list`` parameter supplied when the filter is constructed.
    """
    # The filter reads no pipeline data columns; it only looks at `assets`.
    inputs = []
    window_length = 1
    params = ('sid_list',)

    def compute(self, today, assets, out, sid_list):
        # Element-wise membership test of `assets` against `sid_list`.
        is_member = np.in1d(assets, sid_list)
        out[:] = is_member

In [3]:
# Single-day pipeline: a 2-row return plus a flag for securities whose
# latest FactSet security type equals 'ETF_ETF'.
ret = Returns(window_length=2)
security_type = EquityMetadata.security_type.latest
pipe = Pipeline(columns={'returns': ret, 'is_ETF': security_type.eq('ETF_ETF')})

df = run_pipeline(pipe, start_date='2015-01-01', end_date='2015-01-01')
df.head()




Unnamed: 0,Unnamed: 1,is_ETF,returns
2015-01-02 00:00:00+00:00,Equity(2 [HWM]),False,-0.003155
2015-01-02 00:00:00+00:00,Equity(21 [AAME]),False,0.025445
2015-01-02 00:00:00+00:00,Equity(24 [AAPL]),False,-0.019098
2015-01-02 00:00:00+00:00,Equity(25 [HWM_PR]),False,-0.015281
2015-01-02 00:00:00+00:00,Equity(31 [ABAX]),False,-0.020207


In [4]:
# Collect the assets that the pipeline flagged as ETFs.
# `country` (name kept because later cells use it) is a plain list of the
# asset objects taken from level 1 of the pipeline's (date, asset) index.
# Selecting with a boolean mask leaves `df` untouched, so this cell can be
# re-run safely, and it still yields a list when zero or one asset is flagged.
etf_mask = df['is_ETF'].values.astype(bool)
country = df.index.get_level_values(1)[etf_mask].tolist()

In [None]:
# Keep only the ETFs that have a price on the first session in the
# 2015-01-01..2015-01-02 window, and collect their SIDs for the pipeline screen.

# Placeholders left over from an earlier momentum prototype; none of the
# visible cells reads them.
price = pd.DataFrame()
ret = pd.DataFrame()
momentum = pd.DataFrame()
sentimentscore = pd.DataFrame()

# Evaluation window used by the pipeline and pricing cells below.
period_start = '2010-01-01'
period_end = '2020-01-01'

priced_assets = []
for asset in country:
    first_price = prices(
        assets=symbols(asset),
        start='2015-01-01',
        end='2015-01-02',
    ).iloc[0]
    # NaN compares unequal to everything, so `value != np.NaN` is always
    # True and never filters anything; test for missing data explicitly.
    if pd.notnull(first_price):
        priced_assets.append(asset)

# Replace the list contents in one step. Calling country.remove() inside the
# loop over `country` would skip the element following each removal.
country[:] = priced_assets
sidlist = tuple(asset.sid for asset in priced_assets)

In [None]:
# Pipeline definition
def make_pipeline(sid_list=None, sma_window_length=210):
    """
    Build the pipeline of trailing returns and averaged StockTwits sentiment.

    Parameters
    ----------
    sid_list : iterable of int, optional
        SIDs to keep in the output. When omitted, the notebook-level
        ``sidlist`` (built from the ETF list in an earlier cell) is used.
    sma_window_length : int, optional
        ``window_length`` of the moving average over
        ``stocktwits.bull_minus_bear``. Defaults to 210.

    Returns
    -------
    Pipeline
        Columns ``returns_1d``, ``returns_1w``, ``returns_1m`` and
        ``sma_10m``, screened to the requested SIDs.
    """
    if sid_list is None:
        # Fall back to the global so existing `make_pipeline()` calls work.
        sid_list = sidlist

    # The filter parameter is passed as a tuple, as in the original call.
    include_filter = SidInList(sid_list=tuple(sid_list))

    sentiment_score = SimpleMovingAverage(
        inputs=[stocktwits.bull_minus_bear],
        window_length=sma_window_length,
    )

    return Pipeline(
        columns={
            'returns_1d': Returns(window_length=2),
            'returns_1w': Returns(window_length=6),
            'returns_1m': Returns(window_length=22),
            'sma_10m': sentiment_score,
        },
        screen=include_filter
    )

In [None]:
# Run the pipeline across the whole evaluation window
pipeline_output = run_pipeline(make_pipeline(), start_date=period_start, end_date=period_end)

In [None]:
# Preview the first rows of the pipeline result
pipeline_output.head(5)

In [None]:
# Pricing helper from the research API
from quantopian.research import prices

# Every distinct asset that appears in the pipeline result
asset_list = pipeline_output.index.get_level_values(1).unique()

# Pricing for those assets over the evaluation window
asset_prices = prices(asset_list, start=period_start, end=period_end)

In [None]:
# Alphalens performs the factor / forward-return alignment
import alphalens as al

# Combine the averaged sentiment factor with forward returns over 1, 5 and
# 10 periods, and assign each observation to one of 5 quantiles
factor_data = al.utils.get_clean_factor_and_forward_returns(
    factor=pipeline_output['sma_10m'],
    prices=asset_prices,
    periods=(1, 5, 10),
    quantiles=5,
    max_loss=0.4,
)

# Show the first 5 rows
factor_data.head()

In [None]:
import alphalens

# Render the full Alphalens tear sheet without a per-group breakdown;
# the trailing semicolon suppresses the cell's return-value echo.
alphalens.tears.create_full_tear_sheet(
    factor_data,
    by_group=False,
);