データセット

data("starmine")
starmine %>% as_tibble %>% print()
## # A tibble: 53,328 x 23
##    date       id     symbol name   country sector sec   ind     size   smi
##  * <date>     <chr>  <chr>  <chr>  <fct>   <fct>  <fct> <fct>  <dbl> <dbl>
##  1 1995-01-31 00036~ AIR    Aar C~ USA     Shops  IND   AERDF -0.979    34
##  2 1995-01-31 00088~ ADCT   Adc T~ USA     HiTec  TEC   COMEQ -1.09     NA
##  3 1995-01-31 02081~ ALO    Alpha~ USA     Hlth   HTH   PHARM -0.376    NA
##  4 1995-01-31 00176~ AMR    Amr C~ USA     Other  IND   AIRLN  1.21     74
##  5 1995-01-31 00190~ ASTA   Ast R~ USA     HiTec  TEC   COMPT -0.248     5
##  6 1995-01-31 72348~ PNW    Pinna~ USA     Utils  UTL   ELUTL  0.665    84
##  7 1995-01-31 00282~ ABT    Abbot~ USA     Hlth   HTH   PHARM  2.39     70
##  8 1995-01-31 00531~ ADAC   Adac ~ USA     Hlth   HTH   HEQSP -1.85     57
##  9 1995-01-31 00687~ ADIA   Adia ~ USA     Other  IND   COMSS -0.326    NA
## 10 1995-01-31 00738~ ADVC   Advan~ USA     HiTec  IND   ELEQT -2.85     41
## # ... with 53,318 more rows, and 13 more variables: liq <dbl>,
## #   ret.0.1.m <dbl>, ret.0.6.m <dbl>, ret.1.0.m <dbl>, ret.6.0.m <dbl>,
## #   ret.12.0.m <dbl>, mn.dollar.volume.20.d <dbl>,
## #   md.dollar.volume.120.d <dbl>, cap.usd <dbl>, cap <dbl>, sales <dbl>,
## #   net.income <dbl>, common.equity <dbl>

関数

 funcExtractName

funcExtractName <- function(data, Factor,  Name, type = "Tile", tile = 5) {
    
    col_Factor <- enquo(Factor)
    col_Name   <- enquo(Name)
    
    if (type == "Tile"){
      result <- 
        data %>% 
          mutate(Tile = paste0("F", ntile(desc(!!col_Factor), 5))) %>% 
          group_by(Tile) %>% 
          summarise(FctName = first(!!col_Name)) %>% 
          ungroup()  
    } else if(type == "All"){
      result <- 
        data %>% 
          mutate(Tile = "All") %>% 
          group_by(Tile) %>% 
          summarise(FctName = first(!!col_Name)) %>% 
          ungroup()
    }
    
    return(result)
    }

 funcCountNames

funcCountNames <- function(data, Factor, type = "Tile", tile = 5) {
    
    col_Factor <- enquo(Factor)
    
    if (type == "Tile"){
      result <- 
        data %>% 
          mutate(Tile = paste0("F", ntile(desc(!!col_Factor), 5))) %>% 
          group_by(Tile) %>% 
          summarise(Count = n()) %>% 
          ungroup()  
    } else if(type == "All"){
      result <- 
        data %>% 
          mutate(Tile = "All") %>% 
          group_by(Tile) %>% 
          summarise(Count = n()) %>% 
          ungroup()
    }
    
    return(result)
    }

 funcCalcReturn

funcCalcReturn <- function(data, Return, Factor, type = "Tile", tile = 5) {
    
    col_Return <- enquo(Return)
    col_Factor <- enquo(Factor)
    
    if (type == "Tile"){
      result <- 
        data %>% 
          mutate(Tile = paste0("F", ntile(desc(!!col_Factor), 5))) %>% 
          group_by(Tile) %>% 
          summarise(Ret = mean(!!col_Return, na.rm = TRUE)) %>% 
          ungroup()  
    } else if(type == "All"){
      result <- 
        data %>% 
          mutate(Tile = "All") %>% 
          group_by(Tile) %>% 
          summarise(Ret = mean(!!col_Return, na.rm = TRUE)) %>% 
          ungroup()
    }
    
    return(result)
    }

 funcCalcExcess

funcCalcExcess <- function(data, Return, Factor, tile = 5) {
    
    col_Return <- enquo(Return)
    col_Factor <- enquo(Factor)
    
    result <- 
      data %>% 
        mutate(Tile = paste0("F", ntile(desc(!!col_Factor), 5)), 
               Ret_All = mean(!!col_Return, na.rm = TRUE)) %>% 
        group_by(Tile) %>% 
        summarise(Ret_Tile = mean(!!col_Return, na.rm = TRUE), 
                  Ret_All  = mean(Ret_All, na.rm = TRUE)) %>% 
        ungroup() %>% 
        mutate(Excess = Ret_Tile - Ret_All)
    
    return(result)
    }

 funcCalcHitRate

funcCalcHitRate <- function(data, Return, Factor, tile = 5) {
    
    col_Return <- enquo(Return)
    col_Factor <- enquo(Factor)
    
    result <- 
      data %>% 
        mutate(Tile = paste0("F", ntile(desc(!!col_Factor), 5)), 
               Ret_All = mean(!!col_Return, na.rm = TRUE), 
               HitFlg = ifelse(!!col_Return > Ret_All, 1, 0), 
               Count = n()) %>% 
        group_by(Tile) %>% 
        summarise(HitRate = sum(HitFlg, na.rm = TRUE) / mean(Count)) %>% 
        ungroup()
        
    
    return(result)
    }

 funcCalcIC

funcCalcIC <- function(data, Return, Factor, type = "Tile", tile = 5) {
    
    col_Return <- enquo(Return)
    col_Factor <- enquo(Factor)
    
    
    if (type == "Tile"){
      result <- 
        data %>% 
          mutate(Tile = paste0("F", ntile(desc(!!col_Factor), 5))) %>% 
          group_by(Tile) %>% 
          summarise(IC_Tile = cor(x = !!col_Return,  
                                  y = !!col_Factor, 
                                  use = "pairwise.complete.obs", 
                                  method = "spearman")) %>% 
          ungroup() 
    } else if(type == "All"){
      result <- 
        data %>% 
          mutate(Tile = "All") %>% 
          group_by(Tile) %>% 
          summarise(IC_Tile = cor(x = !!col_Return,  
                                  y = !!col_Factor, 
                                  use = "pairwise.complete.obs", 
                                  method = "spearman")) %>% 
          ungroup()
      }
    
    return(result)
    }

期間分析

準備

X <- starmine %>% as_tibble() %>%  mutate(rtn = ret.0.1.m)
X %>% colnames
##  [1] "date"                   "id"                    
##  [3] "symbol"                 "name"                  
##  [5] "country"                "sector"                
##  [7] "sec"                    "ind"                   
##  [9] "size"                   "smi"                   
## [11] "liq"                    "ret.0.1.m"             
## [13] "ret.0.6.m"              "ret.1.0.m"             
## [15] "ret.6.0.m"              "ret.12.0.m"            
## [17] "mn.dollar.volume.20.d"  "md.dollar.volume.120.d"
## [19] "cap.usd"                "cap"                   
## [21] "sales"                  "net.income"            
## [23] "common.equity"          "rtn"
X_BASE <- 
  X %>% 
    select(date, symbol, rtn)

X_FCT <- 
  X %>% 
    select_if(is.numeric) %>% 
    select(-rtn, -ret.0.1.m)

メイン

XX_ALL <- 
  X_BASE %>% 
    bind_cols(X_FCT) %>% 
    gather(key = "Factor", value = "Value", -(date:rtn)) %>% 
    split(.$Factor) %>% 
    map(group_by, date) %>% 
    map(nest) %>% 
    map(mutate, FACTOR = map(data, funcExtractName, Value, Factor, "All")) %>% 
    map(mutate, COUNT = map(data, funcCountNames, Value, "All", 5)) %>% 
    map(mutate, IC = map(data, funcCalcIC, rtn, Value, "All", 5)) %>% 
    map(mutate, RET = map(data, funcCalcReturn, rtn, Value, "All", 5))
## Warning: package 'bindrcpp' was built under R version 3.4.4

統合

XXX_ALL <- 
  XX_ALL %>% 
    map(select, -data) %>% 
    map(unnest) %>% 
    map(select, date, FctName, Tile, Count, Ret, IC_Tile) %>% 
    bind_rows()

XXX_ALL %>% print()
## # A tibble: 154 x 6
##    date       FctName Tile  Count       Ret IC_Tile
##    <date>     <chr>   <chr> <int>     <dbl>   <dbl>
##  1 1995-01-31 cap     All    4593 -0.000945 -0.427 
##  2 1995-02-28 cap     All    4528 -0.00233  -0.226 
##  3 1995-03-31 cap     All    4569  0.0212   -0.0542
##  4 1995-04-30 cap     All    4708 -0.00153  -0.284 
##  5 1995-05-31 cap     All    4724  0.0159   -0.357 
##  6 1995-06-30 cap     All    4748  0.0706    0.162 
##  7 1995-07-31 cap     All    4878  0.0339    0.202 
##  8 1995-08-31 cap     All    5092  0.0125   -0.0689
##  9 1995-09-30 cap     All    5185 -0.0307    0.132 
## 10 1995-10-31 cap     All    5109  0.0342    0.0347
## # ... with 144 more rows

チェック

# # Check1
# XX[[1]][3] %>% unnest()
# 
# # Check2
# XX[[1]][2] %>% unnest() %>% funcCalcHitRate(rtn, Value)
# 
# XX[[1]][2] %>% 
#   unnest() %>% 
#           mutate(Tile = paste0("F", ntile(desc(Value), 5)), 
#                Ret_All = mean(rtn, na.rm = TRUE), 
#                HitFlg = ifelse(rtn > Ret_All, 1, 0), 
#                Count = n()) %>% 
#         group_by(Tile) %>% 
#         summarise(HitRate = sum(HitFlg, na.rm = TRUE) / mean(Count)) %>% 
#         ungroup()

分位別分析

準備

X <- starmine %>% as_tibble() %>%  mutate(rtn = ret.0.1.m)
X %>% colnames
##  [1] "date"                   "id"                    
##  [3] "symbol"                 "name"                  
##  [5] "country"                "sector"                
##  [7] "sec"                    "ind"                   
##  [9] "size"                   "smi"                   
## [11] "liq"                    "ret.0.1.m"             
## [13] "ret.0.6.m"              "ret.1.0.m"             
## [15] "ret.6.0.m"              "ret.12.0.m"            
## [17] "mn.dollar.volume.20.d"  "md.dollar.volume.120.d"
## [19] "cap.usd"                "cap"                   
## [21] "sales"                  "net.income"            
## [23] "common.equity"          "rtn"
X_BASE <- 
  X %>% 
    select(date, symbol, rtn)

X_FCT <- 
  X %>% 
    select_if(is.numeric) %>% 
    select(-rtn, -ret.0.1.m)

メイン

XX_TILE <- 
  X_BASE %>% 
    bind_cols(X_FCT) %>% 
    gather(key = "Factor", value = "Value", -(date:rtn)) %>% 
    split(.$Factor) %>% 
    map(group_by, date) %>% 
    map(nest) %>% 
    map(mutate, FACTOR = map(data, funcExtractName, Value, Factor)) %>% 
    map(mutate, COUNT  = map(data, funcCountNames, Value, "Tile", 5)) %>% 
    map(mutate, IC     = map(data, funcCalcIC, rtn, Value, "Tile", 5)) %>% 
    map(mutate, RET    = map(data, funcCalcReturn, rtn, Value, "Tile", 5)) %>% 
    map(mutate, EXCESS = map(data, funcCalcExcess, rtn, Value, 5)) %>% 
    map(mutate, HR     = map(data, funcCalcHitRate, rtn, Value, 5)) 

統合

XXX_TILE <- 
  XX_TILE %>% 
    map(select, -data) %>% 
    map(unnest) %>% 
    map(select, date, FctName, Tile, Count, Ret, IC_Tile, Excess, HitRate) %>% 
    bind_rows()

XXX_TILE %>% print()
## # A tibble: 924 x 8
##    date       FctName Tile  Count      Ret   IC_Tile  Excess HitRate
##    <date>     <chr>   <chr> <int>    <dbl>     <dbl>   <dbl>   <dbl>
##  1 1995-01-31 cap     F1      912 -0.0831    0.107   -0.0822 0.0198 
##  2 1995-01-31 cap     F2      912 -0.0236   -0.480   -0.0226 0.0814 
##  3 1995-01-31 cap     F3      912  0.0386    0.00656  0.0396 0.146  
##  4 1995-01-31 cap     F4      912  0.0393    0.0628   0.0402 0.142  
##  5 1995-01-31 cap     F5      912  0.0247   -0.0411   0.0257 0.120  
##  6 1995-01-31 cap     FNA      33  0.0256   NA        0.0266 0.00392
##  7 1995-02-28 cap     F1      899 -0.0330    0.176   -0.0307 0.0603 
##  8 1995-02-28 cap     F2      899 -0.0242   -0.265   -0.0219 0.0839 
##  9 1995-02-28 cap     F3      899  0.00805  -0.0551   0.0104 0.109  
## 10 1995-02-28 cap     F4      899  0.0135   -0.0542   0.0158 0.107  
## # ... with 914 more rows

チェック

# # Check1
# XX[[1]][3] %>% unnest()
# 
# # Check2
# XX[[1]][2] %>% unnest() %>% funcCalcHitRate(rtn, Value)
# 
# XX[[1]][2] %>% 
#   unnest() %>% 
#           mutate(Tile = paste0("F", ntile(desc(Value), 5)), 
#                Ret_All = mean(rtn, na.rm = TRUE), 
#                HitFlg = ifelse(rtn > Ret_All, 1, 0), 
#                Count = n()) %>% 
#         group_by(Tile) %>% 
#         summarise(HitRate = sum(HitFlg, na.rm = TRUE) / mean(Count)) %>% 
#         ungroup()

ターンオーバー分析

関数

funcCalcTurnover

funcCountNames <- function(data, Period, Factor, tile = 5) {
    
    col_Period <- enquo(Period)
    col_Factor <- enquo(Factor)
    dateList <- data$col_Period %>% unique() %>% sort()
    
    for (i in 2:length(dateList)){
      
    }
    
    
    return(result)
    }

準備

X <- starmine %>% as_tibble() %>%  mutate(rtn = ret.0.1.m)
X %>% colnames

X_BASE <- 
  X %>% 
    select(date, symbol, rtn)

X_FCT <- 
  X %>% 
    select_if(is.numeric) %>% 
    select(-rtn, -ret.0.1.m)

メイン

XX_TILE <- 
  X_BASE %>% 
    bind_cols(X_FCT) %>% 
    gather(key = "Factor", value = "Value", -(date:rtn)) %>% 
    split(.$Factor) %>% 
    map(funcCalcTurnover)
inserted by FC2 system