data("starmine")
starmine %>% as_tibble %>% print()
## # A tibble: 53,328 x 23
## date id symbol name country sector sec ind size smi
## * <date> <chr> <chr> <chr> <fct> <fct> <fct> <fct> <dbl> <dbl>
## 1 1995-01-31 00036~ AIR Aar C~ USA Shops IND AERDF -0.979 34
## 2 1995-01-31 00088~ ADCT Adc T~ USA HiTec TEC COMEQ -1.09 NA
## 3 1995-01-31 02081~ ALO Alpha~ USA Hlth HTH PHARM -0.376 NA
## 4 1995-01-31 00176~ AMR Amr C~ USA Other IND AIRLN 1.21 74
## 5 1995-01-31 00190~ ASTA Ast R~ USA HiTec TEC COMPT -0.248 5
## 6 1995-01-31 72348~ PNW Pinna~ USA Utils UTL ELUTL 0.665 84
## 7 1995-01-31 00282~ ABT Abbot~ USA Hlth HTH PHARM 2.39 70
## 8 1995-01-31 00531~ ADAC Adac ~ USA Hlth HTH HEQSP -1.85 57
## 9 1995-01-31 00687~ ADIA Adia ~ USA Other IND COMSS -0.326 NA
## 10 1995-01-31 00738~ ADVC Advan~ USA HiTec IND ELEQT -2.85 41
## # ... with 53,318 more rows, and 13 more variables: liq <dbl>,
## # ret.0.1.m <dbl>, ret.0.6.m <dbl>, ret.1.0.m <dbl>, ret.6.0.m <dbl>,
## # ret.12.0.m <dbl>, mn.dollar.volume.20.d <dbl>,
## # md.dollar.volume.120.d <dbl>, cap.usd <dbl>, cap <dbl>, sales <dbl>,
## # net.income <dbl>, common.equity <dbl>
funcExtractName <- function(data, Factor, Name, type = "Tile", tile = 5) {
col_Factor <- enquo(Factor)
col_Name <- enquo(Name)
if (type == "Tile"){
result <-
data %>%
mutate(Tile = paste0("F", ntile(desc(!!col_Factor), 5))) %>%
group_by(Tile) %>%
summarise(FctName = first(!!col_Name)) %>%
ungroup()
} else if(type == "All"){
result <-
data %>%
mutate(Tile = "All") %>%
group_by(Tile) %>%
summarise(FctName = first(!!col_Name)) %>%
ungroup()
}
return(result)
}
funcCountNames <- function(data, Factor, type = "Tile", tile = 5) {
col_Factor <- enquo(Factor)
if (type == "Tile"){
result <-
data %>%
mutate(Tile = paste0("F", ntile(desc(!!col_Factor), 5))) %>%
group_by(Tile) %>%
summarise(Count = n()) %>%
ungroup()
} else if(type == "All"){
result <-
data %>%
mutate(Tile = "All") %>%
group_by(Tile) %>%
summarise(Count = n()) %>%
ungroup()
}
return(result)
}
funcCalcReturn <- function(data, Return, Factor, type = "Tile", tile = 5) {
col_Return <- enquo(Return)
col_Factor <- enquo(Factor)
if (type == "Tile"){
result <-
data %>%
mutate(Tile = paste0("F", ntile(desc(!!col_Factor), 5))) %>%
group_by(Tile) %>%
summarise(Ret = mean(!!col_Return, na.rm = TRUE)) %>%
ungroup()
} else if(type == "All"){
result <-
data %>%
mutate(Tile = "All") %>%
group_by(Tile) %>%
summarise(Ret = mean(!!col_Return, na.rm = TRUE)) %>%
ungroup()
}
return(result)
}
funcCalcExcess <- function(data, Return, Factor, tile = 5) {
col_Return <- enquo(Return)
col_Factor <- enquo(Factor)
result <-
data %>%
mutate(Tile = paste0("F", ntile(desc(!!col_Factor), 5)),
Ret_All = mean(!!col_Return, na.rm = TRUE)) %>%
group_by(Tile) %>%
summarise(Ret_Tile = mean(!!col_Return, na.rm = TRUE),
Ret_All = mean(Ret_All, na.rm = TRUE)) %>%
ungroup() %>%
mutate(Excess = Ret_Tile - Ret_All)
return(result)
}
funcCalcHitRate <- function(data, Return, Factor, tile = 5) {
col_Return <- enquo(Return)
col_Factor <- enquo(Factor)
result <-
data %>%
mutate(Tile = paste0("F", ntile(desc(!!col_Factor), 5)),
Ret_All = mean(!!col_Return, na.rm = TRUE),
HitFlg = ifelse(!!col_Return > Ret_All, 1, 0),
Count = n()) %>%
group_by(Tile) %>%
summarise(HitRate = sum(HitFlg, na.rm = TRUE) / mean(Count)) %>%
ungroup()
return(result)
}
funcCalcIC <- function(data, Return, Factor, type = "Tile", tile = 5) {
col_Return <- enquo(Return)
col_Factor <- enquo(Factor)
if (type == "Tile"){
result <-
data %>%
mutate(Tile = paste0("F", ntile(desc(!!col_Factor), 5))) %>%
group_by(Tile) %>%
summarise(IC_Tile = cor(x = !!col_Return,
y = !!col_Factor,
use = "pairwise.complete.obs",
method = "spearman")) %>%
ungroup()
} else if(type == "All"){
result <-
data %>%
mutate(Tile = "All") %>%
group_by(Tile) %>%
summarise(IC_Tile = cor(x = !!col_Return,
y = !!col_Factor,
use = "pairwise.complete.obs",
method = "spearman")) %>%
ungroup()
}
return(result)
}
X <- starmine %>% as_tibble() %>% mutate(rtn = ret.0.1.m)
X %>% colnames
## [1] "date" "id"
## [3] "symbol" "name"
## [5] "country" "sector"
## [7] "sec" "ind"
## [9] "size" "smi"
## [11] "liq" "ret.0.1.m"
## [13] "ret.0.6.m" "ret.1.0.m"
## [15] "ret.6.0.m" "ret.12.0.m"
## [17] "mn.dollar.volume.20.d" "md.dollar.volume.120.d"
## [19] "cap.usd" "cap"
## [21] "sales" "net.income"
## [23] "common.equity" "rtn"
X_BASE <-
X %>%
select(date, symbol, rtn)
X_FCT <-
X %>%
select_if(is.numeric) %>%
select(-rtn, -ret.0.1.m)
XX_ALL <-
X_BASE %>%
bind_cols(X_FCT) %>%
gather(key = "Factor", value = "Value", -(date:rtn)) %>%
split(.$Factor) %>%
map(group_by, date) %>%
map(nest) %>%
map(mutate, FACTOR = map(data, funcExtractName, Value, Factor, "All")) %>%
map(mutate, COUNT = map(data, funcCountNames, Value, "All", 5)) %>%
map(mutate, IC = map(data, funcCalcIC, rtn, Value, "All", 5)) %>%
map(mutate, RET = map(data, funcCalcReturn, rtn, Value, "All", 5))
## Warning: package 'bindrcpp' was built under R version 3.4.4
XXX_ALL <-
XX_ALL %>%
map(select, -data) %>%
map(unnest) %>%
map(select, date, FctName, Tile, Count, Ret, IC_Tile) %>%
bind_rows()
XXX_ALL %>% print()
## # A tibble: 154 x 6
## date FctName Tile Count Ret IC_Tile
## <date> <chr> <chr> <int> <dbl> <dbl>
## 1 1995-01-31 cap All 4593 -0.000945 -0.427
## 2 1995-02-28 cap All 4528 -0.00233 -0.226
## 3 1995-03-31 cap All 4569 0.0212 -0.0542
## 4 1995-04-30 cap All 4708 -0.00153 -0.284
## 5 1995-05-31 cap All 4724 0.0159 -0.357
## 6 1995-06-30 cap All 4748 0.0706 0.162
## 7 1995-07-31 cap All 4878 0.0339 0.202
## 8 1995-08-31 cap All 5092 0.0125 -0.0689
## 9 1995-09-30 cap All 5185 -0.0307 0.132
## 10 1995-10-31 cap All 5109 0.0342 0.0347
## # ... with 144 more rows
# # Check1
# XX[[1]][3] %>% unnest()
#
# # Check2
# XX[[1]][2] %>% unnest() %>% funcCalcHitRate(rtn, Value)
#
# XX[[1]][2] %>%
# unnest() %>%
# mutate(Tile = paste0("F", ntile(desc(Value), 5)),
# Ret_All = mean(rtn, na.rm = TRUE),
# HitFlg = ifelse(rtn > Ret_All, 1, 0),
# Count = n()) %>%
# group_by(Tile) %>%
# summarise(HitRate = sum(HitFlg, na.rm = TRUE) / mean(Count)) %>%
# ungroup()
X <- starmine %>% as_tibble() %>% mutate(rtn = ret.0.1.m)
X %>% colnames
## [1] "date" "id"
## [3] "symbol" "name"
## [5] "country" "sector"
## [7] "sec" "ind"
## [9] "size" "smi"
## [11] "liq" "ret.0.1.m"
## [13] "ret.0.6.m" "ret.1.0.m"
## [15] "ret.6.0.m" "ret.12.0.m"
## [17] "mn.dollar.volume.20.d" "md.dollar.volume.120.d"
## [19] "cap.usd" "cap"
## [21] "sales" "net.income"
## [23] "common.equity" "rtn"
X_BASE <-
X %>%
select(date, symbol, rtn)
X_FCT <-
X %>%
select_if(is.numeric) %>%
select(-rtn, -ret.0.1.m)
XX_TILE <-
X_BASE %>%
bind_cols(X_FCT) %>%
gather(key = "Factor", value = "Value", -(date:rtn)) %>%
split(.$Factor) %>%
map(group_by, date) %>%
map(nest) %>%
map(mutate, FACTOR = map(data, funcExtractName, Value, Factor)) %>%
map(mutate, COUNT = map(data, funcCountNames, Value, "Tile", 5)) %>%
map(mutate, IC = map(data, funcCalcIC, rtn, Value, "Tile", 5)) %>%
map(mutate, RET = map(data, funcCalcReturn, rtn, Value, "Tile", 5)) %>%
map(mutate, EXCESS = map(data, funcCalcExcess, rtn, Value, 5)) %>%
map(mutate, HR = map(data, funcCalcHitRate, rtn, Value, 5))
XXX_TILE <-
XX_TILE %>%
map(select, -data) %>%
map(unnest) %>%
map(select, date, FctName, Tile, Count, Ret, IC_Tile, Excess, HitRate) %>%
bind_rows()
XXX_TILE %>% print()
## # A tibble: 924 x 8
## date FctName Tile Count Ret IC_Tile Excess HitRate
## <date> <chr> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 1995-01-31 cap F1 912 -0.0831 0.107 -0.0822 0.0198
## 2 1995-01-31 cap F2 912 -0.0236 -0.480 -0.0226 0.0814
## 3 1995-01-31 cap F3 912 0.0386 0.00656 0.0396 0.146
## 4 1995-01-31 cap F4 912 0.0393 0.0628 0.0402 0.142
## 5 1995-01-31 cap F5 912 0.0247 -0.0411 0.0257 0.120
## 6 1995-01-31 cap FNA 33 0.0256 NA 0.0266 0.00392
## 7 1995-02-28 cap F1 899 -0.0330 0.176 -0.0307 0.0603
## 8 1995-02-28 cap F2 899 -0.0242 -0.265 -0.0219 0.0839
## 9 1995-02-28 cap F3 899 0.00805 -0.0551 0.0104 0.109
## 10 1995-02-28 cap F4 899 0.0135 -0.0542 0.0158 0.107
## # ... with 914 more rows
# # Check1
# XX[[1]][3] %>% unnest()
#
# # Check2
# XX[[1]][2] %>% unnest() %>% funcCalcHitRate(rtn, Value)
#
# XX[[1]][2] %>%
# unnest() %>%
# mutate(Tile = paste0("F", ntile(desc(Value), 5)),
# Ret_All = mean(rtn, na.rm = TRUE),
# HitFlg = ifelse(rtn > Ret_All, 1, 0),
# Count = n()) %>%
# group_by(Tile) %>%
# summarise(HitRate = sum(HitFlg, na.rm = TRUE) / mean(Count)) %>%
# ungroup()
funcCountNames <- function(data, Period, Factor, tile = 5) {
col_Period <- enquo(Period)
col_Factor <- enquo(Factor)
dateList <- data$col_Period %>% unique() %>% sort()
for (i in 2:length(dateList)){
}
return(result)
}
X <- starmine %>% as_tibble() %>% mutate(rtn = ret.0.1.m)
X %>% colnames
X_BASE <-
X %>%
select(date, symbol, rtn)
X_FCT <-
X %>%
select_if(is.numeric) %>%
select(-rtn, -ret.0.1.m)
XX_TILE <-
X_BASE %>%
bind_cols(X_FCT) %>%
gather(key = "Factor", value = "Value", -(date:rtn)) %>%
split(.$Factor) %>%
map(funcCalcTurnover)