# process_data_for_paper.R

#  Copyright 2015, INSEAD
#  by T. Evgeniou, Enric Junque de Fortuny, Nick Nassuphis, Theo Vermaelen 
#  Dual licensed under the MIT or GPL Version 2 licenses.

# NOTE: These data files need to be available:

# 1) GENERATED BY create_bbissuers_data.R:
# "../FinanceData/created_projects_datasets/BUYBACKS.Rdata"

# 2) from http://www.saif.sjtu.edu.cn/facultylist/yyuan/Misp_Score.csv 
# "../FinanceData/rawdata_stambaugh_mispricing_index/Stambaugh_Mispricing_Score.csv" 
# which (see below) generates "../FinanceData/created_projects_datasets/StambaughBUYBACKS.Rdata"
# "../FinanceData/created_projects_datasets/Buybacks.Institutional.number.Rdata" created below

##########################################################################################
# These are all the tables used in the .Rnw for the paper
##########################################################################################

# Start from an empty workspace so the whole script can be rerun from scratch
rm(list = ls())

# Project helper code and the global parameters used for the paper
source("helpers/lib_helpers.R", chdir = TRUE)
source("helpers/latex_code.R")
source("helpers/ff_industries_sic.R")
source("Paper_global_parameters.R")

# Snapshot of the names defined so far. Saving and loading everything takes
# time, so only what is needed is saved at the end.
initial_vars <- ls(all.names = TRUE)

##########################################################################################
# FILE LOCATIONS
##########################################################################################

# Input created by create_bbissuers_data.R
buybacks_data_location <-
  "../FinanceData/created_projects_datasets/BUYBACKS.Rdata"
# Raw Stambaugh-Yuan mispricing scores and the per-event cache built from them
Stambaugh_data_location <-
  "../FinanceData/rawdata_stambaugh_mispricing_index/Stambaugh_Mispricing_Score.csv"
Stambaugh_buybacks_data_location <-
  "../FinanceData/created_projects_datasets/StambaughBUYBACKS.Rdata"
# Alternative factor sets
mispricing_factors_data_location <-
  "../FinanceData/rawdata_indices_and_factors/M4.csv"
q_factors_data_location <-
  "../FinanceData/rawdata_indices_and_factors/qfactors.csv"
# Cached institutional-ownership variables for the buyback events
buybacks_institutional_data_location <-
  "../FinanceData/created_projects_datasets/Buybacks.Institutional.number.Rdata"

##########################################################################################
# HELPER FUNCTIONS
##########################################################################################

# Assumes a date string formatted as yyyy-mm-dd
# Returns the first seven characters ("yyyy-mm") of each element of date_str.
date_str_to_yyyy_mm <- function(date_str) {
  str_sub(date_str, start = 1, end = 7)
}

##########################################################################################
# GET THE DATA THAT WAS GENERATED BY create_bbissuers_data.R
##########################################################################################

load(buybacks_data_location)
# Every data filter lives in this file, so the filters can be changed for the
# paper in one place
source("filter_data_for_paper.R")

# Long names and short codes of the two datasets handled throughout the script
dataset_names <- c("buybacks", "issuers")
dataset_codes <- c("bb", "iss")

# dataset_name can be either "buybacks" or "issuers"
# Attaches a DatesMonth element to the DATASET of the requested dataset.
#
# dataset_name: passed unchanged to get_data().
# Returns the dataset with DATASET$DatesMonth set to the result of
# create_dates_month() (event dates against the row names of the monthly risk
# factors), with its columns named by the events' permno.
add_dates <- function(dataset_name) {
  dataset <- get_data(dataset_name)
  # Not strictly needed any more; could be simplified to only the dates needed
  dates_month <- create_dates_month(
    dataset$DATASET$SDC$Event.Date,
    rownames(dataset$Risk_Factors_Monthly)
  )
  colnames(dates_month) <- dataset$DATASET$SDC$permno
  dataset$DATASET$DatesMonth <- dates_month
  dataset
}

# Both datasets, each extended with its DatesMonth matrix
BUYBACK_DATA <- add_dates("buybacks")
ISSUERS_DATA <- add_dates("issuers")

##########################################################################################

# Per-dataset event weights: market capitalisation when value weighting is
# switched on (do.value.weight), otherwise a vector of ones of the same length.
value_weights <- list()
for (dataset_code in dataset_codes) {
  market_cap <- get_data(dataset_code)$DATASET$CRSP$Market.Cap
  value_weights[[dataset_code]] <- if (do.value.weight) {
    market_cap
  } else {
    rep(1, length(market_cap))
  }
}

##########################################################################################
# GET THE STAMBAUGH-YUAN MISPRICING DATA
##########################################################################################

# This is slow
if (do_Stambaugh_calculation) {
  Stambaugh_data = read.csv(Stambaugh_data_location, sep=",")
  # Reformat the yyyymm column from "yyyymm" to "yyyy-mm" so it can be compared
  # with the output of date_str_to_yyyy_mm()
  Stambaugh_data$yyyymm = paste(str_sub(Stambaugh_data$yyyymm, start=1, end=4),
                           str_sub(Stambaugh_data$yyyymm, start=5, end=6),
                           sep="-")

  # Looks up the mispricing score (MISP column) of every event of a dataset.
  #
  # dataset_name: passed unchanged to get_data().
  # last_month:   if TRUE, use the score of the month before the event month;
  #               otherwise the score of the event month itself.
  # Returns one value per event: the MISP value when exactly one row of
  # Stambaugh_data matches the event's permno and month, NA otherwise.
  get_stambaugh_data_for_dataset <- function(dataset_name, last_month=FALSE) {
    x = get_data(dataset_name)$DATASET$SDC
    # The lookup month is computed for ALL events up front and indexed per
    # event below. (Previously it was computed from x$Event.Date[i] outside the
    # sapply() closure, where `i` is not defined.)
    event_dates = x$Event.Date
    if (last_month) {
      event_dates = AddMonths(event_dates, -1)
    }
    event_yyyymm = date_str_to_yyyy_mm(event_dates)
    sapply(seq_along(x$CUSIP), function(i) {
      getid = which(Stambaugh_data$permno == x$permno[i] &
                    Stambaugh_data$yyyymm == event_yyyymm[i])
      # Only an unambiguous (single-row) match is accepted
      if (length(getid) == 1) Stambaugh_data$MISP[getid] else NA
    })
  }

  StambaughBB = get_stambaugh_data_for_dataset("buybacks", last_month=FALSE)
  StambaughBB_lastmonth = get_stambaugh_data_for_dataset("buybacks", last_month=TRUE)

  StambaughISS = get_stambaugh_data_for_dataset("issuers", last_month=FALSE)
  StambaughISS_lastmonth = get_stambaugh_data_for_dataset("issuers", last_month=TRUE)

  # Cache the results; they are loaded back from this file right after this block
  save(StambaughBB, StambaughISS, StambaughBB_lastmonth, StambaughISS_lastmonth,
       file=Stambaugh_buybacks_data_location)
}

# Use the cached scores; the previous-month variant is the one attached to
# each dataset
load(Stambaugh_buybacks_data_location)
BUYBACK_DATA$DATASET$Stambaugh <- StambaughBB_lastmonth
ISSUERS_DATA$DATASET$Stambaugh <- StambaughISS_lastmonth

##########################################################################################
# THE RISK FACTORS
##########################################################################################

# Global copies of the monthly factor and market series stored with the
# buyback data
Risk_Factors_Monthly <- BUYBACK_DATA$Risk_Factors_Monthly
Market_Monthly <- BUYBACK_DATA$Market_Monthly

# Mispricing factors
# When use_mispricing_factors is set, Risk_Factors_Monthly is REPLACED by a
# table built from the mispricing-factor CSV plus the HML, RMW, CMA and SMB
# columns of the original table. In both branches the two model formulas used
# by the tables further down are defined.
if (use_mispricing_factors) { 
  misprice_factors = read.csv(mispricing_factors_data_location, sep=",", dec=".")
  # Row names become the first day of each YYYYMM month, as "yyyy-mm-dd"
  factor_dates = as.Date(paste0(misprice_factors$YYYYMM, "01"), format="%Y%m%d")
  rownames(misprice_factors) = as.character(factor_dates)
  # Get only the factor columns, i.e. exclude the YYYYMM column
  misprice_factors = misprice_factors[, c("MKTRF", "SMB", "MGMT", "PERF", "RF")]
  # Keep only the months (compared as "yyyy-mm") that also appear in
  # Risk_Factors_Monthly
  use_only = which(date_str_to_yyyy_mm(rownames(misprice_factors)) %in%
                   date_str_to_yyyy_mm(rownames(Risk_Factors_Monthly)))
  misprice_factors = misprice_factors[use_only,]
  # This line does not seem to be doing anything
  # sum(str_sub(rownames(misprice_factors), start=1, end=7) != 
  #   str_sub(rownames(Risk_Factors_Monthly), start=1, end=7))
  # NOTE(review): the assignment below (and the cbind further down) only works
  # if the filtered CSV has exactly one row per row of Risk_Factors_Monthly,
  # in the same order - confirm the CSV covers the whole sample period.
  rownames(misprice_factors) = rownames(Risk_Factors_Monthly)
  # This line does not seem to be doing anything
  # max(abs(misprice_factors$MKTRF[(misprice_factors$MKTRF != Risk_Factors_Monthly$Delta)]-
  #   Risk_Factors_Monthly$Delta[(misprice_factors$MKTRF != Risk_Factors_Monthly$Delta)]))
  # The CSV's SMB is renamed SMB2 so it does not clash with the SMB column
  # taken from the original factor table
  names(misprice_factors)[which(names(misprice_factors) == "SMB")] = "SMB2"
  misprice_factors = cbind(misprice_factors, 
                           Risk_Factors_Monthly[, c("HML", "RMW", "CMA", "SMB")])
  # The market factor is called "Delta" in the model formulas
  names(misprice_factors)[which(names(misprice_factors) == "MKTRF")] = "Delta"
  Risk_Factors_Monthly = misprice_factors

  # Note: in this branch "three_factor_model" has four regressors
  # (Delta, SMB2, MGMT, PERF); the name is kept because later code uses it.
  three_factor_model="(ri - RF) ~ Delta + SMB2 + MGMT + PERF"
  five_factor_model = "(ri - RF) ~ Delta + SMB + HML + RMW + CMA"
  
  # car_table() results for the buyback events under the model above, reduced
  # to the rows listed in reported_times. Despite the variable's name, the
  # regressors are Delta, SMB2, MGMT and PERF.
  misprice_factors_irats_MKT_HML_ME_IA_ROE = car_table(
    BUYBACK_DATA$DATASET$returns_by_event_monthly, 
    BUYBACK_DATA$DATASET$SDC$Event.Date, 
    Risk_Factors_Monthly, 
    formula_used=three_factor_model)$results
  misprice_factors_irats_MKT_HML_ME_IA_ROE = 
    misprice_factors_irats_MKT_HML_ME_IA_ROE[reported_times,]
  
} else {
  three_factor_model="(ri - RF) ~ Delta + SMB + HML"
  five_factor_model = "(ri - RF) ~ Delta + SMB + HML + RMW + CMA"
}

# q-factors test
# When use_q_factors is set, Risk_Factors_Monthly is REPLACED by the q-factor
# table (MKT, ME, I.A, ROE) plus the SMB, HML, RF, RMW and CMA columns of the
# current factor table.
# NOTE(review): the replacement table has no "Delta" column, while the
# three/five factor formulas defined above reference Delta - confirm the
# later tables are not expected to run with use_q_factors switched on.
if (use_q_factors) { 
  # The CSV uses ";" as separator and "," as decimal mark
  qfactors = read.csv(q_factors_data_location, sep=";", dec=",")
  # Row names are "yyyy-mm", built from the Year and Month columns
  rownames(qfactors) = paste(qfactors[, "Year"], sprintf("%02d", qfactors[, "Month"]), sep="-")
  # Keep only the months that also appear in Risk_Factors_Monthly
  use_only = which(rownames(qfactors) %in% date_str_to_yyyy_mm(rownames(Risk_Factors_Monthly)))
  qfactors = qfactors[use_only,]
  qfactors = qfactors[, c("MKT", "ME", "I.A", "ROE")]
  # If read.csv is used with dec="," this line is not needed
  # qfactors = apply(qfactors, 2, function(r) as.numeric(str_replace(r,",",".")))
  # presumably converts percentages to fractions - TODO confirm the CSV units
  qfactors = qfactors / 100
  # Replace each "yyyy-mm" row name with the full row name of the matching
  # month in Risk_Factors_Monthly, so rows can be looked up by name below
  rownames(qfactors) = rownames(Risk_Factors_Monthly)[
    match(rownames(qfactors), date_str_to_yyyy_mm(rownames(Risk_Factors_Monthly)))]
  qfactors = cbind(qfactors,
                   Risk_Factors_Monthly[rownames(qfactors), c("SMB", "HML","RF","RMW","CMA")])
  Risk_Factors_Monthly = qfactors
  formula_used="(ri - RF) ~ MKT + HML + ME + I.A + ROE"
  # car_table() results for the buyback events under the q-factor model,
  # reduced to the rows listed in reported_times
  qfactors_irats_MKT_HML_ME_IA_ROE = car_table(
    BUYBACK_DATA$DATASET$returns_by_event_monthly, 
    BUYBACK_DATA$DATASET$SDC$Event.Date, 
    qfactors, 
    formula_used=formula_used)$results
  qfactors_irats_MKT_HML_ME_IA_ROE = qfactors_irats_MKT_HML_ME_IA_ROE[reported_times,]
}

##########################################################################################
# PREPARE ALL VARIABLES

# Some event categories (all T/F vectors)
# Common event categories for both buybacks and issuers
eu_index <- list()
Idiosyncr_events <- list()
VOL_events <- list()
company_subset <- list()
stam_events <- list()
for (code in c("bb", "iss")) {
  data <- get_data(code)

  # Idiosyncratic risk: a LOW R-squared score marks a HIGH-idiosyncratic event
  rsq_score <- data$DATASET$CRSP$Rsq_score
  Idiosyncr_events[[code]]$highidio <-
    rsq_score < quantile(rsq_score, quantile_simple)
  Idiosyncr_events[[code]]$lowidio <-
    rsq_score > quantile(rsq_score, 1 - quantile_simple)
  rm(rsq_score)

  # Volatility before the event
  pre_vol_score <- data$DATASET$CRSP$pre_vol_Score
  VOL_events[[code]]$highvol <-
    pre_vol_score > quantile(pre_vol_score, 1 - quantile_simple)
  VOL_events[[code]]$lowvol <-
    pre_vol_score < quantile(pre_vol_score, quantile_simple)
  rm(pre_vol_score)

  # Valuation index: top quantile = undervalued, bottom quantile = overvalued
  val_ind <- data$Valuation_Index
  company_subset[[code]]$undervalued <-
    val_ind > quantile(val_ind, 1 - quantile_simple)
  company_subset[[code]]$overvalued <-
    val_ind < quantile(val_ind, quantile_simple)
  rm(val_ind)

  # Mispricing score: thresholds come from the non-missing scores only, and
  # events with a missing score belong to neither group
  stam <- data$DATASET$Stambaugh
  stam_events[[code]]$high <-
    !is.na(stam) & (stam > quantile(stam[!is.na(stam)], 1 - quantile_simple))
  stam_events[[code]]$low <-
    !is.na(stam) & (stam < quantile(stam[!is.na(stam)], quantile_simple))
  rm(stam)

  # EU index per event: each of the three characteristics contributes
  # 2 (high group), 0 (low group) or 1 (neither), so the index is in 0..6
  eu_index[[code]] <- sapply(1:length(data$DATASET$SDC$Event.Date), function(i) {
    idio_points <- ifelse(Idiosyncr_events[[code]]$highidio[i], 2,
                          ifelse(Idiosyncr_events[[code]]$lowidio[i], 0, 1))
    vol_points <- ifelse(VOL_events[[code]]$highvol[i], 2,
                         ifelse(VOL_events[[code]]$lowvol[i], 0, 1))
    value_points <- ifelse(company_subset[[code]]$undervalued[i], 2,
                           ifelse(company_subset[[code]]$overvalued[i], 0, 1))
    idio_points + vol_points + value_points
  })
}

# Buyback events in the top / bottom quantile of leverage. Thresholds come
# from the non-missing values; events with a missing value are in neither group.
lev_events <- list()
lt_lte <- BUYBACK_DATA$DATASET$CRSP$leverage_lt_over_lt_plus_e
lev_events$bb$high <-
  scrub(lt_lte) > quantile(lt_lte[!is.na(lt_lte)], 1 - quantile_simple) &
  !is.na(lt_lte)
lev_events$bb$low <-
  scrub(lt_lte) < quantile(lt_lte[!is.na(lt_lte)], quantile_simple) &
  !is.na(lt_lte)
rm(lt_lte)

# Same construction for the mean analyst recommendation score of the month
# before the event
eps_events <- list()
rec_score <- BUYBACK_DATA$DATASET$ibes$month_minus1$mean_rec_score
eps_events$bb$high <-
  scrub(rec_score) > quantile(rec_score[!is.na(rec_score)], 1 - quantile_simple) &
  !is.na(rec_score)
eps_events$bb$low <-
  scrub(rec_score) < quantile(rec_score[!is.na(rec_score)], quantile_simple) &
  !is.na(rec_score)
rm(rec_score)

# EU index of every buyback event: 2 / 1 / 0 points for each of idiosyncratic
# risk, volatility and valuation (high group / neither / low group), summed
BUYBACK_DATA$EU_index <- sapply(
  1:length(BUYBACK_DATA$DATASET$SDC$Event.Date),
  function(i) {
    idio_points <- ifelse(Idiosyncr_events$bb$highidio[i], 2,
                          ifelse(Idiosyncr_events$bb$lowidio[i], 0, 1))
    vol_points <- ifelse(VOL_events$bb$highvol[i], 2,
                         ifelse(VOL_events$bb$lowvol[i], 0, 1))
    value_points <- ifelse(company_subset$bb$undervalued[i], 2,
                           ifelse(company_subset$bb$overvalued[i], 0, 1))
    idio_points + vol_points + value_points
  }
)

# Events at or above the 80th / at or below the 20th percentile of the index
high_EU_bb <- BUYBACK_DATA$EU_index >= quantile(BUYBACK_DATA$EU_index, 0.8)
low_EU_bb <- BUYBACK_DATA$EU_index <= quantile(BUYBACK_DATA$EU_index, 0.2)
# 0/1 flag: non-zero count of buyback events in the past two years
buybacks.events.past2years <-
  1 * (BUYBACK_DATA$DATASET$CRSP$buybacks_events_past2years != 0)

# Recommendation score: 1. Strong Buy, 2. Buy, 3. Hold, 4. Underperform, 5. Sell
# A HIGHER mean score one month before the event than two months before
# therefore counts as a downgrade. Events missing either score are in no group.
mm1 <- BUYBACK_DATA$DATASET$ibes$month_minus1$mean_rec
mm2 <- BUYBACK_DATA$DATASET$ibes$month_minus2$mean_rec
both_scores_present <- !is.na(mm1) & !is.na(mm2)
downgraded_events <- both_scores_present & scrub(mm1) > scrub(mm2)
not_downgraded_events <- both_scores_present & scrub(mm1) <= scrub(mm2)
upgraded_events <- both_scores_present & scrub(mm1) < scrub(mm2)
rm(mm1, mm2, both_scores_present)

#### Continuous variables now
# Global shorthands for per-event characteristics of the buyback dataset

# Valuation and size
valuation_index_bb <- BUYBACK_DATA$Valuation_Index
Firm_size <- BUYBACK_DATA$DATASET$CRSP$Market.Cap
Firm_size_score <- BUYBACK_DATA$DATASET$CRSP$Market.Cap_score
Prior_R <- BUYBACK_DATA$DATASET$CRSP$recent_performance
Prior_R_score <- BUYBACK_DATA$DATASET$CRSP$recent_performance_score
BEME <- BUYBACK_DATA$DATASET$CRSP$BE.ME
BEME_score <- BUYBACK_DATA$DATASET$CRSP$BE.ME_score
U_index <- BUYBACK_DATA$Valuation_Index
EU_index <- BUYBACK_DATA$EU_index

# Risk measures
Vol_raw <- BUYBACK_DATA$DATASET$CRSP$pre_vol
Vol_raw_score <- BUYBACK_DATA$DATASET$CRSP$pre_vol_Score
Idiosyncratic <- BUYBACK_DATA$DATASET$CRSP$IVOL
Idiosyncratic_score <- BUYBACK_DATA$DATASET$CRSP$IVOL_score
One_m_Rsqr <- 1 - BUYBACK_DATA$DATASET$CRSP$Rsq
One_m_Rsqr_score <- 1 - BUYBACK_DATA$DATASET$CRSP$Rsq_score

# Mispricing, analysts and the event itself
StambaughBB <- BUYBACK_DATA$DATASET$Stambaugh
Analyst_coverage <- BUYBACK_DATA$DATASET$ibes$month_minus1$analyst_coverage
Event.Size <- BUYBACK_DATA$DATASET$SDC$Event.Size
buybacks.events.past2years <-
  1 * (BUYBACK_DATA$DATASET$CRSP$buybacks_events_past2years != 0)

# Payout: payout ratios outside [0, 100] are treated as missing
Total.Payout <- BUYBACK_DATA$DATASET$CRSP$Total_Payout
lagged.dividend.payout.ratio <- BUYBACK_DATA$DATASET$CRSP$divident_payout_ratio
lagged.dividend.payout.ratio[
  scrub(lagged.dividend.payout.ratio) < 0 |
  scrub(lagged.dividend.payout.ratio) > 100] <- NA

# Accounting variables
Leverage <- BUYBACK_DATA$DATASET$CRSP$leverage_d_over_d_plus_e
operating.income <- BUYBACK_DATA$DATASET$CRSP$operating_income
std.operating.income <- BUYBACK_DATA$DATASET$CRSP$std_operating_income
non.operating.income <- BUYBACK_DATA$DATASET$CRSP$non_operating_income
liquid.assets <- BUYBACK_DATA$DATASET$CRSP$liquid_assets
price.earnings.ratio <- BUYBACK_DATA$DATASET$CRSP$price_earnings_ratio
capital.expenditures <- BUYBACK_DATA$DATASET$CRSP$capital_expenditures
profitability <- BUYBACK_DATA$DATASET$CRSP$profitability
net_debt <- BUYBACK_DATA$DATASET$CRSP$net_debt
tax_rate <- BUYBACK_DATA$DATASET$CRSP$tax_rate

# Institutional ownership variables for the buyback events. The computation
# is switched off (if (0)): the cached values are loaded from
# buybacks_institutional_data_location instead. Change the condition to
# recompute and overwrite the cache.
if (0) {
  # Institutional - still not standard across projects
  # For each event: the most recent non-missing value among the entries whose
  # period (names of tmp, as "yyyy-mm", plus one month) is before the event
  # date; NA when there is none.
  Institutional = sapply(1:length(BUYBACK_DATA$DATASET$SDC$CUSIP), function(i) {
    tmp = BUYBACK_DATA$DATASET$institutional$Institutional.Ownership.Ratio.1_score[[i]]
    useonly = which(AddMonths(as.Date(paste(names(tmp),"01", sep="-")),1) < BUYBACK_DATA$DATASET$SDC$Event.Date[i])
    tmp = tmp[useonly]
    ifelse(sum(!is.na(tmp)), tail(tmp[!is.na(tmp)],1), NA)
  })
  # Values of 100 or more are treated as missing
  Institutional[scrub(Institutional) >= 100] <- NA
  # Same selection rule for the number of institutional investors
  Institutional.number = sapply(1:length(BUYBACK_DATA$DATASET$SDC$CUSIP), function(i) {
    tmp = BUYBACK_DATA$DATASET$institutional$num.institutional.investors_score[[i]]
    useonly = which(AddMonths(as.Date(paste(names(tmp),"01", sep="-")),1) < BUYBACK_DATA$DATASET$SDC$Event.Date[i])
    tmp = tmp[useonly]
    ifelse(sum(!is.na(tmp)), tail(tmp[!is.na(tmp)],1), NA)
  })
  # NOTE(review): this line cannot change anything - every value of
  # Institutional that was >= 100 has already been set to NA above, so the
  # index is FALSE or NA everywhere. If the intent was to blank the same events
  # in Institutional.number, the mask has to be computed before Institutional
  # is modified - confirm the intent.
  Institutional.number[Institutional >= 100] <- NA
  save(Institutional,Institutional.number, file=buybacks_institutional_data_location)
} else {
  load(buybacks_institutional_data_location)
}

# THESE ARE THE VARIABLES WE USE IN THE DATA SUMMARY STATS
# One column per characteristic, one row per buyback event. The column labels
# are assigned right below, in the same order as the columns here.
all_characteristics_continuous_summary <- cbind(
  buybacks.events.past2years,
  Firm_size,
  100 * Prior_R,
  BEME,
  U_index,
  EU_index,
  100 * Vol_raw,
  One_m_Rsqr,
  StambaughBB,
  Event.Size,
  Analyst_coverage,
  Total.Payout,
  lagged.dividend.payout.ratio,
  Leverage,
  profitability,
  net_debt,
  tax_rate,
  operating.income,
  std.operating.income,
  non.operating.income,
  liquid.assets,
  price.earnings.ratio,
  capital.expenditures,
  Institutional,
  Institutional.number
)

# NOTE(review): several labels say "(Score)" while the matching column above
# is the raw variable (e.g. Firm_size, not Firm_size_score), and the "Leverage"
# column is leverage_d_over_d_plus_e while other tables in this file use
# leverage_lt_over_lt_plus_e - confirm the labels are as intended.
colnames(all_characteristics_continuous_summary) <- c(
  "Announced Repurchace in Previous 2 Years (0/1)",
  "Market Cap. (Score)",
  "Prior Returns (Score)",
  "BE/ME (Score)",
  "U-index",
  "EU-index",
  "Volatility (Score)",
  "One minus Rsq (Score)",
  "Mispricing Measure",
  "Percent Shares",
  "Analyst Coverage (Score)",
  "Total Payout in Event Year before Event",
  "Lag Dividend Payout Ratio",
  "Leverage",
  "Profitability (ROA)",
  "Net Debt",
  "Tax Rate",
  "Operating Income (Percent assets)",
  "std Operating Income",
  "Non-Operating Income (Percent assets)",
  "Liquid Assets (Percent assets)",
  "Price/Earnings Ratio",
  "Capital Expenditures (Percent assets)",
  "Institutional Holdings (Score)",
  "Number of Institutions (Score)"
)

##########################################################################################
# THIS IS WHERE THE VARIABLES FOR THE PAPER TABLES AND FIGURES ARE GENERATED
##########################################################################################

# Note: Groups of tables in terms of similarity:
# 1) II, III, IV, VI, and VII (VI and VII are almost identical)
# 2) V, XIII, XIV, and XV (XIII and XIV are almost identical)
# 3) XI and XII (almost identical)

##########################################################################################
# Data Summary

# Summary statistics of event size, market cap and BE/ME for one dataset.
#
# dataset_code: passed unchanged to get_data().
# Returns a 3 x 6 matrix (rows "Percent authorized", "Market cap.", "BE/ME";
# columns Min., Median, Mean, Max., std, Missing), rounded to one decimal.
# Values that are NA or exactly 0 are excluded from the statistics and
# counted in the "Missing" column.
prepare_data_summary <- function(dataset_code) {
  events <- get_data(dataset_code)$DATASET
  stat_labels <- c("Min.", "Median", "Mean", "Max.")

  # BE/ME values of 1e20 or more are treated as missing
  book_to_market <- events$CRSP$BE.ME
  book_to_market[book_to_market >= 1e20] <- NA

  summarize_series <- function(values) {
    is_missing <- is.na(values) | values == 0
    usable <- values[!is_missing]
    round(c(summary(usable)[stat_labels], sd(usable), sum(is_missing)), 1)
  }

  series <- list(events$SDC$Event.Size, events$CRSP$Market.Cap, book_to_market)
  res <- do.call(rbind, lapply(series, summarize_series))
  dimnames(res) <- list(
    c("Percent authorized", "Market cap.", "BE/ME"),
    c(stat_labels, "std", "Missing")
  )
  res
}

# Compute the data summary for both buybacks and issuers
# One summary matrix per dataset code
data_summary <- list()
for (dataset_code in dataset_codes) {
  data_summary[[dataset_code]] <- prepare_data_summary(dataset_code)
}

##########################################################################################
# Table I: Buyback and SEO announcements during 1985-2015: Descriptive Statistics

# Builds the descriptive statistics (mean, median, standard deviation, 20th and
# 80th percentile) of the event characteristics, for every dataset code.
# Returns a list with one matrix per element of dataset_codes; each matrix has
# one row per characteristic.
descriptive_stats_table <- function() {
  # A family of statistics that first keeps only the values selected by `keep`
  make_stat_family <- function(keep) {
    list(
      mean = function(x) { mean(x[keep(x)]) },
      median = function(x) { median(x[keep(x)]) },
      sd = function(x) { sd(x[keep(x)]) },
      quantile = function(x, q) { quantile(x[keep(x)], q) }
    )
  }
  # Ignores missing values only
  non_na_functions <- make_stat_family(function(x) !is.na(x))
  # Ignores missing values and zeros
  non_na_zero_functions <- make_stat_family(
    function(x) !is.na(x) & scrub(x) != 0)

  # Note: Difference between the two datasets:
  # BB has:  BUYBACK_DATA$Valuation_Index, eu_index$bb, and StambaughBB
  # ISS has: -                           , -         , and ISSUERS_DATA$DATASET$Stambaugh

  prepare_descriptive_stats <- function(dataset_code) {
    x <- get_data(dataset_code)
    events <- x$DATASET

    # Description of one table row: the data, whether zeros are dropped, and
    # whether the statistics are reported in percent
    row_spec <- function(data, non_zero, to_percentage) {
      list(data = data, non_zero = non_zero, to_percentage = to_percentage)
    }

    rows_list <- list()
    rows_list[["Market Cap."]] <- row_spec(events$CRSP$Market.Cap, TRUE, FALSE)
    rows_list[["Prior Returns"]] <- row_spec(events$CRSP$recent_performance, TRUE, TRUE)
    rows_list[["BE/ME"]] <- row_spec(events$CRSP$BE.ME, TRUE, FALSE)
    rows_list[["Volatility"]] <- row_spec(events$CRSP$pre_vol, TRUE, TRUE)
    # NOTE(review): the row is labelled (1-R^2) but the data passed is Rsq
    # itself, while One_m_Rsqr elsewhere in this file is 1 - Rsq - confirm
    # which is intended.
    rows_list[["(1-R^2)"]] <- row_spec(events$CRSP$Rsq, TRUE, FALSE)

    # Dataset-specific elements
    if (dataset_code == "bb") {
      rows_list[["U-index"]] <- row_spec(x$Valuation_Index, FALSE, FALSE)
      # TODO: Abstract this
      rows_list[["EU-index"]] <- row_spec(eu_index$bb, FALSE, FALSE)
      # TODO: Abstract this (see how it is abstracted for ISS below)
      rows_list[["Mispricing Measure"]] <- row_spec(StambaughBB, FALSE, FALSE)
    } else if (dataset_code == "iss") {
      # Note: This removes zeros, while the Stambaugh for BB does not
      rows_list[["Mispricing Measure"]] <- row_spec(events$Stambaugh, TRUE, FALSE)
    }
    rows_list[["Percent Shares"]] <- row_spec(events$SDC$Event.Size, TRUE, FALSE)
    rows_list[["Leverage"]] <- row_spec(events$CRSP$leverage_lt_over_lt_plus_e, TRUE, FALSE)

    # Each row contains:
    # mean, median, std, 0.2 quantile, 0.8 quantile
    stat_rows <- lapply(rows_list, function(elem) {
      stats <- if (elem$non_zero) non_na_zero_functions else non_na_functions
      single_row <- c(
        stats$mean(elem$data),
        stats$median(elem$data),
        stats$sd(elem$data),
        stats$quantile(elem$data, 0.2),
        stats$quantile(elem$data, 0.8))
      if (elem$to_percentage) 100 * single_row else single_row
    })
    res <- do.call(rbind, stat_rows)

    rownames(res) <- names(rows_list)
    colnames(res) <- c("Mean", "Median", "Standard Dev.",
                       "20^{th} Percentile", "80^{th} Percentile")
    res
  }

  # Compute the descriptive stats for both buybacks and issuers
  descriptive_stats <- list()
  for (dataset_code in dataset_codes) {
    descriptive_stats[[dataset_code]] <- prepare_descriptive_stats(dataset_code)
  }
  descriptive_stats
}

descriptive_stats <- descriptive_stats_table()

##########################################################################################
# Helpers: Common to tables II, III, IV, VI, and VII

# Maps a method name to the function that computes its table:
# "IRATS" -> car_table, "CAL" -> calendar_table. Any other name is an error.
get_function <- function(method) {
  if (method == "IRATS") {
    return(car_table)
  }
  if (method == "CAL") {
    return(calendar_table)
  }
  stop("invalid method")
}

# Method can be "IRATS" or "CAL"
# col_names is a list of two elements
# Computes one part of a table panel: the results of `method` under the three
# and the five factor model, side by side.
#
# x:         a DATASET (uses x$returns_by_event_monthly and x$SDC$Event.Date).
# useonly:   index (positions or logical) of the events to include.
# weights:   either one weight per event of x, or a single value (e.g. 1 for
#            equal weighting) that applies to every event.
# method:    "IRATS" or "CAL" (see get_function).
# col_names: two names; they replace the first column name of the results of
#            the first and the second model respectively.
# Returns the two $results tables bound column-wise.
single_panel_part <- function(x, useonly, weights, method, col_names) {
  models = list(`3F`=three_factor_model, `5F`=five_factor_model)
  func = get_function(method)

  # A single weight must be passed through as is: subsetting it with useonly
  # (as in 1[useonly]) would produce NA for every index other than the first.
  # This matches get_weights(), which also returns the plain scalar.
  if (length(weights) == 1) {
    selected_weights = weights
  } else {
    selected_weights = weights[useonly]
  }

  panel_part = c()
  for (model_index in seq_along(models)) {
    formula_used = models[[model_index]]
    tmp = func(x$returns_by_event_monthly[, useonly],
               x$SDC$Event.Date[useonly],
               Risk_Factors_Monthly,
               formula_used=formula_used,
               value.weights=selected_weights)$results
    colnames(tmp)[1] = col_names[model_index]
    panel_part = cbind(panel_part, tmp)
  }

  return(panel_part)
}

# Interleaves two six-column panel parts model by model: columns 1-3 of each
# part first, followed by columns 4-6 of each part.
combine_panel_parts <- function(panel_part_1, panel_part_2) {
  first_model <- 1:3
  second_model <- 4:6
  cbind(
    panel_part_1[, first_model], panel_part_2[, first_model],
    panel_part_1[, second_model], panel_part_2[, second_model]
  )
}

##########################################################################################
# Table II: Buyback announcements during 1985-2015

# buyback_announcements_table$<IRATS or CAL>$<all or undervaluation>

# Builds Table II. Returns a list with elements IRATS and CAL; each holds
# `all` (all buyback events) and `undervaluation` (undervalued and overvalued
# events side by side).
create_buyback_announcements_table <- function() {
  x <- BUYBACK_DATA$DATASET

  # One panel: all events, then the undervalued/overvalued comparison
  build_panel <- function(method, weights, all_names, under_names, over_names) {
    all_part <- single_panel_part(
      x, 1:length(x$SDC$CUSIP), weights, method, all_names)
    under_part <- single_panel_part(
      x, which(company_subset$bb$undervalued), weights, method, under_names)
    over_part <- single_panel_part(
      x, which(company_subset$bb$overvalued), weights, method, over_names)
    list(all = all_part,
         undervaluation = combine_panel_parts(under_part, over_part))
  }

  res <- list()
  # Panel A: IRATS, with a single weight of 1
  res[["IRATS"]] <- build_panel(
    "IRATS", 1,
    c("CAR 3F", "CAR 5F"), c("U:CAR3F", "U:CAR5F"), c("O:CAR3F", "O:CAR5F"))
  # Panel B: CAL, with the buyback weights
  res[["CAL"]] <- build_panel(
    "CAL", value_weights$bb,
    c("CAL 3F", "CAL 5F"), c("U:CAL3F", "U:CAL5F"), c("O:CAL3F", "O:CAL5F"))
  res
}

buyback_announcements_table <- create_buyback_announcements_table()

##########################################################################################
# Table III: Buyback announcements during 1985-2015: Value Weighted Portfolios

# value_weighted_table$<all_firms or no_large_firms>$<all or undervaluation>

# Builds Table III (calendar method, weights = market cap). Returns a list with
# elements all_firms and no_large_firms; each holds `all` and `undervaluation`.
create_value_weighted_table <- function() {
  x <- BUYBACK_DATA$DATASET
  method <- "CAL"
  weights <- x$CRSP$Market.Cap

  # One panel from three index sets: all / undervalued / overvalued events
  build_panel <- function(all_idx, under_idx, over_idx) {
    all_part <- single_panel_part(
      x, all_idx, weights, method, c("CAL 3F", "CAL 5F"))
    under_part <- single_panel_part(
      x, under_idx, weights, method, c("U:CAL3F", "U:CAL5F"))
    over_part <- single_panel_part(
      x, over_idx, weights, method, c("O:CAL3F", "O:CAL5F"))
    list(all = all_part,
         undervaluation = combine_panel_parts(under_part, over_part))
  }

  res <- list()

  # Panel A: All firms
  res[["all_firms"]] <- build_panel(
    1:length(x$SDC$CUSIP),
    which(company_subset$bb$undervalued),
    which(company_subset$bb$overvalued))

  # Panel B: No large firms (market cap score below 0.75). Under/overvaluation
  # thresholds are recomputed within this subset, using quantile_Uindex.
  useonly_valueL <- which(x$CRSP$Market.Cap_score < 0.75)
  subset_index <- BUYBACK_DATA$Valuation_Index[useonly_valueL]
  is_under <- subset_index > quantile(subset_index, 1 - quantile_Uindex)
  is_over <- subset_index < quantile(subset_index, quantile_Uindex)
  res[["no_large_firms"]] <- build_panel(
    useonly_valueL,
    useonly_valueL[which(is_under)],
    useonly_valueL[which(is_over)])

  res
}

value_weighted_table <- create_value_weighted_table()

##########################################################################################
# Table IV: SEO announcements during 1985-2015

# issuers_announcements_table$<IRATS or CAL>$<all or undervaluation>

# Builds Table IV. Returns a list with elements IRATS and CAL; each holds
# `all` (all issuer events), `lowMM` and `highMM` (events with a low / high
# mispricing measure).
create_issuers_announcements_table <- function() {
  x <- ISSUERS_DATA$DATASET

  # The same column names are used for all three parts of a panel
  build_panel <- function(method, weights, col_names) {
    list(
      all = single_panel_part(
        x, 1:length(x$SDC$CUSIP), weights, method, col_names),
      # Low mispricing measure
      lowMM = single_panel_part(
        x, stam_events$iss$low, weights, method, col_names),
      # High mispricing measure
      highMM = single_panel_part(
        x, stam_events$iss$high, weights, method, col_names)
    )
  }

  res <- list()
  # Panel A: IRATS, with a single weight of 1
  res[["IRATS"]] <- build_panel("IRATS", 1, c("CAR 3F", "CAR 5F"))
  # Panel B: CAL, with the issuer weights
  res[["CAL"]] <- build_panel("CAL", value_weights$iss, c("CAL 3F", "CAL 5F"))
  res
}

issuers_announcements_table <- create_issuers_announcements_table()

##########################################################################################
# Table V: Buyback returns over different time periods

# buyback_returns_periods_table$<IRATS or CAL>

# Weights to use for a method: the single value 1 for "IRATS", the selected
# entries of value_weights[[dataset_code]] for "CAL". Any other method name is
# an error.
get_weights <- function(method, dataset_code, useonly) {
  if (method == "IRATS") {
    return(1)
  }
  if (method == "CAL") {
    return(value_weights[[dataset_code]][useonly])
  }
  stop("invalid method")
}

# Builds Table V: buyback results for each row of periods_considered (start
# and end date in columns 1 and 2), under both models. Returns a list with
# elements IRATS and CAL, each the per-period results bound column-wise.
create_buyback_returns_periods_table <- function() {
  x <- BUYBACK_DATA$DATASET
  models <- list(`3F` = three_factor_model, `5F` = five_factor_model)

  # Method can be "IRATS" or "CAL"
  create_buyback_returns_periods_panel <- function(method) {
    func <- get_function(method)

    period_blocks <- lapply(1:nrow(periods_considered), function(i) {
      period_start <- periods_considered[i, 1]
      period_end <- periods_considered[i, 2]
      # Events announced within the period (both ends included)
      useonly <- (x$SDC$Event.Date >= period_start &
                  x$SDC$Event.Date <= period_end)
      weights <- get_weights(method, "bb", useonly)

      # One results table per model, side by side
      res <- c()
      for (formula_used in models) {
        res <- cbind(res, func(x$returns_by_event_monthly[, useonly],
                               x$SDC$Event.Date[useonly],
                               Risk_Factors_Monthly,
                               formula_used = formula_used,
                               value.weights = weights)$results)
      }

      # The first column is labelled with the period's start and end year
      period_label <- paste(str_sub(period_start, start = 1, end = 4),
                            str_sub(period_end, start = 1, end = 4),
                            sep = "-")
      colnames(res) <- c(paste(period_label, "3FF", sep = " "),
                         "t-stat", "p-value", "5FF", "t-stat", "p-value")
      rownames(res)[nrow(res)] <- "Observations"
      res
    })

    Reduce(cbind, period_blocks)
  }

  res <- list()
  for (method in c("IRATS", "CAL")) {
    res[[method]] <- create_buyback_returns_periods_panel(method)
  }
  res
}

buyback_returns_periods_table <- create_buyback_returns_periods_table()

##########################################################################################
# Table VI: Buyback and SEOs for Low and High Volatility companies
# Table VII: Buyback and SEOs for Low and High Idiosyncratic companies

# buybacks_issuers_volatility_table$<IRATS or CAL>$<bb or iss>$<lowvol or highvol>
# buybacks_issuers_idiosyncratic_table$<IRATS or CAL>$<bb or iss>$<lowidio or highidio>

# Builds Table VI (vol = "volatility") or Table VII (vol = "idiosyncratic").
# Returns res[[method]][[dataset_code]][[group]], with method "IRATS" or "CAL",
# dataset_code "bb" or "iss", and group the low / high group of the chosen
# measure. Any other value of vol is an error.
create_buybacks_issuers_vol_table <- function(vol) {
  if (vol == "volatility") {
    events_data <- VOL_events
    vol_types <- c("lowvol", "highvol")
  } else if (vol == "idiosyncratic") {
    events_data <- Idiosyncr_events
    vol_types <- c("lowidio", "highidio")
  } else {
    stop('Invalid vol argument. Must be "volatility" or "idiosyncratic"')
  }

  res <- list()
  # Panel A is IRATS (single weight of 1), Panel B is CAL (dataset weights)
  for (method in c("IRATS", "CAL")) {
    is_irats <- method == "IRATS"
    col_names <- if (is_irats) c("CAR 3F", "CAR 5F") else c("CAL 3F", "CAL 5F")
    # Loop through datasets: buybacks and issuers
    for (dataset_code in c("bb", "iss")) {
      x <- get_data(dataset_code)$DATASET
      weights <- if (is_irats) 1 else value_weights[[dataset_code]]
      # Calculations for the low and the high group
      for (vol_type in vol_types) {
        useonly <- events_data[[dataset_code]][[vol_type]]
        res[[method]][[dataset_code]][[vol_type]] <- single_panel_part(
          x, useonly, weights, method, col_names)
      }
    }
  }

  res
}

# Volatility
create_buybacks_issuers_volatility_table <- function() {
  # Convenience wrapper: the low/high split on the "volatility" measure.
  create_buybacks_issuers_vol_table("volatility")
}

# Table VI data, computed when the script is run.
buybacks_issuers_volatility_table = create_buybacks_issuers_volatility_table()

# Idiosyncratic volatility
create_buybacks_issuers_idiosyncratic_table <- function() {
  # Convenience wrapper: the low/high split on the "idiosyncratic" measure.
  create_buybacks_issuers_vol_table("idiosyncratic")
}

# Table VII data, computed when the script is run.
buybacks_issuers_idiosyncratic_table = create_buybacks_issuers_idiosyncratic_table()

##########################################################################################
# Table VIII: Relations across firm characteristics for Buybacks

create_buybacks_firm_characteristics_table <- function() {
  # Cross-tabulates buyback event groups against each other.
  #
  # Each cell is the percentage of the events in the row group that are also in
  # the column group: 100 * sum(row & col) / sum(row).
  # Returns a 10 x 8 matrix with the row and column labels set below.

  # Percentage of the events in `given` that are also flagged in `other`
  overlap_pct <- function(given, other) {
    100 * sum(given & other) / sum(given)
  }

  # Row groups (the conditioning sets), in the order of the row labels
  conditioning_sets = list(
    company_subset$bb$undervalued, company_subset$bb$overvalued,
    Idiosyncr_events$bb$highidio, Idiosyncr_events$bb$lowidio,
    VOL_events$bb$highvol, VOL_events$bb$lowvol,
    stam_events$bb$high, stam_events$bb$low,
    lev_events$bb$high, lev_events$bb$low
  )

  # Column groups, in the order of the column labels
  target_sets = list(
    Idiosyncr_events$bb$highidio, Idiosyncr_events$bb$lowidio,
    VOL_events$bb$highvol, VOL_events$bb$lowvol,
    lev_events$bb$high, lev_events$bb$low,
    eps_events$bb$high, eps_events$bb$low
  )

  # One numeric vector per row group, then stacked into a matrix
  table_rows = lapply(conditioning_sets, function(given) {
    vapply(target_sets, function(other) overlap_pct(given, other), numeric(1))
  })
  res = do.call(rbind, table_rows)

  rownames(res) = c("Undervalued", "Overvalued", 
                    "High Idiosync.", "Low Idiosync.",
                    "High Vol.", "Low Vol.", 
                    "High Mispr.", "Low Mispr.", 
                    "High Lev.", "Low Lev.")
  colnames(res) = c("H Idiosync.", "L Idiosync.", 
                    "H Vol.", "L Vol.", 
                    "H Lev.", "L Lev.", 
                    "H EPS unc.", "L EPS unc.")

  return(res)
}

# Table VIII data, computed when the script is run.
buybacks_firm_characteristics_table = create_buybacks_firm_characteristics_table()

##########################################################################################
# Table IX: Correlations of Buybacks Characteristics

create_buybacks_correlations_table <- function() {
  # Correlation matrix of four per-event buyback scores: one minus the CRSP
  # Rsq score, the CRSP pre_vol score, the valuation index and the Stambaugh
  # measure. The combined matrix is passed through scrub() before cor().
  dataset = BUYBACK_DATA$DATASET
  crsp = dataset$CRSP

  scores = scrub(cbind(1 - crsp$Rsq_score,
                       crsp$pre_vol_Score,
                       BUYBACK_DATA$Valuation_Index,
                       dataset$Stambaugh))
  colnames(scores) = c("Idiosyncratic Score", "Volatility Score", 
                       "U-Index Score", "Mispricing Measure")

  cor(scores)
}

# Table IX data, computed when the script is run.
buybacks_correlations_table = create_buybacks_correlations_table()

##########################################################################################
# Table X: EU relations with Firm Characteristics

create_eu_firm_characteristics_table <- function() {
  # Relates the buyback EU index (eu_index$bb) to firm characteristics.
  #
  # Returns a matrix with one row per characteristic and one column per EU-index
  # value (labelled "EU0", "EU1", ...). For the yes/no characteristics a cell is
  # a percentage rounded to 1 decimal; for the four continuous characteristics
  # it is the mean over the events with that index value (NAs dropped), rounded
  # to 2 decimals.
  x = BUYBACK_DATA$DATASET

  # Split a code string on "+" into its individual codes
  str_parts <- function(str) {
    return(unlist(str_split(str, "\\+")))
  }

  # All distinct individual codes appearing anywhere in vec
  unique_str <- function(vec) {
    return(unique(unlist(sapply(vec, function(i) str_parts(i)))))
  }

  # Number of codes in str that also appear in vec
  length_of_intersection <- function(str, vec) {
    return(length(intersect(str_parts(str), vec)))
  }

  # Source-of-funds codes, partitioned into cash, credit and everything else
  all_fund_sources = unique_str(x$SDC$Source...of..Funds..Code)
  cash_funds = c("CR")
  credit_funds = c("BL", "BOR", "CF", "DS")
  other_funds = setdiff(all_fund_sources,c(cash_funds,credit_funds))
  
  # Purpose codes, partitioned into the "good" set and everything else
  all_purposes = unique_str(x$SDC$Purpose.Code)
  good_purpose = c("ESV", "UVL", "STP", "ISV")
  other_purpose = setdiff(all_purposes,c(good_purpose))

  # 0/1 indicators with the same length as eu_index$bb
  high_leverage = 0 * eu_index$bb
  high_leverage[lev_events$bb$high] = 1
  low_leverage = 0 * eu_index$bb
  low_leverage[lev_events$bb$low] = 1
  
  # Missed: last month's score is below the score of the month before;
  # Beat is the complementary comparison (>=).
  Missed_EPS = (x$ibes$mean_rec_last_month_score < x$ibes$mean_rec_last_last_month_score)
  Beat_EPS = (x$ibes$mean_rec_last_month_score >= x$ibes$mean_rec_last_last_month_score)
  
  # NOTE(review): low_epsunc is computed but is not part of all_characteristics
  # below, so it does not reach the returned table - confirm this is intended.
  low_epsunc = 0 * eu_index$bb
  low_epsunc[eps_events$bb$low] = 1
  
  # "Yes" where OtherlaterEvent is non-zero
  ISS_Later = ifelse((x$SDC$OtherlaterEvent != 0), "Yes", "No")
  
  # Credit: at least one credit code and no cash/other code.
  # NOTE(review): Credit is computed but is not part of all_characteristics
  # below - confirm this is intended.
  Credit = sapply(x$SDC$Source...of..Funds..Code, function(i) {
    length_of_intersection(i, credit_funds) != 0 & 
    length_of_intersection(i, c(cash_funds, other_funds)) == 0 
  })
  # Cash: at least one cash code and no credit/other code
  Cash = sapply(x$SDC$Source...of..Funds..Code, function(i) {
    length_of_intersection(i, cash_funds) != 0 & 
    length_of_intersection(i, c(credit_funds, other_funds)) == 0
  })  
  # Good purpose: at least one good-purpose code and no other purpose code
  Good_purpose = sapply(x$SDC$Purpose.Code, function(i) {
    length_of_intersection(i, good_purpose) != 0 & 
    length_of_intersection(i, other_purpose) == 0
  })
  # "STP" present without any of "ESV", "ISV", "UVL"
  Stock_Option_Plan = sapply(x$SDC$Purpose.Code, function(i) {
    length_of_intersection(i, "STP") != 0 & 
    length_of_intersection(i, c("ESV","ISV","UVL")) == 0
  })
  # "UVL" present without "STP"
  Undervalued = sapply(x$SDC$Purpose.Code, function(i) {
    length_of_intersection(i, "UVL") != 0 & 
    length_of_intersection(i, "STP") == 0
  })
  # "ESV" or "ISV" present
  Enhance_Shareholder_Value = sapply(x$SDC$Purpose.Code, function(i) {
    length_of_intersection(i, c("ESV","ISV")) != 0
  })

  # ISS_Later is character, so cbind() coerces the whole matrix to character;
  # the column names are the variable names used here.
  all_characteristics = cbind(low_leverage, high_leverage, Missed_EPS, Beat_EPS, 
                              ISS_Later, Cash, Good_purpose, Undervalued, 
                              Enhance_Shareholder_Value, Stock_Option_Plan)

  # Per characteristic: cross-tabulate against the EU index and take the share
  # (in percent) of the second table column within each index value.
  # NOTE(review): this relies on every characteristic having exactly two levels
  # present and on the second level in table()'s ordering being the "positive"
  # one ("1", "TRUE", "Yes") - confirm against the data.
  EU_relations= t(apply(all_characteristics, 2, function(r) {
    x = table(eu_index$bb, r)
    x = matrix(round(100 * x[,2] / (x[,1] + x[,2]), 1), ncol=1)
    return(x)
  }))

  # NOTE(review): the labels assume the index values start at 0 and are
  # contiguous - confirm.
  colnames(EU_relations) = paste0("EU", 0:(ncol(EU_relations) - 1))
  rownames(EU_relations) = gsub("_", " ", rownames(EU_relations))
  all_characteristics_continuous = cbind(x$CRSP$Market.Cap, x$CRSP$BE.ME_score, 
                                         x$SDC$Event.Size, x$Stambaugh)
  # Mean of each continuous characteristic per EU-index value, ignoring NAs
  EU_relations_continuous = t(apply(all_characteristics_continuous, 2, function(r) {
    sapply(sort(unique(eu_index$bb)), function(i) {
      useonly = which(eu_index$bb == i)
      mean(r[useonly][!is.na(r[useonly])])
    })
  }))
  rownames(EU_relations_continuous) = c("Market Cap.", "BE/ME Score", 
                                        "Percentage Shares", "Mispricing Measure")
  EU_relations = rbind(EU_relations, round(EU_relations_continuous, 2))

  return(EU_relations)
}

# Table X data, computed when the script is run.
eu_firm_characteristics_table = create_eu_firm_characteristics_table()

##########################################################################################
# Table XI: Buyback announcements IRATS for all EU-index Values
# Table XII: Buyback announcements Calendar Time for all EU-index Values

# buybacks_eu_tables$<long, hedged, long48, hedged48, IRATS, or CAL>

create_buybacks_eu_tables <- function() {
  # Builds, for every EU-index value 0..6, the P&L series and the IRATS/CAL
  # results of the buyback events that have that index value, and appends each
  # as new column(s) to the matching entry of the returned list
  # (long, hedged, long48, hedged48, IRATS, CAL).
  x = BUYBACK_DATA$DATASET
  res = list()
  # These assignments only document the expected fields: assigning NULL to a
  # list element does not create it, so res is still empty after them.
  res$long = NULL
  res$hedged = NULL
  res$long48 = NULL
  res$hedged48 = NULL
  res$IRATS = NULL
  res$CAL = NULL
  for (i in 0:6) {
    # Indices of the events whose EU index equals i
    EU_events_now = which(eu_index$bb == i)
    # Row-wise non_zero_mean of the "One.Year.After" P&L matrix of those events
    EU = apply(PNL_matrix_BB(
      start_date_event, "One.Year.After", EU_events_now, 
      x$DatesMonth, x$returns_by_event_monthly, event=1), 1, non_zero_mean)
    # Same series after alpha_lm against the pnl_hedge_factors columns, scrubbed
    # (warnings suppressed) and passed through remove_initialization_time
    EU_hedged = remove_initialization_time(suppressWarnings(scrub(
      alpha_lm(EU,Risk_Factors_Monthly[, pnl_hedge_factors], hedge_months, trade=1))), 
      min_date=FirstTrade)
    # The same two series for the "Four.Years.After" horizon
    EU48m = apply(PNL_matrix_BB(
      start_date_event, "Four.Years.After", EU_events_now, 
      x$DatesMonth, x$returns_by_event_monthly, event=1), 1, non_zero_mean)
    EU48m_hedged = remove_initialization_time(suppressWarnings(scrub(
      alpha_lm(EU48m, Risk_Factors_Monthly[, pnl_hedge_factors], hedge_months, trade=1))),
      min_date=FirstTrade)
    # NOTE(review): if these series are plain vectors, cbind() names each new
    # column after the local variable (e.g. "EU"), so renaming the locals would
    # change the column names of the result - verify before refactoring.
    res$long = cbind(res$long, EU)
    res$hedged = cbind(res$hedged, EU_hedged)
    res$long48 = cbind(res$long48, EU48m)
    res$hedged48 = cbind(res$hedged48, EU48m_hedged)
    # Append the five-factor $results of each estimation method for this index value
    for (method in c("IRATS", "CAL")) {
      func = get_function(method)
      weights = get_weights(method, "bb", EU_events_now)
      res[[method]] = cbind(res[[method]], func(
        x$returns_by_event_monthly[, EU_events_now], 
        x$SDC$Event.Date[EU_events_now], 
        Risk_Factors_Monthly,
        formula_used=five_factor_model,
        value.weights=weights)$results)
    }
  }
  return(res)
}

# Tables XI and XII data, computed when the script is run.
buybacks_eu_tables = create_buybacks_eu_tables()

##########################################################################################
# Table XIII: Long-run IRATS abnormal returns after open market repurchase announcements 
# for low and high EU-index companies over different time periods.
# Table XIV: Calendar method monthly abnormal returns after open market repurchase 
# announcements for low and high EU-index companies over different time periods.

# buyback_returns_undervaluation_tables$<IRATS or CAL>

# Logical masks over the buyback events: EU-index values 4-6 count as high EU,
# values 0-3 as low EU.
high_EU = (eu_index$bb %in% 4:6)
low_EU = (eu_index$bb %in% 0:3)

create_buyback_returns_undervaluation_table <- function(method) {
  # Five-factor abnormal returns of buyback events, split by period and by
  # high/low EU index.
  #
  # method: estimation method name, resolved to a function by get_function().
  # Returns the column-wise concatenation of one six-column panel per row of
  # periods_considered (three columns for high EU, then three for low EU), or
  # NULL when periods_considered has no rows.
  x = BUYBACK_DATA$DATASET
  func = get_function(method)
  formula_used = five_factor_model

  # High EU comes first, so its columns ("U 5FF") precede the low-EU ones ("O 5FF")
  eu_index_subsets = list(
    high=(eu_index$bb %in% 4:6),
    low=(eu_index$bb %in% 0:3)
  )

  # Panel for the i-th row of periods_considered
  period_panel <- function(i) {
    period_start = periods_considered[i, 1]
    period_end = periods_considered[i, 2]
    periodnow = (x$SDC$Event.Date >= period_start & 
                 x$SDC$Event.Date <= period_end)

    # Loop through EU index type
    res = c()
    for (eu_index_subset in eu_index_subsets) {
      useonly = periodnow & eu_index_subset
      weights = get_weights(method, "bb", useonly)
      tmp = func(x$returns_by_event_monthly[, useonly], 
                 x$SDC$Event.Date[useonly], 
                 Risk_Factors_Monthly, 
                 formula_used=formula_used,
                 value.weights=weights)$results
      res = cbind(res, tmp)
    }

    # The first column label carries the period as "<start year>-<end year>"
    period_label = paste(str_sub(period_start, start=1, end=4), 
                         str_sub(period_end, start=1, end=4), sep="-")
    colnames(res) = c(paste(period_label, "U 5FF", sep=" "), "t-stat", "p-value", 
                      "O 5FF", "t-stat", "p-value")
    rownames(res)[nrow(res)] = "Observations"

    return(res)
  }

  # seq_len() rather than 1:nrow(): with zero periods 1:0 would iterate over
  # c(1, 0) and index rows that do not exist.
  return(Reduce(cbind, lapply(seq_len(nrow(periods_considered)), period_panel)))
}

# Tables XIII (IRATS) and XIV (CAL): one undervaluation table per method, keyed
# by the method name. The loop variable `method` is created at script top level.
buyback_returns_undervaluation_tables = list()
for (method in c("IRATS", "CAL")) {
  buyback_returns_undervaluation_tables[[method]] = 
    create_buyback_returns_undervaluation_table(method)
}

##########################################################################################
# Table XV: Buyback for Low and High EU-index and for Low and High Mispricing 
# measure companies

# buyback_returns_index_table$<IRATS or CAL>$<EU, Stam, or Stam_EU>

create_buyback_returns_index_table <- function(method, use_eu_index=TRUE, use_mm_index=FALSE) {
  # Five-factor abnormal returns of buyback events for low/high EU-index
  # groups, low/high mispricing-measure groups, or their four intersections.
  #
  # method:       "IRATS" or "CAL"; anything else stops with an error.
  # use_eu_index: split events by EU index (low = 0-3, high = 4-6).
  # use_mm_index: split events by the mispricing measure (stam_events$bb).
  #               With both flags TRUE the four intersections are used, ordered
  #               low-mis/low-EU, low-mis/high-EU, high-mis/low-EU,
  #               high-mis/high-EU. With both FALSE the function stops.
  # Returns the column-wise concatenation of the three-column $results of each
  # group (estimate labelled "<group> CAR" or "<group> CAL", "t-stat", "p-value").
  x = BUYBACK_DATA$DATASET
  func = get_function(method)
  formula_used = five_factor_model

  # Label suffix for the estimate column. An unknown method used to leave
  # method_code undefined and fail later with an unrelated error.
  method_code = switch(method,
    IRATS = "CAR",
    CAL = "CAL",
    stop('Invalid method argument. Must be "IRATS" or "CAL"')
  )

  # Low EU is 0-3, as in high_EU/low_EU and the undervaluation tables; the
  # previous 1:3 silently dropped the events with an EU index of 0.
  eu_index_subsets = list(
    low=(eu_index$bb %in% 0:3),
    high=(eu_index$bb %in% 4:6)
  )

  mm_index_subsets = list(
    low=stam_events$bb$low,
    high=stam_events$bb$high
  )

  if (use_eu_index && !use_mm_index) {
    useonly_subsets = eu_index_subsets
    col_names = c("Low EU:", "High EU:")
  } else if (!use_eu_index && use_mm_index) {
    useonly_subsets = mm_index_subsets
    col_names = c("Low Mis.:", "High Mis.:")
  } else if (use_eu_index && use_mm_index) {
    # Use both indexes (EU and mispricing measure): mispricing group is the
    # outer loop so the subsets line up with col_names below
    useonly_subsets = list()
    i = 1
    for (mm_index_subset in mm_index_subsets) {
      for (eu_index_subset in eu_index_subsets) {
        useonly_subsets[[i]] = mm_index_subset & eu_index_subset
        i = i + 1
      }
    }
    col_names = c("Low Mis.: L. EU", "H. EU", "High Mis.: L. EU", "H. EU")
  } else {
    stop("index flags must be binary")
  }

  # One three-column panel per subset, concatenated left to right
  panels = lapply(seq_along(useonly_subsets), function(i) {
    useonly = which(useonly_subsets[[i]])
    weights = get_weights(method, "bb", useonly)
    tmp = func(x$returns_by_event_monthly[, useonly], 
               x$SDC$Event.Date[useonly], 
               Risk_Factors_Monthly, 
               formula_used=formula_used,
               value.weights=weights)$results
    colnames(tmp) = c(paste0(col_names[i], " ", method_code), "t-stat", "p-value")
    tmp
  })

  return(do.call(cbind, panels))
}

# Table XV data. For each method three variants are stored: EU index only
# ($EU), mispricing measure only ($Stam), and both combined ($Stam_EU).
# The loop variable `method` is created at script top level.
buyback_returns_index_table = list()
for (method in c("IRATS", "CAL")) {
  buyback_returns_index_table[[method]]$EU =
    create_buyback_returns_index_table(method, use_eu_index=TRUE, use_mm_index=FALSE)
  buyback_returns_index_table[[method]]$Stam =
    create_buyback_returns_index_table(method, use_eu_index=FALSE, use_mm_index=TRUE)
  buyback_returns_index_table[[method]]$Stam_EU =
    create_buyback_returns_index_table(method, use_eu_index=TRUE, use_mm_index=TRUE)
}

##########################################################################################
# Get all P&L data

# Index of every buyback event. seq_along() yields an empty index when there
# are no events, where 1:length() would yield c(1, 0).
all_events = seq_along(BUYBACK_DATA$DATASET$SDC$Event.Date)
# PNL_matrix_BB output for all events over the "One.Year.After" horizon
events_all_12m = PNL_matrix_BB(start_date_event, "One.Year.After", all_events, 
                               BUYBACK_DATA$DATASET$DatesMonth, 
                               BUYBACK_DATA$DATASET$returns_by_event_monthly, event=1)
# Row-wise non_zero_mean of the scrubbed rows of that matrix
pnl_returns_events_all_12M = apply(events_all_12m, 1, function(r) non_zero_mean(scrub(r)))
# That series after alpha_lm against the pnl_hedge_factors columns, scrubbed;
# warnings raised along the way are suppressed
long_all12mshort_risk_factors = suppressWarnings(scrub(alpha_lm(
  pnl_returns_events_all_12M, Risk_Factors_Monthly[, pnl_hedge_factors], hedge_months, trade=1)))

# Maps a horizon in months (as a string key) to the horizon label that is
# passed as the second argument of PNL_matrix_BB.
event_months = list(
  `1`="One.Month.After",
  `3`="Three.Month.After",
  `6`="Six.Month.After",
  `12`="One.Year.After",
  `24`="Two.Years.After",
  `36`="Three.Years.After",
  `48`="Four.Years.After"
)

# BUYBACK_DATA$events_all_<1,3,6,12,24,36,48>m
# BUYBACK_DATA$pnl_returns_events_all_<1,3,6,12,24,36,48>M
# BUYBACK_DATA$long_all<1,3,6,12,24,36,48>mshort_risk_factors
add_pnl_data <- function(buyback_data) {
  # Adds, for every horizon in event_months, three entries to buyback_data:
  #   events_all_<h>m                 scrubbed PNL_matrix_BB output for all events
  #   pnl_returns_events_all_<h>M     row-wise non_zero_mean of that matrix
  #   long_all<h>mshort_risk_factors  that series after alpha_lm against the
  #                                   pnl_hedge_factors columns, scrubbed
  #
  # buyback_data: list with a $DATASET entry holding SDC$Event.Date, DatesMonth
  #               and returns_by_event_monthly.
  # Returns buyback_data with the new entries added.

  # Index the events of the data that was passed in. This used to read a global
  # all_events built from BUYBACK_DATA, which the script removes right after
  # its first call to this function.
  event_index = seq_along(buyback_data$DATASET$SDC$Event.Date)

  for (month in names(event_months)) {
    events_all_month = paste0("events_all_", month, "m")
    buyback_data[[events_all_month]] = scrub(PNL_matrix_BB(
      start_date_event, event_months[[month]], event_index, 
      buyback_data$DATASET$DatesMonth, buyback_data$DATASET$returns_by_event_monthly, event=1))

    pnl_returns_month = paste0("pnl_returns_events_all_", month, "M")
    buyback_data[[pnl_returns_month]] = apply(buyback_data[[events_all_month]], 1, 
      function(r) non_zero_mean(scrub(r)))

    long_short_month = paste0("long_all", month, "mshort_risk_factors")
    buyback_data[[long_short_month]] = suppressWarnings(scrub(alpha_lm(
      buyback_data[[pnl_returns_month]], Risk_Factors_Monthly[, pnl_hedge_factors],
      hedge_months, trade = 1)))
  }
  return(buyback_data)
}

# Attach the P&L series for every horizon in event_months to BUYBACK_DATA.
BUYBACK_DATA = add_pnl_data(BUYBACK_DATA)

# Remove the event index from the workspace; it is therefore not among the
# objects written by the final save().
rm("all_events")

create_months <- function() {
  # For every horizon in event_months, takes the matching
  # BUYBACK_DATA$long_all<h>mshort_risk_factors series, passes it through
  # remove_initialization_time (min_date=FirstTrade), scales it by 100 and
  # accumulates it with cumsum. Returns a list keyed by the horizon.
  horizons = names(event_months)
  cumulative = lapply(horizons, function(horizon) {
    hedged = BUYBACK_DATA[[paste0("long_all", horizon, "mshort_risk_factors")]]
    cumsum(100 * remove_initialization_time(hedged, min_date=FirstTrade))
  })
  names(cumulative) = horizons
  return(cumulative)
}

# Cumulative hedged series per horizon, keyed by the names of event_months.
months = create_months()

create_eu_data <- function() {
  # Collects the high/low EU event masks together with their 12- and 48-month
  # P&L series, raw and hedged.
  #
  # Returns a list with
  #   $binary$<high|low>               the high_EU / low_EU masks
  #   $pnl$<high|low>$<12|48>          row-wise non_zero_mean of the P&L matrix
  #   $pnl_hedged$<high|low>$<12|48>   the same series after alpha_lm, scrub
  #                                    and remove_initialization_time
  dataset = BUYBACK_DATA$DATASET

  res = list()
  res$binary$high = high_EU
  res$binary$low = low_EU

  for (eu_type in c("high", "low")) {
    # NOTE(review): the mask itself is passed to PNL_matrix_BB as the event
    # selector - confirm that it accepts a mask as well as integer indices.
    selected_events = res$binary[[eu_type]]
    for (month in c("12", "48")) {
      # P&L
      pnl_matrix = PNL_matrix_BB(
        start_date_event, event_months[[month]], selected_events, 
        dataset$DatesMonth, dataset$returns_by_event_monthly, event=1)
      raw_pnl = apply(pnl_matrix, 1, non_zero_mean)
      res$pnl[[eu_type]][[month]] = raw_pnl

      # Hedged
      hedged_pnl = suppressWarnings(scrub(alpha_lm(
        raw_pnl, Risk_Factors_Monthly[, pnl_hedge_factors], hedge_months, trade=1)))
      res$pnl_hedged[[eu_type]][[month]] = remove_initialization_time(
        hedged_pnl, min_date=FirstTrade)
    }
  }
  return(res)
}

# High/low EU masks and their 12- and 48-month P&L series for buybacks.
eu_bb_data = create_eu_data()

########################################################################################################
# Now save the results of this file, which will be used in the .Rnw file to generate the final paper

# Write every object created after initial_vars was recorded (data and
# functions alike) to data_for_paper.Rdata; objects that already existed at
# that point are excluded by the setdiff().
save(list = setdiff(ls(all=TRUE), initial_vars), file="data_for_paper.Rdata")