Looping linear regressions in R when the response variable Y changes

1

I'm doing a regression on the Y ~ X1 + X2 model.

Y is a stock from the financial market (measured as its daily change in liquidity).

X1 is a portfolio of the most traded stocks (1st quartile), also measured as the daily change in liquidity; here, however, the change is that of the portfolio as a whole.

X2 is the market portfolio (all stocks).

When Y is one of the stocks in X1, I need to remove it from that portfolio and then run the regression in the model above, so that Y is no longer contained in X1.

How can I do this in a loop, given that on average 50 stocks are part of the X1 portfolio, so that for each stock I am actually running a regression with a different X1?

X1 is the last column, the sum of the stocks:

X1 is the last column, the sum of the stocks:

To regress:

Petr4 ~ X1 + X2 

I need to get the X2 from within Petr4 . And so, successively, with all that are in X1 .

My dataset has 350 stocks, of which on average 50 are part of X1 in each of the 20 quarters analyzed.

# Example data for the X1 portfolio, stored as a tibble with 19 rows: one
# column per stock (ABEV3, B3SA3, BBDC4, PETR4, VALE3) plus a final column
# `X1`, which the question describes as the sum of the stock columns.
X1 <- structure(list(ABEV3 = c(-1.68961360937539, 0.336976612313896, 0.493514089699225, 0.254432823660535, -0.256405265540702, 0.264718777827759, 1.55834609347509, -1.20767246429255, -0.141477780693152, -0.19214164080899, -0.000600037380238257, -0.147233871179818, 1.75915399473419, -2.94440084531756, 2.34297132404171, -2.05898734280988, 2.59824344556308, -2.55640564217954, 0.930253209395329), 
B3SA3 = c(-0.249394879240838, -0.435826887737834, 0.674263712359221, -1.47663475639016, 1.65555693916285, -0.577034735765758, 2.26875789306363, -2.2875050945378, 0.537969408805017, -0.205268668573947, 0.414868128804647, -1.48457772878109, 1.15631736949281, 0.0305113207811356, -0.102657510980629, 1.37400357417165, -2.07665582016, 2.98843329137188, -2.69685509115329), 
BBDC4 = c(-0.212217934317245, 0.118614534704256, 0.506732536312761, 0, 0, 1.2116182781466, 1.56667948677674, -3.59090513583332, 2.58117667166673, -2.25835436838038, -0.0164787028530569, -0.475048661790092, 0.120260328263065, 0.69979767549019, -1.00732194106152, 1.39438438911175, -1.58704315346786, 2.26692551507724, 1.01733376126224), 
PETR4 = c(-1.30694127859772, 0.0556754000556631, 1.6427228959543, -2.07784831693627, 0.48951225216644, 1.77717497076481, -0.651276784435446, 0.216236137690635, -0.921123800566615, -0.675165953169317, -0.00202368123748712, -0.0428920777198445, 0.121292232920353, 1.23851717771871, -0.683468222178768, -0.908243883108815, -0.107679168050808, 1.83291659681442, -1.19385481066669), 
VALE3 = c(2.41401127732222, -2.76421566679027, 0.461422210143545, -0.875330431707249, 0.885104981630114, -0.843726971357561, 1.5682910541085, -0.571164168008786, -0.10252567045643, -0.989403988444739, -0.692572450320219, 0.982267519085984, 1.26078160982549, -0.466576892153469, 0.432368421707883, -2.05782217710974, 1.50754449280209, -0.916145594562414, -0.0527556012964956), 
X1 = c(-1.04415642420898, -2.68877600745429, 3.77865544446905, -4.17538068137314, 2.7737689074187, 1.83275031961585, 6.31079774298852, -7.44101072498182, 1.95401882875555, -4.32033461937738, -0.296806742986354, -1.16748482038486, 4.41780553523591, -1.44215156348099, 0.981892071528681, -2.25666543974503, 0.334409796686495, 3.61572416652159, -1.9958785324589)), 
row.names = c(NA, -19L), class = c("tbl_df", "tbl", "data.frame"))


# Example data for the market portfolio, stored as a tibble with 19 rows: one
# column per stock (including the five stocks that also appear in the X1
# table) plus a final column `X2` holding the market-portfolio series that
# the question uses as the second regressor.
X2 <- structure(list(AALR3 = c(-0.206362587327738, 1.61895166368979, -0.861370994204189, 0.174830746289519, 0.407773465953981, 0.970959829160052, -1.17300376052144, -0.575620575876884, 0.197278912037272, 0.897302548964069, -1.32420067203198, 3.95899769222222, -1.21163567633219, -1.83565877386244, 1.54036374813566, -1.82397761281091, 1.20959432595504, -0.0759481540437981, -0.638232551464981), ABCB10 = c(-1.60296610096249, 2.09620731537456, -0.41694352576488, -0.838618382137318, -0.216899651453602, -0.235666655708187, 4.2059275953516, -2.48173025488413, 4.15244771238538, -2.20385731514288, 0.647168032961616, -4.53221518198161, 2.62619079447456, 0.0935951716704986, -0.354716323827452, 3.91837590914364, -1.93979523690335, 1.61498476891751, 0.122212912954949), 
ABCB4 = c(-1.60296610096249, 2.09620731537456, -0.41694352576488, -0.838618382137318, -0.216899651453602, -0.235666655708187, 4.2059275953516, -2.48173025488413, 4.15244771238538, -2.20385731514288, 0.647168032961616, -4.53221518198161, 2.62619079447456, 0.0935951716704986, -0.354716323827452, 3.91837590914364, -1.93979523690335, 1.61498476891751, 0.122212912954949), 
ABRE11 = c(-1.27341135842787, -1.86497998476113, -0.484129996891839, 1.47010984330541, -0.0210150840181142, -0.357694757179082, 0, 0, 1.62240834012385, 2.80846409804994, -2.58178098976869, 1.15726791232328, -4.12912917187979, 0.695814197684221, 1.9304416196954, -0.687599921266767, 0.0592175350156154, 0.0742655438637557, 0.0469482393875812 ), 
AEDU3 = c(1.22237903311943, -0.0432075917354171, -0.151354670240687, 0.206910952878326, 1.03072282359242, -0.311151317468535, -2.7112186212168, 0.216238189442155, 0.307723724513601, 0.534775159411636, 0.976633521829294, -1.44257539205824, 0.177300976652729, 0.585653957384092, -0.862543274976379, -0.663875728078051, 0.426836767805498, 1.87185456856346, -2.35992132953569 ), 
ABEV3 = c(-1.68961360937539, 0.336976612313896, 0.493514089699225, 0.254432823660535, -0.256405265540702, 0.264718777827759, 1.55834609347509, -1.20767246429255, -0.141477780693152, -0.19214164080899, -0.000600037380238257, -0.147233871179818, 1.75915399473419, -2.94440084531756, 2.34297132404171, -2.05898734280988, 2.59824344556308, -2.55640564217954, 0.930253209395329 ), 
B3SA3 = c(-0.249394879240838, -0.435826887737834, 0.674263712359221, -1.47663475639016, 1.65555693916285, -0.577034735765758, 2.26875789306363, -2.2875050945378, 0.537969408805017, -0.205268668573947, 0.414868128804647, -1.48457772878109, 1.15631736949281, 0.0305113207811356, -0.102657510980629, 1.37400357417165, -2.07665582016, 2.98843329137188, -2.69685509115329 ), 
BBDC4 = c(-0.212217934317245, 0.118614534704256, 0.506732536312761, 0, 0, 1.2116182781466, 1.56667948677674, -3.59090513583332, 2.58117667166673, -2.25835436838038, -0.0164787028530569, -0.475048661790092, 0.120260328263065, 0.69979767549019, -1.00732194106152, 1.39438438911175, -1.58704315346786, 2.26692551507724, 1.01733376126224), 
PETR4 = c(-1.30694127859772, 0.0556754000556631, 1.6427228959543, -2.07784831693627, 0.48951225216644, 1.77717497076481, -0.651276784435446, 0.216236137690635, -0.921123800566615, -0.675165953169317, -0.00202368123748712, -0.0428920777198445, 0.121292232920353, 1.23851717771871, -0.683468222178768, -0.908243883108815, -0.107679168050808, 1.83291659681442, -1.19385481066669), 
VALE3 = c(2.41401127732222, -2.76421566679027, 0.461422210143545, -0.875330431707249, 0.885104981630114, -0.843726971357561, 1.5682910541085, -0.571164168008786, -0.10252567045643, -0.989403988444739, -0.692572450320219, 0.982267519085984, 1.26078160982549, -0.466576892153469, 0.432368421707883, -2.05782217710974, 1.50754449280209, -0.916145594562414, -0.0527556012964956), 
X2 = c(-4.50748353877014, 1.21440271048808, 1.44791273160258, -4.00076590317452, 3.75745081003979, 1.66353076271191, 10.8384305519535, -12.7638536211848, 12.386325230201, -4.48750744323749, -1.9318188170345, -6.55822497186082, 4.50672325262578, -1.80915183893412, 2.88072151672846, 2.40463311638653, -1.84953204834405, 8.71586566274002, -4.70265834816209)), 
row.names = c(NA, -19L ), class = c("tbl_df", "tbl", "data.frame"))
    
asked by anonymous 16.07.2018 / 16:25

1 answer

0

You can do what you want without a for loop, using the *apply family of functions.

First, I'm going to redo the base since it's easier to have all the columns in the same data.frame .

# Build a single data frame holding the responses and both regressors: start
# from the X1 table (stock columns plus the X1 portfolio series) and append
# the X2 series from the market-portfolio table.
#
# Assigning a shorter column to a data.frame recycles it silently whenever the
# lengths divide evenly, so require matching row counts before combining.
# NOTE(review): this assumes the rows of X1 and X2 refer to the same days in
# the same order; the data shown carries no date column to confirm it.
stopifnot(nrow(X1) == nrow(X2))
base <- X1
base[["X2"]] <- X2[["X2"]]

Now, I'm going to fit the models for each of the response variables.

# Names of the two regressors shared by every model; fail early if either
# column is missing from `base`.
regressors <- c("X1", "X2")
stopifnot(all(regressors %in% names(base)))

# Column count; no longer needed for the selection below, but still defined
# exactly as before.
nc <- ncol(base)

# Every remaining column is a response variable (one stock each). Selecting by
# name rather than by position keeps this correct if the columns are reordered.
respostas <- setdiff(names(base), regressors)

# Fit `<stock> ~ X1 + X2` once per response. Iterating over a named vector
# makes lapply() return a list that is already named after the stocks.
model_list <- lapply(setNames(respostas, respostas), function(resp) {
  # as.name() turns the column name into a symbol, so the left-hand side is
  # built without parsing text and also works for non-syntactic column names.
  fmla <- reformulate(regressors, response = as.name(resp))
  lm(fmla, data = base)
})

This result can then be used to extract the information you want.

For example, the coefficients of the regressions:

# Coefficient matrix: one row per fitted model, one column per coefficient
# (intercept, X1, X2). vapply() declares that each model yields exactly three
# numbers, so the result is always a numeric matrix; sapply() would silently
# return a list if the input were empty or the lengths differed.
t(vapply(model_list, coef, numeric(3L)))

To extract more things, it is better to use summary .

# Compute the summary of every fitted model; the resulting list keeps the
# element names of `model_list`.
model_smry <- lapply(model_list, function(fit) summary(fit))

From the summaries we can take the coefficient tables, which hold the coefficients we already have, the values of the t statistics, and the p-values, among others. Extracting elements from these tables is a little more complicated, but the process is always the same, and with a little practice it becomes intuitive.

# Pull the coefficient table out of each model summary and convert it to a
# data frame so its columns can be extracted by name.
model_coef <- lapply(model_smry, function(smry) smry[["coefficients"]])
model_coef <- lapply(model_coef, as.data.frame)

# Matrix of p-values: one column per model, one labeled row per term.
# summary.lm() leaves aliased (non-estimable) terms out of its table, so rows
# are aligned by term name and any missing term is reported as NA. This keeps
# the result a numeric matrix instead of degrading to a ragged list.
coef_names <- c("(Intercept)", "X1", "X2")
vapply(
  model_coef,
  function(tab) {
    p_values <- tab[["Pr(>|t|)"]][match(coef_names, rownames(tab))]
    setNames(p_values, coef_names)
  },
  numeric(length(coef_names))
)

The coefficients of determination (R-squared), for example, are taken directly from the summaries, since they are not in the coefficient tables:

# R-squared of every model as a named numeric vector. vapply() guarantees one
# number per model and a numeric result even when the list is empty.
vapply(model_smry, function(smry) smry[["r.squared"]], numeric(1L))
#    ABEV3     B3SA3     BBDC4     PETR4     VALE3 
#0.1333845 0.6140561 0.5823004 0.1924847 0.5148558
    
18.07.2018 / 06:30