# rm(list=ls())
options(OutDec = ",")
#==============================================================================
# Exemplo:
# Salarios de chefes de familia entre 1976 e 1982.
#==============================================================================
# Analise
#==============================================================================
library(MASS)
dataxy <- read.table("../dados/d12_salarios.txt",header=T)
print(head(dataxy))
lnsalario <- dataxy[,12]
exper <- dataxy[,1]
exper2 <- exper^2
semana <- dataxy[,2]
ocupacao <- dataxy[,3]
industria <- dataxy[,4]
sul <- dataxy[,5]
smsa <- dataxy[,6]
casado <- dataxy[,7]
uniao <- dataxy[,9]
educacao <- dataxy[,10]
feminino <- dataxy[,8]
negro <- dataxy[,11]
n <- length(lnsalario)
exp wks occ ind south smsa ms fem union ed blk lwage 1 3 32 0 0 1 0 1 0 0 9 0 5,56068 2 4 43 0 0 1 0 1 0 0 9 0 5,72031 3 5 40 0 0 1 0 1 0 0 9 0 5,99645 4 6 39 0 0 1 0 1 0 0 9 0 5,99645 5 7 42 0 1 1 0 1 0 0 9 0 6,06146 6 8 35 0 1 1 0 1 0 0 9 0 6,17379
#==============================================================================
# Ajuste da regressao linear por minimos quadrados
#==============================================================================
# Estimacao por minimos quadrados ordinarios
fity <- lm(lnsalario ~ exper + exper2 + semana + ocupacao + industria
+ sul + smsa + casado + uniao + educacao + feminino + negro )
print(summary(fity))
qest <- cbind(fity$coef,sqrt(diag(vcov(fity))))
colnames(qest) <- c("Coeficientes","Erro Padrao")
print(qest)
Call:
lm(formula = lnsalario ~ exper + exper2 + semana + ocupacao +
industria + sul + smsa + casado + uniao + educacao + feminino +
negro)
Residuals:
Min 1Q Median 3Q Max
-2,18965 -0,23536 -0,00988 0,22906 2,08738
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5,251e+00 7,129e-02 73,662 < 2e-16 ***
exper 4,010e-02 2,159e-03 18,574 < 2e-16 ***
exper2 -6,734e-04 4,744e-05 -14,193 < 2e-16 ***
semana 4,216e-03 1,081e-03 3,899 9,82e-05 ***
ocupacao -1,400e-01 1,466e-02 -9,553 < 2e-16 ***
industria 4,679e-02 1,179e-02 3,967 7,39e-05 ***
sul -5,564e-02 1,253e-02 -4,441 9,17e-06 ***
smsa 1,517e-01 1,207e-02 12,567 < 2e-16 ***
casado 4,845e-02 2,057e-02 2,355 0,0185 *
uniao 9,263e-02 1,280e-02 7,237 5,45e-13 ***
educacao 5,670e-02 2,613e-03 21,702 < 2e-16 ***
feminino -3,678e-01 2,510e-02 -14,655 < 2e-16 ***
negro -1,669e-01 2,204e-02 -7,574 4,45e-14 ***
---
Signif. codes: 0 ‘***’ 0,001 ‘**’ 0,01 ‘*’ 0,05 ‘.’ 0,1 ‘ ’ 1
Residual standard error: 0,3494 on 4152 degrees of freedom
Multiple R-squared: 0,4286, Adjusted R-squared: 0,427
F-statistic: 259,5 on 12 and 4152 DF, p-value: < 2,2e-16
Coeficientes Erro Padrao
(Intercept) 5,251123587 7,128679e-02
exper 0,040104650 2,159175e-03
exper2 -0,000673377 4,744313e-05
semana 0,004216089 1,081366e-03
ocupacao -0,140009344 1,465670e-02
industria 0,046788640 1,179350e-02
sul -0,055637368 1,252710e-02
smsa 0,151667119 1,206870e-02
casado 0,048448508 2,056867e-02
uniao 0,092626749 1,279951e-02
educacao 0,056704208 2,612826e-03
feminino -0,367785217 2,509705e-02
negro -0,166937634 2,204219e-02
#==============================================================================
# Estimacao robusta do erro padrao
#==============================================================================
X <- cbind(rep(1,n),exper,exper2,semana,ocupacao,industria,sul,
smsa,casado,uniao,educacao,feminino,negro)
e <- fity$res
Q <- matrix(0,13,13)
T <- matrix(0,13,13)
j1 <- 1
j2 <- 7
k <- 0
for(i in 1:595){
z <- rep(0,13)
for(t in 1:7){
k <- k + 1
z <- z + X[k,]*e[k]
}
Q <- Q + t(X[j1:j2,])%*%X[j1:j2,]
T <- T + z%*%t(z)
j1 <- j1 + 7
j2 <- j2 + 7
}
P <- solve(Q)
S <- P%*%T%*%P
qest2 <- cbind(fity$coef,sqrt(diag(vcov(fity))),sqrt(diag(S)))
colnames(qest2) <- c("Coeficientes","Erro Padrao","Err. Pad. Robusto")
print(qest2)
Coeficientes Erro Padrao Err. Pad. Robusto (Intercept) 5,251123587 7,128679e-02 1,232643e-01 exper 0,040104650 2,159175e-03 4,067119e-03 exper2 -0,000673377 4,744313e-05 9,110648e-05 semana 0,004216089 1,081366e-03 1,538441e-03 ocupacao -0,140009344 1,465670e-02 2,718068e-02 industria 0,046788640 1,179350e-02 2,360873e-02 sul -0,055637368 1,252710e-02 2,609963e-02 smsa 0,151667119 1,206870e-02 2,404766e-02 casado 0,048448508 2,056867e-02 4,085041e-02 uniao 0,092626749 1,279951e-02 2,361785e-02 educacao 0,056704208 2,612826e-03 5,551871e-03 feminino -0,367785217 2,509705e-02 4,547036e-02 negro -0,166937634 2,204219e-02 4,422801e-02
#==============================================================================
# Utilizando o estimador de White
#==============================================================================
XtXinv <- solve(t(X)%*%X)
S0 <- matrix(0,13,13)
for(i in 1:n){
S0 <- S0 + (fity$res[i]^2)*(X[i,]%*%t(X[i,]))
mii <- 1 - t(X[i,])%*%XtXinv%*%X[i,]
}
S0 <- S0/n
varbw <- n*XtXinv%*%S0%*%XtXinv
sdbw <- sqrt(diag(varbw))
qest3 <- cbind(fity$coef,sqrt(diag(vcov(fity))),sqrt(diag(S)),sdbw)
colnames(qest3) <- c("Coeficientes","Erro Padrao","Err. Pad. Robusto","White")
print(qest3)
# Os erros padrao robustos sao em geral duas vezes os incorretos de minimos
# quadrados (OLS).
# O estimador de White da' o mesmo resultados incorretos de minimos quadrados.
Coeficientes Erro Padrao Err. Pad. Robusto White (Intercept) 5,251123587 7,128679e-02 1,232643e-01 7,435056e-02 exper 0,040104650 2,159175e-03 4,067119e-03 2,157767e-03 exper2 -0,000673377 4,744313e-05 9,110648e-05 4,789466e-05 semana 0,004216089 1,081366e-03 1,538441e-03 1,142606e-03 ocupacao -0,140009344 1,465670e-02 2,718068e-02 1,493567e-02 industria 0,046788640 1,179350e-02 2,360873e-02 1,199425e-02 sul -0,055637368 1,252710e-02 2,609963e-02 1,274420e-02 smsa 0,151667119 1,206870e-02 2,404766e-02 1,207902e-02 casado 0,048448508 2,056867e-02 4,085041e-02 2,049446e-02 uniao 0,092626749 1,279951e-02 2,361785e-02 1,233306e-02 educacao 0,056704208 2,612826e-03 5,551871e-03 2,726454e-03 feminino -0,367785217 2,509705e-02 4,547036e-02 2,310026e-02 negro -0,166937634 2,204219e-02 4,422801e-02 2,074718e-02