# rm(list=ls())
options(OutDec = ",")
#==============================================================================
# Exemplo:
# Salarios de chefes de familia entre 1976 e 1982.
#==============================================================================
# Analise
#==============================================================================
library(MASS)
dataxy <- read.table("../dados/d12_salarios.txt",header=T)
print(head(dataxy))
lnsalario <- dataxy[,12]
exper <- dataxy[,1]
exper2 <- exper^2
semana <- dataxy[,2]
ocupacao <- dataxy[,3]
industria <- dataxy[,4]
sul <- dataxy[,5]
smsa <- dataxy[,6]
casado <- dataxy[,7]
uniao <- dataxy[,9]
educacao <- dataxy[,10]
feminino <- dataxy[,8]
negro <- dataxy[,11]
n <- length(lnsalario)
exp wks occ ind south smsa ms fem union ed blk lwage 1 3 32 0 0 1 0 1 0 0 9 0 5,56068 2 4 43 0 0 1 0 1 0 0 9 0 5,72031 3 5 40 0 0 1 0 1 0 0 9 0 5,99645 4 6 39 0 0 1 0 1 0 0 9 0 5,99645 5 7 42 0 1 1 0 1 0 0 9 0 6,06146 6 8 35 0 1 1 0 1 0 0 9 0 6,17379
#==============================================================================
# Ajuste da regressao linear por minimos quadrados
# Regressao de colecao (pooled)
#==============================================================================
# Estimacao por minimos quadrados ordinarios
fity <- lm(lnsalario ~ exper + exper2 + semana + ocupacao + industria
+ sul + smsa + casado + uniao)
SSEpooled <- sum(fity$res^2)
print(summary(fity))
print("==== ==== ==== ==== ==== ====")
stats0 <- cbind(fity$coef,sqrt(diag(vcov(fity))))
colnames(stats0) <- c("Coeficiente","Erro Padrao")
print(stats0)
# Estimacao Robusta do Erro Padrao
X <- cbind(rep(1,n),exper,exper2,semana,ocupacao,industria,sul,
smsa,casado,uniao)
p <- dim(X)[2]
e <- fity$res
Q <- matrix(0,p,p)
T <- matrix(0,p,p)
j1 <- 1
j2 <- 7
k <- 0
for(i in 1:595){
z <- rep(0,p)
for(t in 1:7){
k <- k + 1
z <- z + X[k,]*e[k]
}
Q <- Q + t(X[j1:j2,])%*%X[j1:j2,]
T <- T + z%*%t(z)
j1 <- j1 + 7
j2 <- j2 + 7
}
P <- solve(Q)
S <- P%*%T%*%P
stats1 <- cbind(fity$coef,sqrt(diag(S)))
colnames(stats1) <- c("Coeficiente","Erro Padrao")
print("==== ==== ==== ==== ==== ====")
print(stats1)
Call:
lm(formula = lnsalario ~ exper + exper2 + semana + ocupacao +
industria + sul + smsa + casado + uniao)
Residuals:
Min 1Q Median 3Q Max
-2,03101 -0,25501 -0,00924 0,24810 2,17443
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5,880e+00 6,035e-02 97,428 < 2e-16 ***
exper 3,611e-02 2,357e-03 15,318 < 2e-16 ***
exper2 -6,550e-04 5,186e-05 -12,629 < 2e-16 ***
semana 4,461e-03 1,180e-03 3,780 0,000159 ***
ocupacao -3,176e-01 1,349e-02 -23,538 < 2e-16 ***
industria 3,213e-02 1,277e-02 2,516 0,011894 *
sul -1,137e-01 1,345e-02 -8,453 < 2e-16 ***
smsa 1,586e-01 1,303e-02 12,173 < 2e-16 ***
casado 3,203e-01 1,585e-02 20,213 < 2e-16 ***
uniao 6,975e-02 1,392e-02 5,009 5,69e-07 ***
---
Signif. codes: 0 ‘***’ 0,001 ‘**’ 0,01 ‘*’ 0,05 ‘.’ 0,1 ‘ ’ 1
Residual standard error: 0,3823 on 4155 degrees of freedom
Multiple R-squared: 0,3155, Adjusted R-squared: 0,314
F-statistic: 212,7 on 9 and 4155 DF, p-value: < 2,2e-16
[1] "==== ==== ==== ==== ==== ===="
Coeficiente Erro Padrao
(Intercept) 5,8802364337 6,035439e-02
exper 0,0361095011 2,357291e-03
exper2 -0,0006550021 5,186458e-05
semana 0,0044612967 1,180097e-03
ocupacao -0,3176203748 1,349408e-02
industria 0,0321346501 1,277024e-02
sul -0,1136763426 1,344857e-02
smsa 0,1585789272 1,302696e-02
casado 0,3203284434 1,584772e-02
uniao 0,0697536098 1,392442e-02
[1] "==== ==== ==== ==== ==== ===="
Coeficiente Erro Padrao
(Intercept) 5,8802364337 0,0965426056
exper 0,0361095011 0,0045241581
exper2 -0,0006550021 0,0001013902
semana 0,0044612967 0,0017250510
ocupacao -0,3176203748 0,0272120873
industria 0,0321346501 0,0252122647
sul -0,1136763426 0,0286267042
smsa 0,1585789272 0,0259672168
casado 0,3203284434 0,0348732598
uniao 0,0697536098 0,0266187998
#==============================================================================
# Ajuste da regressao linear por minimos quadrados
# Regressao de efeitos fixos
#==============================================================================
w <- rep(1,7)
h <- matrix(0,595,595)
for(i in 1:595) h[i,i] <- 1
D <- kronecker(h,w)
X <- cbind(exper,exper2,semana,ocupacao,industria,sul,smsa,casado,uniao,D)
rm(list=c("w","h","D"))
XtXinv <- solve(t(X)%*%X)
Xty <- t(X)%*%lnsalario
betahat <- XtXinv%*%Xty
res <- lnsalario - X%*%betahat
p <- length(betahat)
s2 <- sum(res^2)/(n-p)
varbeta <- s2*XtXinv
sdbeta <- sqrt(diag(varbeta))
stats2 <- cbind(betahat[1:9],sdbeta[1:9])
colnames(stats2) <- c("Coeficiente","Erro Padrao")
print("==== ==== ==== ==== ==== ====")
print(stats2)
# Outra maneira de fazer o ajuste.
fitz <- lm(lnsalario ~ X - 1) # -1: sem intercepto
SSEfixed <- sum(fitz$res^2)
stats3 <- cbind(fitz$coef[1:9],sqrt(diag(vcov(fitz)))[1:9])
colnames(stats3) <- c("Coeficiente","Erro Padrao")
print("==== ==== ==== ==== ==== ====")
print(stats3)
[1] "==== ==== ==== ==== ==== ===="
Coeficiente Erro Padrao
exper 0,1132082750 2,471036e-03
exper2 -0,0004183513 5,459451e-05
semana 0,0008359460 5,996694e-04
ocupacao -0,0214764983 1,378368e-02
industria 0,0192101222 1,544630e-02
sul -0,0018611924 3,429928e-02
smsa -0,0424691528 1,942836e-02
casado -0,0297258386 1,898357e-02
uniao 0,0327848598 1,492287e-02
[1] "==== ==== ==== ==== ==== ===="
Coeficiente Erro Padrao
Xexper 0,1132082750 2,471036e-03
Xexper2 -0,0004183513 5,459451e-05
Xsemana 0,0008359460 5,996694e-04
Xocupacao -0,0214764983 1,378368e-02
Xindustria 0,0192101222 1,544630e-02
Xsul -0,0018611924 3,429928e-02
Xsmsa -0,0424691528 1,942836e-02
Xcasado -0,0297258386 1,898357e-02
Xuniao 0,0327848598 1,492287e-02
#==============================================================================
# Ajuste da regressao linear por minimos quadrados
# Regressao de efeitos aleatorios
#==============================================================================
# Estimando sigma_u^2 + sigma_eps^2
sig2total <- SSEpooled / (n-10) # da regressao de colecao (pooled)
sig2eps <- SSEfixed / (n - 595 - 9)
sig2u <- sig2total - sig2eps
X <- cbind(rep(1,n),exper,exper2,semana,ocupacao,industria,sul,
smsa,casado,uniao)
p <- dim(X)[2]
SIGMA <- matrix(sig2u,7,7)
for(i in 1:7) SIGMA[i,i] <- sig2total
H <- solve(SIGMA)
Q <- matrix(0,p,p)
A <- matrix(0,p,p)
T <- rep(0,p)
j1 <- 1
j2 <- 7
for(i in 1:595){
B <- t(X[j1:j2,])%*%H
A <- A + t(X[j1:j2,])%*%H%*%X[j1:j2,]
Q <- Q + B%*%X[j1:j2,]
T <- T + B%*%lnsalario[j1:j2]
j1 <- j1 + 7
j2 <- j2 + 7
}
P <- solve(Q)
bhat <- P%*%T
res <- lnsalario - X%*%bhat
bvcov <- solve(A) # ( X^T * SIGMA^{-1} * X )^{-1}
stats4 <- cbind(bhat,sqrt(diag(bvcov)))
colnames(stats4) <- c("Coeficiente","Erro Padrao")
print("==== ==== ==== ==== ==== ====")
print(stats4)
[1] "==== ==== ==== ==== ==== ===="
Coeficiente Erro Padrao
5,3454691959 4,361449e-02
exper 0,0890629490 2,280338e-03
exper2 -0,0007577415 5,035979e-05
semana 0,0010664393 5,939245e-04
ocupacao -0,1067181875 1,268771e-02
industria -0,0163723408 1,390745e-02
sul -0,0689930882 2,353983e-02
smsa -0,0153040846 1,649340e-02
casado -0,0239787445 1,711092e-02
uniao 0,0359659052 1,367374e-02