In [1]:
# rm(list=ls()) 
options(OutDec = ",")
#==============================================================================
# Exemplo:
#     Salarios de chefes de familia entre 1976 e 1982. 
#==============================================================================
# Analise
#==============================================================================
library(MASS)
dataxy       <- read.table("../dados/d12_salarios.txt",header=T)
print(head(dataxy))
lnsalario    <- dataxy[,12]
exper        <- dataxy[,1]
exper2       <- exper^2
semana       <- dataxy[,2]
ocupacao     <- dataxy[,3]
industria    <- dataxy[,4]
sul          <- dataxy[,5]
smsa         <- dataxy[,6]
casado       <- dataxy[,7]
uniao        <- dataxy[,9]
educacao     <- dataxy[,10]
feminino     <- dataxy[,8]
negro        <- dataxy[,11]
n            <- length(lnsalario)
  exp wks occ ind south smsa ms fem union ed blk   lwage
1   3  32   0   0     1    0  1   0     0  9   0 5,56068
2   4  43   0   0     1    0  1   0     0  9   0 5,72031
3   5  40   0   0     1    0  1   0     0  9   0 5,99645
4   6  39   0   0     1    0  1   0     0  9   0 5,99645
5   7  42   0   1     1    0  1   0     0  9   0 6,06146
6   8  35   0   1     1    0  1   0     0  9   0 6,17379
In [2]:
#==============================================================================
# Ajuste da regressao linear por minimos quadrados
# Regressao de colecao (pooled)
#==============================================================================
# Estimacao por minimos quadrados ordinarios
fity <- lm(lnsalario ~ exper + exper2 + semana + ocupacao + industria 
         + sul + smsa + casado + uniao)
SSEpooled <- sum(fity$res^2)
print(summary(fity))
print("==== ==== ==== ==== ==== ====")
stats0 <- cbind(fity$coef,sqrt(diag(vcov(fity))))
colnames(stats0) <- c("Coeficiente","Erro Padrao")
print(stats0)

# Estimacao Robusta do Erro Padrao
X   <- cbind(rep(1,n),exper,exper2,semana,ocupacao,industria,sul,
             smsa,casado,uniao)
p   <- dim(X)[2]
e   <- fity$res
Q   <- matrix(0,p,p)
T   <- matrix(0,p,p)
j1  <- 1
j2  <- 7
k   <- 0
for(i in 1:595){
    z  <- rep(0,p)
    for(t in 1:7){
        k  <- k + 1
        z  <- z + X[k,]*e[k]
    }
    Q  <- Q + t(X[j1:j2,])%*%X[j1:j2,]
    T  <- T + z%*%t(z)
    j1 <- j1 + 7
    j2 <- j2 + 7
}
P   <- solve(Q)
S   <- P%*%T%*%P

stats1 <- cbind(fity$coef,sqrt(diag(S)))
colnames(stats1) <- c("Coeficiente","Erro Padrao")
print("==== ==== ==== ==== ==== ====")
print(stats1)
Call:
lm(formula = lnsalario ~ exper + exper2 + semana + ocupacao + 
    industria + sul + smsa + casado + uniao)

Residuals:
     Min       1Q   Median       3Q      Max 
-2,03101 -0,25501 -0,00924  0,24810  2,17443 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  5,880e+00  6,035e-02  97,428  < 2e-16 ***
exper        3,611e-02  2,357e-03  15,318  < 2e-16 ***
exper2      -6,550e-04  5,186e-05 -12,629  < 2e-16 ***
semana       4,461e-03  1,180e-03   3,780 0,000159 ***
ocupacao    -3,176e-01  1,349e-02 -23,538  < 2e-16 ***
industria    3,213e-02  1,277e-02   2,516 0,011894 *  
sul         -1,137e-01  1,345e-02  -8,453  < 2e-16 ***
smsa         1,586e-01  1,303e-02  12,173  < 2e-16 ***
casado       3,203e-01  1,585e-02  20,213  < 2e-16 ***
uniao        6,975e-02  1,392e-02   5,009 5,69e-07 ***
---
Signif. codes:  0 ‘***’ 0,001 ‘**’ 0,01 ‘*’ 0,05 ‘.’ 0,1 ‘ ’ 1

Residual standard error: 0,3823 on 4155 degrees of freedom
Multiple R-squared:  0,3155,	Adjusted R-squared:  0,314 
F-statistic: 212,7 on 9 and 4155 DF,  p-value: < 2,2e-16

[1] "==== ==== ==== ==== ==== ===="
              Coeficiente  Erro Padrao
(Intercept)  5,8802364337 6,035439e-02
exper        0,0361095011 2,357291e-03
exper2      -0,0006550021 5,186458e-05
semana       0,0044612967 1,180097e-03
ocupacao    -0,3176203748 1,349408e-02
industria    0,0321346501 1,277024e-02
sul         -0,1136763426 1,344857e-02
smsa         0,1585789272 1,302696e-02
casado       0,3203284434 1,584772e-02
uniao        0,0697536098 1,392442e-02
[1] "==== ==== ==== ==== ==== ===="
              Coeficiente  Erro Padrao
(Intercept)  5,8802364337 0,0965426056
exper        0,0361095011 0,0045241581
exper2      -0,0006550021 0,0001013902
semana       0,0044612967 0,0017250510
ocupacao    -0,3176203748 0,0272120873
industria    0,0321346501 0,0252122647
sul         -0,1136763426 0,0286267042
smsa         0,1585789272 0,0259672168
casado       0,3203284434 0,0348732598
uniao        0,0697536098 0,0266187998
In [3]:
#==============================================================================
# Ajuste da regressao linear por minimos quadrados
# Regressao de efeitos fixos
#==============================================================================
w   <- rep(1,7)
h   <- matrix(0,595,595)
for(i in 1:595) h[i,i] <- 1
D   <- kronecker(h,w)
X   <- cbind(exper,exper2,semana,ocupacao,industria,sul,smsa,casado,uniao,D)
rm(list=c("w","h","D"))
XtXinv       <- solve(t(X)%*%X)
Xty          <- t(X)%*%lnsalario
betahat      <- XtXinv%*%Xty
res          <- lnsalario - X%*%betahat
p            <- length(betahat)
s2           <- sum(res^2)/(n-p)
varbeta      <- s2*XtXinv
sdbeta       <- sqrt(diag(varbeta))

stats2 <- cbind(betahat[1:9],sdbeta[1:9])
colnames(stats2) <- c("Coeficiente","Erro Padrao")
print("==== ==== ==== ==== ==== ====")
print(stats2)

# Outra maneira de fazer o ajuste.
fitz      <- lm(lnsalario ~ X - 1) # -1: sem intercepto
SSEfixed  <- sum(fitz$res^2)
stats3 <- cbind(fitz$coef[1:9],sqrt(diag(vcov(fitz)))[1:9])
colnames(stats3) <- c("Coeficiente","Erro Padrao")
print("==== ==== ==== ==== ==== ====")
print(stats3)
[1] "==== ==== ==== ==== ==== ===="
            Coeficiente  Erro Padrao
exper      0,1132082750 2,471036e-03
exper2    -0,0004183513 5,459451e-05
semana     0,0008359460 5,996694e-04
ocupacao  -0,0214764983 1,378368e-02
industria  0,0192101222 1,544630e-02
sul       -0,0018611924 3,429928e-02
smsa      -0,0424691528 1,942836e-02
casado    -0,0297258386 1,898357e-02
uniao      0,0327848598 1,492287e-02
[1] "==== ==== ==== ==== ==== ===="
             Coeficiente  Erro Padrao
Xexper      0,1132082750 2,471036e-03
Xexper2    -0,0004183513 5,459451e-05
Xsemana     0,0008359460 5,996694e-04
Xocupacao  -0,0214764983 1,378368e-02
Xindustria  0,0192101222 1,544630e-02
Xsul       -0,0018611924 3,429928e-02
Xsmsa      -0,0424691528 1,942836e-02
Xcasado    -0,0297258386 1,898357e-02
Xuniao      0,0327848598 1,492287e-02
In [4]:
#==============================================================================
# Ajuste da regressao linear por minimos quadrados
# Regressao de efeitos aleatorios
#==============================================================================
# Estimando sigma_u^2 + sigma_eps^2
sig2total <- SSEpooled / (n-10) # da regressao de colecao (pooled)
sig2eps   <- SSEfixed / (n - 595 - 9)
sig2u     <- sig2total - sig2eps 
X         <- cbind(rep(1,n),exper,exper2,semana,ocupacao,industria,sul,
                   smsa,casado,uniao)
p         <- dim(X)[2]
SIGMA     <- matrix(sig2u,7,7)
for(i in 1:7) SIGMA[i,i] <- sig2total
H       <- solve(SIGMA)
Q       <- matrix(0,p,p)
A       <- matrix(0,p,p)
T       <- rep(0,p)
j1      <- 1
j2      <- 7
for(i in 1:595){
    B  <- t(X[j1:j2,])%*%H
    A  <- A + t(X[j1:j2,])%*%H%*%X[j1:j2,]
    Q  <- Q + B%*%X[j1:j2,]
    T  <- T + B%*%lnsalario[j1:j2]
    j1 <- j1 + 7
    j2 <- j2 + 7
}
P       <- solve(Q)
bhat    <- P%*%T 
res     <- lnsalario - X%*%bhat
bvcov   <- solve(A) # ( X^T * SIGMA^{-1} * X )^{-1}

stats4 <- cbind(bhat,sqrt(diag(bvcov)))
colnames(stats4) <- c("Coeficiente","Erro Padrao")
print("==== ==== ==== ==== ==== ====")
print(stats4)
[1] "==== ==== ==== ==== ==== ===="
            Coeficiente  Erro Padrao
           5,3454691959 4,361449e-02
exper      0,0890629490 2,280338e-03
exper2    -0,0007577415 5,035979e-05
semana     0,0010664393 5,939245e-04
ocupacao  -0,1067181875 1,268771e-02
industria -0,0163723408 1,390745e-02
sul       -0,0689930882 2,353983e-02
smsa      -0,0153040846 1,649340e-02
casado    -0,0239787445 1,711092e-02
uniao      0,0359659052 1,367374e-02