library(foreign)
library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.0.0 ✔ purrr 0.2.5
## ✔ tibble 1.4.2 ✔ dplyr 0.7.6
## ✔ tidyr 0.8.1 ✔ stringr 1.3.1
## ✔ readr 1.1.1 ✔ forcats 0.3.0
## ── Conflicts ───────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(knitr)
library(reshape)
##
## Attaching package: 'reshape'
## The following object is masked from 'package:dplyr':
##
## rename
## The following objects are masked from 'package:tidyr':
##
## expand, smiths
library(poLCA)
## Loading required package: scatterplot3d
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(parallel)
library(broom)
migracion <- read.spss("http://www.losmexicanos.unam.mx/migracion/encuesta_nacional/base_datos/Encuesta_Nacional_de_Migracion.sav",
to.data.frame = TRUE)
## re-encoding from CP1252
diccionario <- tibble(nombre = names(migracion),
etiquetas = str_trim(attr(migracion, "variable.labels")))
migracion <- as.tibble(migracion)
f <- cbind(p7_1,p7_2,p7_3,p7_4,p7_5,p7_6,p7_7,p7_8,p7_9,p7_10)~1
set.seed(2018)
migracion %>%
dplyr::select(starts_with("p7_")) %>%
poLCA(f, ., nclass = 5, nrep = 4) -> LCA_3clases
## Model 1: llik = -8908.353 ... best llik = -8908.353
## Model 2: llik = -8890.167 ... best llik = -8890.167
## Model 3: llik = -8906.306 ... best llik = -8890.167
## Model 4: llik = -8911.801 ... best llik = -8890.167
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $p7_1
## Pr(1) Pr(2) Pr(3) Pr(4) Pr(5)
## class 1: 0.7245 0.1585 0.1080 0.0090 0.0000
## class 2: 0.1606 0.7758 0.0636 0.0000 0.0000
## class 3: 0.2630 0.1514 0.1131 0.4173 0.0552
## class 4: 0.1981 0.2488 0.5442 0.0089 0.0000
## class 5: 0.9364 0.0148 0.0464 0.0024 0.0000
##
## $p7_2
## Pr(1) Pr(2) Pr(3) Pr(4) Pr(5)
## class 1: 0.7392 0.1965 0.0577 0.0065 0.0000
## class 2: 0.1394 0.8237 0.0369 0.0000 0.0000
## class 3: 0.1212 0.1704 0.0596 0.5659 0.0828
## class 4: 0.0932 0.3050 0.5929 0.0090 0.0000
## class 5: 0.9819 0.0047 0.0063 0.0072 0.0000
##
## $p7_3
## Pr(1) Pr(2) Pr(3) Pr(4) Pr(5)
## class 1: 0.7712 0.1834 0.0454 0.0000 0.0000
## class 2: 0.1225 0.8351 0.0423 0.0000 0.0000
## class 3: 0.2219 0.1801 0.0182 0.5247 0.0552
## class 4: 0.1555 0.3646 0.4799 0.0000 0.0000
## class 5: 0.9815 0.0167 0.0018 0.0000 0.0000
##
## $p7_4
## Pr(1) Pr(2) Pr(3) Pr(4) Pr(5)
## class 1: 0.2819 0.3845 0.3295 0.0041 0.0000
## class 2: 0.1044 0.6639 0.1923 0.0337 0.0056
## class 3: 0.0281 0.0837 0.1697 0.6079 0.1106
## class 4: 0.0000 0.1205 0.8724 0.0071 0.0000
## class 5: 0.9477 0.0224 0.0127 0.0172 0.0000
##
## $p7_5
## Pr(1) Pr(2) Pr(3) Pr(4) Pr(5)
## class 1: 0.6061 0.2490 0.1329 0.0032 0.0088
## class 2: 0.0799 0.8443 0.0532 0.0226 0.0000
## class 3: 0.0957 0.2769 0.0947 0.4601 0.0726
## class 4: 0.0675 0.3051 0.6199 0.0075 0.0000
## class 5: 0.9781 0.0103 0.0092 0.0024 0.0000
##
## $p7_6
## Pr(1) Pr(2) Pr(3) Pr(4) Pr(5)
## class 1: 0.2422 0.3519 0.3823 0.0236 0.0000
## class 2: 0.0394 0.6912 0.1821 0.0873 0.0000
## class 3: 0.0000 0.0828 0.2267 0.5798 0.1107
## class 4: 0.0410 0.1471 0.8119 0.0000 0.0000
## class 5: 0.9061 0.0499 0.0194 0.0223 0.0024
##
## $p7_7
## Pr(1) Pr(2) Pr(3) Pr(4) Pr(5)
## class 1: 0.7386 0.2110 0.0504 0.0000 0.0000
## class 2: 0.1245 0.8125 0.0630 0.0000 0.0000
## class 3: 0.3459 0.1155 0.1106 0.3452 0.0828
## class 4: 0.2806 0.3162 0.3943 0.0089 0.0000
## class 5: 0.9906 0.0046 0.0000 0.0048 0.0000
##
## $p7_8
## Pr(1) Pr(2) Pr(3) Pr(4) Pr(5)
## class 1: 0.6770 0.2684 0.0515 0.0031 0.0000
## class 2: 0.1205 0.8681 0.0000 0.0114 0.0000
## class 3: 0.1192 0.1849 0.1384 0.4747 0.0828
## class 4: 0.0362 0.3172 0.6392 0.0074 0.0000
## class 5: 0.9880 0.0120 0.0000 0.0000 0.0000
##
## $p7_9
## Pr(1) Pr(2) Pr(3) Pr(4) Pr(5)
## class 1: 0.7065 0.2484 0.0418 0.0033 0.0000
## class 2: 0.0897 0.9103 0.0000 0.0000 0.0000
## class 3: 0.2183 0.0912 0.0693 0.5384 0.0828
## class 4: 0.0637 0.3776 0.5587 0.0000 0.0000
## class 5: 0.9975 0.0025 0.0000 0.0000 0.0000
##
## $p7_10
## Pr(1) Pr(2) Pr(3) Pr(4) Pr(5)
## class 1: 0.3122 0.2894 0.3984 0.0000 0.0000
## class 2: 0.0599 0.7105 0.1729 0.0453 0.0113
## class 3: 0.0143 0.0278 0.1880 0.6732 0.0967
## class 4: 0.0177 0.1174 0.8649 0.0000 0.0000
## class 5: 0.9730 0.0082 0.0063 0.0102 0.0024
##
## Estimated class population shares
## 0.259 0.1473 0.0604 0.1866 0.3467
##
## Predicted class memberships (by modal posterior prob.)
## 0.2585 0.1501 0.0601 0.1852 0.3461
##
## =========================================================
## Fit for 5 latent classes:
## =========================================================
## number of observations: 1199
## number of estimated parameters: 204
## residual degrees of freedom: 995
## maximum log-likelihood: -8890.167
##
## AIC(5): 18188.33
## BIC(5): 19226.54
## G^2(5): 6066.052 (Likelihood ratio/deviance statistic)
## X^2(5): 17940695627 (Chi-square goodness of fit)
##
etiquetas_item <- tibble(outcome = 1:length(levels(migracion$p7_1)),
etiqueta_item = levels(migracion$p7_1)
)
LCA_3clases %>%
tidy %>%
left_join(dplyr::rename(diccionario, variable = nombre)) %>%
left_join(etiquetas_item) %>%
mutate(etiquetas = str_remove(etiquetas,
"7 ¿Estaría dispuesto o no estaría dispuesto a permitir que en su casa vivieran personas "),
etiquetas = str_wrap(etiquetas, 30),
etiqueta_item = factor(etiqueta_item,
levels = c("No", "Sí, en parte", "Sí", "NS", "NC")),
class = paste("Clase", class)) %>%
ggplot(aes(x = etiquetas, fill = etiqueta_item, y = estimate)) +
geom_col() +
facet_wrap(~class, nrow = 1) +
scale_fill_viridis_d() +
theme_minimal() +
theme(legend.position = "bottom") +
labs(fill = "Respuesta",
x = "¿Estaría dispuesto o no estaría dispuesto a
permitir que en su casa vivieran personas ",
y = "Probabilidad de respuesta por clase") +
coord_flip()
## Joining, by = "variable"
## Joining, by = "outcome"

Análisis de clases latentes con covariadas
f.cov <- cbind(p7_1,p7_2,p7_3,p7_4,p7_5,p7_6,p7_7,p7_8,p7_9,p7_10)~sd2+sd4
set.seed(2018)
migracion %>%
dplyr::select(starts_with("p7_"), sd1, sd2, sd4) %>%
mutate(sd2 = as.numeric(str_trim(as.character(sd2)))) %>%
mutate_all(na_if, "NS") %>%
mutate_all(na_if, "NC") %>%
drop_na %>%
mutate(sd4 = str_trim(as.character(sd4))) %>%
mutate(sd4 = recode(sd4,
"Preescolar" = "Pri o <",
"Ninguno" = "Pri o <",
"Primaria" = "Pri o <",
"Preparatoria o Bachillerato" = "Prepa",
"Carrera técnica" = "Prepa",
"Normal" = "Prepa",
"Licenciatura (Profesional)" = "Univ",
"Maestría" = "Univ",
"Doctorado" = "Univ"
)) %>%
mutate_if(is.factor, droplevels) %>%
mutate(sd4 = fct_relevel(sd4, "Pri o <")) -> datos
## Warning in evalq(as.numeric(str_trim(as.character(sd2))), <environment>):
## NAs introducidos por coerción
poLCA(f.cov, datos, nclass = 3, nrep = 6) -> LCAcov_3clases
## Model 1: llik = -7228.091 ... best llik = -7228.091
## Model 2: llik = -7067.375 ... best llik = -7067.375
## Model 3: llik = -7067.375 ... best llik = -7067.375
## Model 4: llik = -7067.375 ... best llik = -7067.375
## Model 5: llik = -7259.869 ... best llik = -7067.375
## Model 6: llik = -7067.375 ... best llik = -7067.375
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $p7_1
## Pr(1) Pr(2) Pr(3)
## class 1: 0.1928 0.1318 0.6754
## class 2: 0.3161 0.5606 0.1233
## class 3: 0.9231 0.0279 0.0490
##
## $p7_2
## Pr(1) Pr(2) Pr(3)
## class 1: 0.1412 0.1639 0.6949
## class 2: 0.2884 0.6074 0.1043
## class 3: 0.9592 0.0255 0.0154
##
## $p7_3
## Pr(1) Pr(2) Pr(3)
## class 1: 0.2045 0.2182 0.5773
## class 2: 0.3070 0.6161 0.0769
## class 3: 0.9528 0.0370 0.0103
##
## $p7_4
## Pr(1) Pr(2) Pr(3)
## class 1: 0.0000 0.0793 0.9207
## class 2: 0.1361 0.5146 0.3493
## class 3: 0.7924 0.1158 0.0918
##
## $p7_5
## Pr(1) Pr(2) Pr(3)
## class 1: 0.0807 0.1362 0.7831
## class 2: 0.2087 0.6676 0.1237
## class 3: 0.9251 0.0499 0.0249
##
## $p7_6
## Pr(1) Pr(2) Pr(3)
## class 1: 0.0650 0.0719 0.8631
## class 2: 0.0942 0.5714 0.3344
## class 3: 0.7528 0.1239 0.1233
##
## $p7_7
## Pr(1) Pr(2) Pr(3)
## class 1: 0.3258 0.1905 0.4837
## class 2: 0.3075 0.6147 0.0778
## class 3: 0.9611 0.0277 0.0112
##
## $p7_8
## Pr(1) Pr(2) Pr(3)
## class 1: 0.0865 0.1770 0.7366
## class 2: 0.2258 0.6863 0.0879
## class 3: 0.9582 0.0336 0.0082
##
## $p7_9
## Pr(1) Pr(2) Pr(3)
## class 1: 0.1170 0.2112 0.6719
## class 2: 0.2238 0.7139 0.0623
## class 3: 0.9724 0.0209 0.0067
##
## $p7_10
## Pr(1) Pr(2) Pr(3)
## class 1: 0.0070 0.0645 0.9285
## class 2: 0.1278 0.5100 0.3622
## class 3: 0.8175 0.0775 0.1050
##
## Estimated class population shares
## 0.1504 0.3289 0.5207
##
## Predicted class memberships (by modal posterior prob.)
## 0.1496 0.3267 0.5237
##
## =========================================================
## Fit for 3 latent classes:
## =========================================================
## 2 / 1
## Coefficient Std. error t value Pr(>|t|)
## (Intercept) 0.40029 0.44898 0.892 0.373
## sd2 -0.00746 0.00796 -0.938 0.349
## sd4Prepa 0.97084 0.32532 2.984 0.003
## sd4Secundaria 0.94808 0.29100 3.258 0.001
## sd4Univ 0.74154 0.45650 1.624 0.105
## =========================================================
## 3 / 1
## Coefficient Std. error t value Pr(>|t|)
## (Intercept) 1.47085 0.40736 3.611 0.000
## sd2 -0.01907 0.00739 -2.581 0.010
## sd4Prepa 0.72662 0.29188 2.489 0.013
## sd4Secundaria 0.55436 0.26535 2.089 0.037
## sd4Univ 1.29784 0.39714 3.268 0.001
## =========================================================
## number of observations: 1056
## number of estimated parameters: 70
## residual degrees of freedom: 986
## maximum log-likelihood: -7067.375
##
## AIC(3): 14274.75
## BIC(3): 14622.11
## X^2(3): 162215.4 (Chi-square goodness of fit)
##
## ALERT: estimation algorithm automatically restarted with new initial values
##
LCAcov_3clases %>%
tidy %>%
left_join(dplyr::rename(diccionario, variable = nombre)) %>%
left_join(etiquetas_item) %>%
mutate(etiquetas = str_remove(etiquetas,
"7 ¿Estaría dispuesto o no estaría dispuesto a permitir que en su casa vivieran personas "),
etiquetas = str_wrap(etiquetas, 30),
etiqueta_item = factor(etiqueta_item,
levels = c("No", "Sí, en parte", "Sí", "NS", "NC")),
class = paste("Clase", class)) %>%
ggplot(aes(x = etiquetas, fill = etiqueta_item, y = estimate)) +
geom_col() +
facet_wrap(~class, nrow = 1) +
scale_fill_viridis_d() +
theme_minimal() +
theme(legend.position = "bottom") +
labs(fill = "Respuesta",
x = "¿Estaría dispuesto o no estaría dispuesto a
permitir que en su casa vivieran personas ",
y = "Probabilidad de respuesta por clase") +
coord_flip()
## Joining, by = "variable"
## Joining, by = "outcome"

plot(effects::allEffects(LCAcov_3clases))
