IRT without the normality assumption

library(IRTest)
library(ggplot2)




1. Dichotomous items

The function DataGeneration can be used for the pre-analysis step. This function returns artificial data and some useful objects for analysis (i.e., theta, data_D, item_D, and initialitem_D).

In the parameter estimation process, the initialitem can be used as an input of the function IRTest_Dich (i.e., initialitem = initialitem). The data is artificial item response data, which is unnecessary if user-imported item response data is used. The theta and item are not used in the estimation process. They can be considered the true parameters only if the artificial data (data) is used for analysis.

# Simulate responses of 500 examinees to 10 dichotomous items (and no
# polytomous items). Items 1-5 are generated under model 1 and items 6-10
# under model 2. The seed is fixed so the example can be rerun.
# NOTE(review): d, sd_ratio and prob presumably shape the simulated latent
# distribution -- confirm against ?DataGeneration.
Alldata <- DataGeneration(
  seed = 123456789,
  N = 500,
  nitem_D = 10,
  nitem_P = 0,
  model_D = rep(1:2, each = 5),
  d = 1.664,
  sd_ratio = 2,
  prob = 0.3
)

# Pull out the pieces used in the rest of this section.
theta <- Alldata$theta
data <- Alldata$data_D
item <- Alldata$item_D
initialitem <- Alldata$initialitem_D

If the artificial data (data) is used, the true latent distribution looks like,




######                            ######
###### Empirical histogram method ######
######                            ######
# Fit the dichotomous model with latent_dist = "EHM"; the per-item model
# vector matches the one used to generate the data.
# NOTE(review): max_iter and threshold presumably bound the number of
# iterations and set the convergence criterion -- confirm in ?IRTest_Dich.
Mod1 <- IRTest_Dich(
  data = data,
  initialitem = initialitem,
  model = rep(1:2, each = 5),
  latent_dist = "EHM",
  max_iter = 200,
  threshold = 0.0001
)

######                                  ######
###### Kernel density estimation method ######
######                                  ######
# Mod1 <- IRTest_Dich(initialitem = initialitem,
#                     data = data,
#                     model = rep(1:2, each=5),
#                     latent_dist = "KDE",
#                     bandwidth = "SJ-ste",
#                     max_iter = 200,
#                     threshold = .001)

######                      ######
###### Normality assumption ######
######                      ######
#  Mod1 <- IRTest_Dich(initialitem = initialitem,
#                      data = data,
#                      model = rep(1:2, each=5),
#                      latent_dist = "Normal",
#                      max_iter = 200,
#                      threshold = .0001)

######                                             ######
###### Two-component Gaussian mixture distribution ######
######                                             ######
#  Mod1 <- IRTest_Dich(initialitem = initialitem,
#                      data = data,
#                      model = rep(1:2, each=5),
#                      latent_dist = "Mixture",
#                      max_iter = 200,
#                      threshold = .0001)

######                                                        ######
###### Davidian curve (for an arbitrarily chosen case of h=4) ######
######                                                        ######
#  Mod1 <- IRTest_Dich(initialitem = initialitem,
#                      data = data,
#                      model = rep(1:2, each=5),
#                      latent_dist = "DC",
#                      max_iter = 200,
#                      threshold = .0001,
#                      h=4)




### The estimated item parameters
# Auto-prints the item parameter estimates of the fitted object: one row
# per item, columns a, b and c. In the printed output, a is 1 for items 1-5
# (fitted with model 1) and varies for items 6-10 (model 2); c is 0 for
# every item.
Mod1$par_est
#>              a           b c
#>  [1,] 1.000000 -0.74508109 0
#>  [2,] 1.000000  0.51116617 0
#>  [3,] 1.000000  0.80336947 0
#>  [4,] 1.000000  0.53158883 0
#>  [5,] 1.000000 -0.39455364 0
#>  [6,] 1.700258  0.01012807 0
#>  [7,] 1.526307  0.89420050 0
#>  [8,] 2.203176 -1.12635131 0
#>  [9,] 1.566285  0.39084407 0
#> [10,] 1.385190  0.76792824 0

### The asymptotic standard errors of item parameters
# Auto-prints the standard errors in the same item-by-parameter layout as
# the estimates. In the printed output the entries for a (items 1-5) and
# for c (all items) are NA -- presumably because those parameters are held
# fixed rather than estimated.
Mod1$se
#>               a          b  c
#>  [1,]        NA 0.10304406 NA
#>  [2,]        NA 0.10098172 NA
#>  [3,]        NA 0.10355444 NA
#>  [4,]        NA 0.10112153 NA
#>  [5,]        NA 0.10034057 NA
#>  [6,] 0.1446766 0.06711641 NA
#>  [7,] 0.1537574 0.08561540 NA
#>  [8,] 0.2434543 0.07258288 NA
#>  [9,] 0.1404522 0.07275011 NA
#> [10,] 0.1388418 0.08841969 NA

### The estimated ability parameters
# head() shows only the first six ability estimates.
head(Mod1$theta)
#> [1] -0.8551470 -0.6567867 -0.7198031 -1.0039922 -1.2714637 -0.7823374

### The estimated latent distribution
# Draws the latent distribution of the fitted model; xlim = c(-6,6) is
# passed to plot_LD, presumably as the plotted ability range.
plot_LD(Mod1, xlim = c(-6,6))




2. Polytomous items

As in the case of dichotomous items, the function DataGeneration can be used for the pre-analysis step. This function returns artificial data and some useful objects for analysis (i.e., theta, data_P, item_P, and initialitem_P).

In the parameter estimation process, the initialitem can be used as an input of the function IRTest_Poly (i.e., initialitem = initialitem). The data is artificial item response data, which is unnecessary if user-imported item response data is used. The theta and item are not used in the estimation process. They can be considered the true parameters only if the artificial data (data) is used for analysis.

# Simulate responses of 1000 examinees to 10 polytomous items under
# model_P = "GPCM" (and no dichotomous items). Items 1-5 have 3 categories
# and items 6-10 have 7. The seed is fixed so the example can be rerun.
# NOTE(review): d, sd_ratio and prob presumably shape the simulated latent
# distribution -- confirm against ?DataGeneration.
Alldata <- DataGeneration(
  seed = 123456789,
  N = 1000,
  nitem_D = 0,
  nitem_P = 10,
  model_P = "GPCM",
  categ = rep(c(3, 7), each = 5),
  d = 1.414,
  sd_ratio = 2,
  prob = 0.5
)

# Pull out the pieces used in the rest of this section.
theta <- Alldata$theta
data <- Alldata$data_P
item <- Alldata$item_P
initialitem <- Alldata$initialitem_P

If the artificial data (data) is used, the true latent distribution looks like,




######                                  ######
###### Kernel density estimation method ######
######                                  ######
# Fit the polytomous model ("GPCM") with latent_dist = "KDE" and the
# "SJ-ste" bandwidth option.
# NOTE(review): max_iter and threshold presumably bound the number of
# iterations and set the convergence criterion -- confirm in ?IRTest_Poly.
Mod1 <- IRTest_Poly(
  data = data,
  initialitem = initialitem,
  model = "GPCM",
  latent_dist = "KDE",
  bandwidth = "SJ-ste",
  max_iter = 200,
  threshold = 0.001
)

######                      ######
###### Normality assumption ######
######                      ######
#  Mod1 <- IRTest_Poly(initialitem = initialitem,
#                      data = data,
#                      model = "GPCM",
#                      latent_dist = "Normal",
#                      max_iter = 200,
#                      threshold = .001)

######                            ######
###### Empirical histogram method ######
######                            ######
#  Mod1 <- IRTest_Poly(initialitem = initialitem,
#                      data = data,
#                      model = "GPCM",
#                      latent_dist = "EHM",
#                      max_iter = 200,
#                      threshold = .001)

######                                             ######
###### Two-component Gaussian mixture distribution ######
######                                             ######
#  Mod1 <- IRTest_Poly(initialitem = initialitem,
#                      data = data,
#                      model = "GPCM",
#                      latent_dist = "Mixture",
#                      max_iter = 200,
#                      threshold = .001)

######                                                        ######
###### Davidian curve (for an arbitrarily chosen case of h=4) ######
######                                                        ######
#  Mod1 <- IRTest_Poly(initialitem = initialitem,
#                      data = data,
#                      model = "GPCM",
#                      latent_dist = "DC",
#                      max_iter = 200,
#                      threshold = .001,
#                      h=4)




### The estimated item parameters
# Auto-prints the item parameter estimates: one row per item, columns a and
# b_1 to b_6. In the printed output, items 1-5 (3 categories) fill only b_1
# and b_2 and show NA in b_3 to b_6; items 6-10 (7 categories) fill all six
# b columns.
Mod1$par_est
#>               a          b_1         b_2        b_3         b_4        b_5
#>  [1,] 1.9320419  0.396698909  0.46603651         NA          NA         NA
#>  [2,] 1.6880632 -0.303763927  0.05188864         NA          NA         NA
#>  [3,] 2.0504530 -0.376056390 -0.24941074         NA          NA         NA
#>  [4,] 1.0605873 -0.159282350  0.16815184         NA          NA         NA
#>  [5,] 0.7835049  0.003729855  0.22658723         NA          NA         NA
#>  [6,] 1.8907281  0.253908544  0.29706486  0.5563451  0.56108999  0.7435219
#>  [7,] 1.5439636 -1.603051232 -1.12357477 -1.0013097 -1.06426560 -0.7066684
#>  [8,] 0.9479375 -0.332417438 -0.17196848  0.3086190  0.06595696 -0.2510836
#>  [9,] 1.4230350 -0.784687384 -0.69487372 -0.6851528 -0.46813907 -0.5758896
#> [10,] 2.5827215  0.546921194  0.94939512  0.7099745  1.04467562  1.0309654
#>              b_6
#>  [1,]         NA
#>  [2,]         NA
#>  [3,]         NA
#>  [4,]         NA
#>  [5,]         NA
#>  [6,]  0.8505778
#>  [7,] -0.6593098
#>  [8,]  0.1465100
#>  [9,] -0.4351178
#> [10,]  1.2435975

### The asymptotic standard errors of item parameters
# Auto-prints the standard errors in the same item-by-parameter layout as
# the estimates; the printed output shows NA in b_3 to b_6 for the
# 3-category items 1-5.
Mod1$se
#>                a        b_1        b_2       b_3        b_4        b_5
#>  [1,] 0.11560425 0.06407208 0.06475432        NA         NA         NA
#>  [2,] 0.09873818 0.05692682 0.05873271        NA         NA         NA
#>  [3,] 0.12254146 0.05222493 0.05290338        NA         NA         NA
#>  [4,] 0.06700339 0.08275252 0.08438107        NA         NA         NA
#>  [5,] 0.05524448 0.10839246 0.10990234        NA         NA         NA
#>  [6,] 0.11338544 0.06983861 0.08428678 0.1076463 0.11530990 0.10459915
#>  [7,] 0.09071435 0.14185389 0.12458750 0.1208955 0.10620807 0.08435877
#>  [8,] 0.05380619 0.11427616 0.13330931 0.1692535 0.18844552 0.17129060
#>  [9,] 0.08124190 0.10135122 0.11385157 0.1183858 0.11737822 0.10510023
#> [10,] 0.16806178 0.05840518 0.08680555 0.1003863 0.09807777 0.09198491
#>              b_6
#>  [1,]         NA
#>  [2,]         NA
#>  [3,]         NA
#>  [4,]         NA
#>  [5,]         NA
#>  [6,] 0.08789139
#>  [7,] 0.07567987
#>  [8,] 0.13258224
#>  [9,] 0.08666445
#> [10,] 0.07509506

### The estimated ability parameters
# head() shows only the first six ability estimates.
head(Mod1$theta)
#> [1] -0.5375323 -0.5787214 -0.2605974 -1.0428218 -0.9306040 -1.2750381

### The estimated latent distribution
# Draws the latent distribution of the fitted model; xlim = c(-6,6) is
# passed to plot_LD, presumably as the plotted ability range.
plot_LD(Mod1, xlim = c(-6,6))




3. Mixed-format test

As in the case of dichotomous and polytomous items, the function DataGeneration can be used for the pre-analysis step. This function returns artificial data and some useful objects for analysis (i.e., theta, data_D, item_D, initialitem_D, data_P, item_P, and initialitem_P).

In the parameter estimation process, initialitemD and initialitemP can be used as inputs of the function IRTest_Mix (i.e., initialitem_D = initialitemD and initialitem_P = initialitemP). DataD and DataP are artificial item response data, which are unnecessary if user-imported item response data are used. The theta, itemD, and itemP are not used in the estimation process. They can be considered the true parameters only if the artificial data (DataD and DataP) are used for analysis.

# Simulate a mixed-format test for 1000 examinees: 5 dichotomous items
# (all model 2) plus 5 polytomous items (model_P = "GPCM", 3 categories
# each). The seed is fixed so the example can be rerun.
# NOTE(review): d, sd_ratio and prob presumably shape the simulated latent
# distribution -- confirm against ?DataGeneration.
Alldata <- DataGeneration(
  seed = 123456789,
  N = 1000,
  nitem_D = 5,
  model_D = rep(2, 5),
  nitem_P = 5,
  model_P = "GPCM",
  categ = rep(3, 5),
  d = 1.664,
  sd_ratio = 1,
  prob = 0.5
)

# Pull out the shared abilities, then the dichotomous and polytomous pieces.
theta <- Alldata$theta

DataD <- Alldata$data_D
itemD <- Alldata$item_D
initialitemD <- Alldata$initialitem_D

DataP <- Alldata$data_P
itemP <- Alldata$item_P
initialitemP <- Alldata$initialitem_P

If the artificial data (DataD and DataP) are used, the true latent distribution looks like,




######                                  ######
###### Kernel density estimation method ######
######                                  ######
# Fit the mixed-format model with latent_dist = "KDE" and the "SJ-ste"
# bandwidth option; the dichotomous and polytomous parts are supplied
# separately.
# NOTE(review): max_iter and threshold presumably bound the number of
# iterations and set the convergence criterion -- confirm in ?IRTest_Mix.
Mod1 <- IRTest_Mix(
  data_D = DataD,
  data_P = DataP,
  initialitem_D = initialitemD,
  initialitem_P = initialitemP,
  model_D = rep(2, 5),
  model_P = "GPCM",
  latent_dist = "KDE",
  bandwidth = "SJ-ste",
  max_iter = 200,
  threshold = 0.001
)

######                      ######
###### Normality assumption ######
######                      ######
#  Mod1 <- IRTest_Mix(initialitem_D = initialitemD,
#                     initialitem_P = initialitemP,
#                     data_D = DataD,
#                     data_P = DataP,
#                     model_D = rep(2,5),
#                     model_P = "GPCM",
#                     latent_dist = "Normal",
#                     max_iter = 200,
#                     threshold = .001)

######                            ######
###### Empirical histogram method ######
######                            ######
#  Mod1 <- IRTest_Mix(initialitem_D = initialitemD,
#                     initialitem_P = initialitemP,
#                     data_D = DataD,
#                     data_P = DataP,
#                     model_D = rep(2,5),
#                     model_P = "GPCM",
#                     latent_dist = "EHM",
#                     max_iter = 200,
#                     threshold = .001)

######                                             ######
###### Two-component Gaussian mixture distribution ######
######                                             ######
#  Mod1 <- IRTest_Mix(initialitem_D = initialitemD,
#                     initialitem_P = initialitemP,
#                     data_D = DataD,
#                     data_P = DataP,
#                     model_D = rep(2,5),
#                     model_P = "GPCM",
#                     latent_dist = "Mixture",
#                     max_iter = 200,
#                     threshold = .001)

######                                                        ######
###### Davidian curve (for an arbitrarily chosen case of h=4) ######
######                                                        ######
#  Mod1 <- IRTest_Mix(initialitem_D = initialitemD,
#                     initialitem_P = initialitemP,
#                     data_D = DataD,
#                     data_P = DataP,
#                     model_D = rep(2,5),
#                     model_P = "GPCM",
#                     latent_dist = "DC",
#                     max_iter = 200,
#                     threshold = .001,
#                     h = 4)




### The estimated item parameters
# Auto-prints the item parameter estimates. For the mixed-format fit the
# printed output has two components, $Dichotomous (columns a, b, c) and
# $Polytomous (columns a, b_1 to b_6). All polytomous items have 3
# categories here, so b_3 to b_6 are NA throughout.
Mod1$par_est
#> $Dichotomous
#>             a          b c
#> [1,] 2.200954  0.9115074 0
#> [2,] 1.984903 -1.0332999 0
#> [3,] 1.110728  0.4980855 0
#> [4,] 1.258792  0.5392781 0
#> [5,] 2.287480  1.4436252 0
#> 
#> $Polytomous
#>             a        b_1         b_2 b_3 b_4 b_5 b_6
#> [1,] 1.966701  0.3937469  0.42467910  NA  NA  NA  NA
#> [2,] 2.007852 -0.3261131  0.06844343  NA  NA  NA  NA
#> [3,] 2.056885 -0.3897486 -0.22236003  NA  NA  NA  NA
#> [4,] 1.030497 -0.1731280  0.19751324  NA  NA  NA  NA
#> [5,] 0.785141  0.2433940  0.08376259  NA  NA  NA  NA

### The asymptotic standard errors of item parameters
# Auto-prints the standard errors with the same $Dichotomous / $Polytomous
# layout as the estimates. In the printed output, c (dichotomous) and
# b_3 to b_6 (polytomous) are NA.
Mod1$se
#> $Dichotomous
#>               a          b  c
#> [1,] 0.15475720 0.04668629 NA
#> [2,] 0.14171212 0.05355501 NA
#> [3,] 0.08520384 0.06947386 NA
#> [4,] 0.09141200 0.06332382 NA
#> [5,] 0.19665345 0.06471224 NA
#> 
#> $Polytomous
#>               a        b_1        b_2 b_3 b_4 b_5 b_6
#> [1,] 0.11688799 0.05981212 0.05958615  NA  NA  NA  NA
#> [2,] 0.11456065 0.05304505 0.05249768  NA  NA  NA  NA
#> [3,] 0.11996224 0.05626596 0.05611498  NA  NA  NA  NA
#> [4,] 0.06432937 0.08543217 0.08539457  NA  NA  NA  NA
#> [5,] 0.05401889 0.11481480 0.11424988  NA  NA  NA  NA

### The estimated ability parameters
# head() shows only the first six ability estimates.
head(Mod1$theta)
#> [1] -0.5775496 -0.7558405  0.6185433 -0.8691925 -1.3624809 -1.5623822

### The estimated latent distribution
# Draws the latent distribution of the fitted model; xlim = c(-6,6) is
# passed to plot_LD, presumably as the plotted ability range.
plot_LD(Mod1, xlim = c(-6,6))



---