Skip to content

This repository contains R programs for the article, “Varying-coefficient regression analysis for pooled biomonitoring,” by Dewei Wang, Xichen Mou, and Yan Liu. This article has been submitted for publication.

Notifications You must be signed in to change notification settings

Harrindy/VCMforPB

Repository files navigation

R programs for the article "Varying-coefficient regression analysis for pooled biomonitoring."

The required R packges are Rcpp and RcppArmadillo. You can install these two R packages by

    install.packages(c("RcppArmadillo","Rcpp"))

The necessary files are "VCM.cpp" and "Functions.R". Please download the two files to the directory of your R/Rstudio. Use the following codes to source both of them.

    library(Rcpp)
    library(RcppArmadillo)
    sourceCpp("VCM.cpp")
    source("Functions.R")

Now you are ready to fit our varying-coefficient model.

When indiviudal-level biomonitoring is used

    Individual.fit=IndT(u_grid, Y, U, X, W, c(a,b))
    # u_grid is a grid of u-values
    # Y: a size-J vector of individual-level measurements
    # U: a size-J vector of indivdiual-level U-covariates
    # X: a J*(p+1) matrix of indiviudal-level X-covariates
    # W: a size-J vector of individual-level sampling weights
    # (a,b): the cross-validation will search h within the interval (a,b)
    
    #The output Individual.fit is a list of the selected bandwidth and the estimates
    Individual.fit$h # a single value
    Individual.fit$fit # a (p+1)*length(u_grid) matrix
    
    # You can plot the estimates by 
    par(mfrow=c(ceiling(nrow(Individual.fit$fit)/2),2))
    par(mar=c(4,4,.1,.1))
    for(k in 1:nrow(Individual.fit$fit))
    {
            plot(u_grid,Individual.fit$fit[k,],type="l",xlab="u",ylab=expression(beta(u)))
    }

An example

Download the "individual.csv" file to the directory. Run the following programs.

    individual.data=read.csv("individual.csv")
    u_grid=seq(-1.5,1.5,length=400)
    Y=individual.data$Y
    U=individual.data$U
    X=cbind(individual.data$X1,individual.data$X2,individual.data$X3,individual.data$X4)
    W=individual.data$W
    Individual.fit=IndT(u_grid,Y,U,X,W,c(0.01,1))
    Individual.fit$h
    par(mfrow=c(ceiling(nrow(Individual.fit$fit)/2),2))
    par(mar=c(4,4,.1,.1))
    for(k in 1:nrow(Individual.fit$fit))
    {
            plot(u_grid,Individual.fit$fit[k,],type="l",xlab="u",ylab=expression(beta(u)))
    }

Output are

    >  Individual.fit$h
    [1] 0.5753412

Optional Text

When randomly pooled biomonitoring is used

    Random.fit=RT(u_grid,Z,U,X,POOLID,W,c(a,b))
    # u_grid is a grid of u-values
    # Z: a size-N vector of pooled-level measurements for each individual,
    #       if two indiviudals are in the same pool, the corresponding Z-value are the same
    # U: a size-N vector of indivdiual-level U-covariates
    # X: a N*(p+1) matrix of indiviudal-level X-covariates
    # PoolID: a size-N vector of individuals' pool ID,
    #       if two individuals are in the same pool, their pool IDs are the same
    # W: a size-N vector of individual-level sampling weights
    # (a,b): the cross-validation will search h within the interval (a,b)
    
    #The output Random.fit is a list of the selected bandwidth and the estimates
    Random.fit$h # a single value
    Random.fit$fit # a (p+1)*length(u_grid) matrix
    
    # You can plot the estimates by 
    par(mfrow=c(ceiling(nrow(Random.fit$fit)/2),2))
    par(mar=c(4,4,.1,.1))
    for(k in 1:nrow(Random.fit$fit))
    {
            plot(u_grid,Random.fit$fit[k,],type="l",xlab="u",ylab=expression(beta(u)))
    }

An example

Download the "random.csv" file to the directory. Run the following programs.

    random.data=read.csv("random.csv")
    u_grid=seq(-1.5,1.5,length=400)
    Z=random.data$Z
    U=random.data$U
    X=cbind(random.data$X1,random.data$X2,random.data$X3,random.data$X4)
    W=random.data$W
    PoolID=random.data$poolID
    Random.fit=RT(u_grid,Z,U,X,PoolID,W,c(0.01,1))
    Random.fit$h
    par(mfrow=c(ceiling(nrow(Random.fit$fit)/2),2))
    par(mar=c(4,4,.1,.1))
    for(k in 1:nrow(Random.fit$fit))
    {
            plot(u_grid,Random.fit$fit[k,],type="l",xlab="u",ylab=expression(beta(u)))
    }

Output are

    >  Random.fit$h
    [1] 0.7529637

Optional Text

When homogeneously pooled biomonitoring is used

    Homogenous.fit=HT(u_grid,Z,U,X,POOLID,W,c(a,b))
    # u_grid is a grid of u-values
    # Z: a size-N vector of pooled-level measurements for each individual,
    #       if two indiviudals are in the same pool, the corresponding Z-value are the same
    # U: a size-N vector of indivdiual-level U-covariates
    # X: a N*(p+1) matrix of indiviudal-level X-covariates
    # PoolID: a size-N vector of individuals' pool ID,
    #       if two individuals are in the same pool, their pool IDs are the same
    # W: a size-N vector of individual-level sampling weights
    # (a,b): the cross-validation will search h within the interval (a,b)
    
    #The output Homogeneous.fit is a list of the selected bandwidth and the estimates
    Homogeneous.fit$h # a single value
    Homogeneous.fit$fit # a (p+1)*length(u_grid) matrix
    
    # You can plot the estimates by 
    par(mfrow=c(ceiling(nrow(Homogeneous.fit$fit)/2),2))
    par(mar=c(4,4,.1,.1))
    for(k in 1:nrow(Homogeneous.fit$fit))
    {
            plot(u_grid,Homogeneous.fit$fit[k,],type="l",xlab="u",ylab=expression(beta(u)))
    }

An example

Download the "homogeneous.csv" file to the directory. Run the following programs.

    homogeneous.data=read.csv("homogeneous.csv")
    u_grid=seq(-1.5,1.5,length=400)
    Z=homogeneous.data$Z
    U=homogeneous.data$U
    X=cbind(homogeneous.data$X1,homogeneous.data$X2,homogeneous.data$X3,homogeneous.data$X4)
    W=homogeneous.data$W
    PoolID=homogeneous.data$poolID
    Homogeneous.fit=HT(u_grid,Z,U,X,PoolID,W,c(0.01,1))
    Homogeneous.fit$h
    par(mfrow=c(ceiling(nrow(Homogeneous.fit$fit)/2),2))
    par(mar=c(4,4,.1,.1))
    for(k in 1:nrow(Homogeneous.fit$fit))
    {
            plot(u_grid,Homogeneous.fit$fit[k,],type="l",xlab="u",ylab=expression(beta(u)))
    }

Output are

    >  Homogeneous.fit$h
    [1] 0.3141536

Optional Text

About

This repository contains R programs for the article, “Varying-coefficient regression analysis for pooled biomonitoring,” by Dewei Wang, Xichen Mou, and Yan Liu. This article has been submitted for publication.

Topics

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published