Skip to contents

The function codebook collects documentation about an item, or about the items in a data set or external data file. It returns an object that, when shown, prints this documentation in a nicely formatted way.

Usage

codebook(x, weights = NULL, unweighted = TRUE, ...)
# S4 method for item
codebook(x, weights = NULL, unweighted = TRUE, ...)
# S4 method for atomic
codebook(x, weights = NULL, unweighted = TRUE, ...)
# S4 method for factor
codebook(x, weights = NULL, unweighted = TRUE, ...)
# S4 method for data.set
codebook(x, weights = NULL, unweighted = TRUE, ...)
# S4 method for importer
codebook(x, weights = NULL, unweighted = TRUE, ...)
# S4 method for data.frame
codebook(x, weights = NULL, unweighted = TRUE, ...)
# S4 method for tbl_df
codebook(x, weights = NULL, unweighted = TRUE, ...)

Arguments

x

an item, numeric or character vector, factor, data.set, data.frame or importer object for codebook()

weights

an optional vector of weights.

unweighted

an optional logical vector; if weights are given, it determines whether only summaries of weighted data are shown or also summaries of unweighted data.

...

other arguments, currently ignored.

Value

An object of class "codebook", for which a show method exists that produces a nicely formatted output.

Examples


Data <- data.set(
          vote = sample(c(1,2,3,8,9,97,99),size=300,replace=TRUE),
          region = sample(c(rep(1,3),rep(2,2),3,99),size=300,replace=TRUE),
          income = exp(rnorm(300,sd=.7))*2000
          )

Data <- within(Data,{
  description(vote) <- "Vote intention"
  description(region) <- "Region of residence"
  description(income) <- "Household income"
  wording(vote) <- "If a general election would take place next tuesday,
                    the candidate of which party would you vote for?"
  wording(income) <- "All things taken into account, how much do all
                    household members earn in sum?"
  foreach(x=c(vote,region),{
    measurement(x) <- "nominal"
    })
  measurement(income) <- "ratio"
  labels(vote) <- c(
                    Conservatives         =  1,
                    Labour                =  2,
                    "Liberal Democrats"   =  3,
                    "Don't know"          =  8,
                    "Answer refused"      =  9,
                    "Not applicable"      = 97,
                    "Not asked in survey" = 99)
  labels(region) <- c(
                    England               =  1,
                    Scotland              =  2,
                    Wales                 =  3,
                    "Not applicable"      = 97,
                    "Not asked in survey" = 99)
  foreach(x=c(vote,region,income),{
    annotation(x)["Remark"] <- "This is not a real survey item, of course ..."
    })
  missing.values(vote) <- c(8,9,97,99)
  missing.values(region) <- c(97,99)
})

description(Data)
#> 
#>  vote   'Vote intention'     
#>  region 'Region of residence'
#>  income 'Household income'   
#> 

codebook(Data)
#> ================================================================================
#> 
#>    vote 'Vote intention'
#> 
#>    "If a general election would take place next tuesday, the candidate of which
#>    party would you vote for?"
#> 
#> --------------------------------------------------------------------------------
#> 
#>    Storage mode: double
#>    Measurement: nominal
#>    Missing values: 8, 9, 97, 99
#> 
#>    Values and labels              N Valid Total
#>                                                
#>     1   'Conservatives'          49  39.5  16.3
#>     2   'Labour'                 37  29.8  12.3
#>     3   'Liberal Democrats'      38  30.6  12.7
#>     8 M 'Don't know'             46        15.3
#>     9 M 'Answer refused'         47        15.7
#>    97 M 'Not applicable'         42        14.0
#>    99 M 'Not asked in survey'    41        13.7
#> 
#>    Remark:
#>        This is not a real survey item, of course ...
#> 
#> ================================================================================
#> 
#>    region 'Region of residence'
#> 
#> --------------------------------------------------------------------------------
#> 
#>    Storage mode: double
#>    Measurement: nominal
#>    Missing values: 97, 99
#> 
#>    Values and labels              N Valid Total
#>                                                
#>     1   'England'               121  45.5  40.3
#>     2   'Scotland'              111  41.7  37.0
#>     3   'Wales'                  34  12.8  11.3
#>    99 M 'Not asked in survey'    34        11.3
#> 
#>    Remark:
#>        This is not a real survey item, of course ...
#> 
#> ================================================================================
#> 
#>    income 'Household income'
#> 
#>    "All things taken into account, how much do all household members earn in
#>    sum?"
#> 
#> --------------------------------------------------------------------------------
#> 
#>    Storage mode: double
#>    Measurement: ratio
#> 
#>         Min:   363.452
#>         Max: 15476.165
#>        Mean:  2463.786
#>    Std.Dev.:  1687.826
#> 
#>    Remark:
#>        This is not a real survey item, of course ...
#> 

codebook(Data)$vote
#> ================================================================================
#> 
#>    vote 'Vote intention'
#> 
#>    "If a general election would take place next tuesday, the candidate of which
#>    party would you vote for?"
#> 
#> --------------------------------------------------------------------------------
#> 
#>    Storage mode: double
#>    Measurement: nominal
#>    Missing values: 8, 9, 97, 99
#> 
#>    Values and labels              N Valid Total
#>                                                
#>     1   'Conservatives'          49  39.5  16.3
#>     2   'Labour'                 37  29.8  12.3
#>     3   'Liberal Democrats'      38  30.6  12.7
#>     8 M 'Don't know'             46        15.3
#>     9 M 'Answer refused'         47        15.7
#>    97 M 'Not applicable'         42        14.0
#>    99 M 'Not asked in survey'    41        13.7
#> 
#>    Remark:
#>        This is not a real survey item, of course ...
#> 
codebook(Data)[2]
#> ================================================================================
#> 
#>    region 'Region of residence'
#> 
#> --------------------------------------------------------------------------------
#> 
#>    Storage mode: double
#>    Measurement: nominal
#>    Missing values: 97, 99
#> 
#>    Values and labels              N Valid Total
#>                                                
#>     1   'England'               121  45.5  40.3
#>     2   'Scotland'              111  41.7  37.0
#>     3   'Wales'                  34  12.8  11.3
#>    99 M 'Not asked in survey'    34        11.3
#> 
#>    Remark:
#>        This is not a real survey item, of course ...
#> 

codebook(Data[2])
#> ================================================================================
#> 
#>    region 'Region of residence'
#> 
#> --------------------------------------------------------------------------------
#> 
#>    Storage mode: double
#>    Measurement: nominal
#>    Missing values: 97, 99
#> 
#>    Values and labels              N Valid Total
#>                                                
#>     1   'England'               121  45.5  40.3
#>     2   'Scotland'              111  41.7  37.0
#>     3   'Wales'                  34  12.8  11.3
#>    99 M 'Not asked in survey'    34        11.3
#> 
#>    Remark:
#>        This is not a real survey item, of course ...
#> 
 
DataFr <- as.data.frame(Data)
DataHv <- as_haven(Data,user_na=TRUE)

codebook(DataFr)
#> ================================================================================
#> 
#>    vote 'Vote intention'
#> 
#> --------------------------------------------------------------------------------
#> 
#>    Storage mode: integer
#>    Factor with 3 levels
#> 
#>    Levels and labels          N Valid Total
#>                                            
#>     1 'Conservatives'        49  39.5  16.3
#>     2 'Labour'               37  29.8  12.3
#>     3 'Liberal Democrats'    38  30.6  12.7
#>    NA                       176        58.7
#> 
#> ================================================================================
#> 
#>    region 'Region of residence'
#> 
#> --------------------------------------------------------------------------------
#> 
#>    Storage mode: integer
#>    Factor with 3 levels
#> 
#>    Levels and labels     N Valid Total
#>                                       
#>     1 'England'        121  45.5  40.3
#>     2 'Scotland'       111  41.7  37.0
#>     3 'Wales'           34  12.8  11.3
#>    NA                   34        11.3
#> 
#> ================================================================================
#> 
#>    income 'Household income'
#> 
#> --------------------------------------------------------------------------------
#> 
#>    Storage mode: double
#> 
#>         Min:   363.4518
#>         Max: 15476.1649
#>        Mean:  2463.7859
#>    Std.Dev.:  1687.8259
#> 
codebook(DataHv)
#> ================================================================================
#> 
#>    vote 'Vote intention'
#> 
#> --------------------------------------------------------------------------------
#> 
#>    Storage mode: integer
#>    Measurement: undefined
#>    Missing values: 8, 9, 97, 99
#> 
#>    Values and labels              N Valid Total
#>                                                
#>     1   'Conservatives'          49  39.5  16.3
#>     2   'Labour'                 37  29.8  12.3
#>     3   'Liberal Democrats'      38  30.6  12.7
#>     8 M 'Don't know'             46        15.3
#>     9 M 'Answer refused'         47        15.7
#>    97 M 'Not applicable'         42        14.0
#>    99 M 'Not asked in survey'    41        13.7
#> 
#> ================================================================================
#> 
#>    region 'Region of residence'
#> 
#> --------------------------------------------------------------------------------
#> 
#>    Storage mode: integer
#>    Measurement: undefined
#>    Missing values: 97, 99
#> 
#>    Values and labels              N Valid Total
#>                                                
#>     1   'England'               121  45.5  40.3
#>     2   'Scotland'              111  41.7  37.0
#>     3   'Wales'                  34  12.8  11.3
#>    99 M 'Not asked in survey'    34        11.3
#> 
#> ================================================================================
#> 
#>    income 'Household income'
#> 
#> --------------------------------------------------------------------------------
#> 
#>    Storage mode: double
#> 
#>         Min:   363.4518
#>         Max: 15476.1649
#>        Mean:  2463.7859
#>    Std.Dev.:  1687.8259
#> 

   
if (FALSE) {
Write(description(Data),
           file="Data-desc.txt")
Write(codebook(Data),
           file="Data-cdbk.txt")
  }