Generate a Codebook of a Data Set
codebook.Rd
Function codebook
collects documentation about an item,
or the items in a data set or external data file. It returns
an object that, when shown, prints this documentation
in a nicely formatted way.
Usage
codebook(x, weights = NULL, unweighted = TRUE, ...)
# S4 method for item
codebook(x, weights = NULL, unweighted = TRUE, ...)
# S4 method for atomic
codebook(x, weights = NULL, unweighted = TRUE, ...)
# S4 method for factor
codebook(x, weights = NULL, unweighted = TRUE, ...)
# S4 method for data.set
codebook(x, weights = NULL, unweighted = TRUE, ...)
# S4 method for importer
codebook(x, weights = NULL, unweighted = TRUE, ...)
# S4 method for data.frame
codebook(x, weights = NULL, unweighted = TRUE, ...)
# S4 method for tbl_df
codebook(x, weights = NULL, unweighted = TRUE, ...)
Arguments
- x
an item, numeric or character vector, factor, data.set, data.frame,
or importer object for codebook()
- weights
an optional vector of weights.
- unweighted
an optional logical vector; if weights are given, it determines whether only summaries of weighted data are shown or also summaries of unweighted data.
- ...
other arguments, currently ignored.
Value
An object of class "codebook", for which a show
method exists that
produces a nicely formatted output.
Examples
Data <- data.set(
vote = sample(c(1,2,3,8,9,97,99),size=300,replace=TRUE),
region = sample(c(rep(1,3),rep(2,2),3,99),size=300,replace=TRUE),
income = exp(rnorm(300,sd=.7))*2000
)
Data <- within(Data,{
description(vote) <- "Vote intention"
description(region) <- "Region of residence"
description(income) <- "Household income"
wording(vote) <- "If a general election would take place next tuesday,
the candidate of which party would you vote for?"
wording(income) <- "All things taken into account, how much do all
household members earn in sum?"
foreach(x=c(vote,region),{
measurement(x) <- "nominal"
})
measurement(income) <- "ratio"
labels(vote) <- c(
Conservatives = 1,
Labour = 2,
"Liberal Democrats" = 3,
"Don't know" = 8,
"Answer refused" = 9,
"Not applicable" = 97,
"Not asked in survey" = 99)
labels(region) <- c(
England = 1,
Scotland = 2,
Wales = 3,
"Not applicable" = 97,
"Not asked in survey" = 99)
foreach(x=c(vote,region,income),{
annotation(x)["Remark"] <- "This is not a real survey item, of course ..."
})
missing.values(vote) <- c(8,9,97,99)
missing.values(region) <- c(97,99)
})
description(Data)
#>
#> vote 'Vote intention'
#> region 'Region of residence'
#> income 'Household income'
#>
codebook(Data)
#> ================================================================================
#>
#> vote 'Vote intention'
#>
#> "If a general election would take place next tuesday, the candidate of which
#> party would you vote for?"
#>
#> --------------------------------------------------------------------------------
#>
#> Storage mode: double
#> Measurement: nominal
#> Missing values: 8, 9, 97, 99
#>
#> Values and labels N Valid Total
#>
#> 1 'Conservatives' 49 39.5 16.3
#> 2 'Labour' 37 29.8 12.3
#> 3 'Liberal Democrats' 38 30.6 12.7
#> 8 M 'Don't know' 46 15.3
#> 9 M 'Answer refused' 47 15.7
#> 97 M 'Not applicable' 42 14.0
#> 99 M 'Not asked in survey' 41 13.7
#>
#> Remark:
#> This is not a real survey item, of course ...
#>
#> ================================================================================
#>
#> region 'Region of residence'
#>
#> --------------------------------------------------------------------------------
#>
#> Storage mode: double
#> Measurement: nominal
#> Missing values: 97, 99
#>
#> Values and labels N Valid Total
#>
#> 1 'England' 121 45.5 40.3
#> 2 'Scotland' 111 41.7 37.0
#> 3 'Wales' 34 12.8 11.3
#> 99 M 'Not asked in survey' 34 11.3
#>
#> Remark:
#> This is not a real survey item, of course ...
#>
#> ================================================================================
#>
#> income 'Household income'
#>
#> "All things taken into account, how much do all household members earn in
#> sum?"
#>
#> --------------------------------------------------------------------------------
#>
#> Storage mode: double
#> Measurement: ratio
#>
#> Min: 363.452
#> Max: 15476.165
#> Mean: 2463.786
#> Std.Dev.: 1687.826
#>
#> Remark:
#> This is not a real survey item, of course ...
#>
codebook(Data)$vote
#> ================================================================================
#>
#> vote 'Vote intention'
#>
#> "If a general election would take place next tuesday, the candidate of which
#> party would you vote for?"
#>
#> --------------------------------------------------------------------------------
#>
#> Storage mode: double
#> Measurement: nominal
#> Missing values: 8, 9, 97, 99
#>
#> Values and labels N Valid Total
#>
#> 1 'Conservatives' 49 39.5 16.3
#> 2 'Labour' 37 29.8 12.3
#> 3 'Liberal Democrats' 38 30.6 12.7
#> 8 M 'Don't know' 46 15.3
#> 9 M 'Answer refused' 47 15.7
#> 97 M 'Not applicable' 42 14.0
#> 99 M 'Not asked in survey' 41 13.7
#>
#> Remark:
#> This is not a real survey item, of course ...
#>
codebook(Data)[2]
#> ================================================================================
#>
#> region 'Region of residence'
#>
#> --------------------------------------------------------------------------------
#>
#> Storage mode: double
#> Measurement: nominal
#> Missing values: 97, 99
#>
#> Values and labels N Valid Total
#>
#> 1 'England' 121 45.5 40.3
#> 2 'Scotland' 111 41.7 37.0
#> 3 'Wales' 34 12.8 11.3
#> 99 M 'Not asked in survey' 34 11.3
#>
#> Remark:
#> This is not a real survey item, of course ...
#>
codebook(Data[2])
#> ================================================================================
#>
#> region 'Region of residence'
#>
#> --------------------------------------------------------------------------------
#>
#> Storage mode: double
#> Measurement: nominal
#> Missing values: 97, 99
#>
#> Values and labels N Valid Total
#>
#> 1 'England' 121 45.5 40.3
#> 2 'Scotland' 111 41.7 37.0
#> 3 'Wales' 34 12.8 11.3
#> 99 M 'Not asked in survey' 34 11.3
#>
#> Remark:
#> This is not a real survey item, of course ...
#>
DataFr <- as.data.frame(Data)
DataHv <- as_haven(Data,user_na=TRUE)
codebook(DataFr)
#> ================================================================================
#>
#> vote 'Vote intention'
#>
#> --------------------------------------------------------------------------------
#>
#> Storage mode: integer
#> Factor with 3 levels
#>
#> Levels and labels N Valid Total
#>
#> 1 'Conservatives' 49 39.5 16.3
#> 2 'Labour' 37 29.8 12.3
#> 3 'Liberal Democrats' 38 30.6 12.7
#> NA 176 58.7
#>
#> ================================================================================
#>
#> region 'Region of residence'
#>
#> --------------------------------------------------------------------------------
#>
#> Storage mode: integer
#> Factor with 3 levels
#>
#> Levels and labels N Valid Total
#>
#> 1 'England' 121 45.5 40.3
#> 2 'Scotland' 111 41.7 37.0
#> 3 'Wales' 34 12.8 11.3
#> NA 34 11.3
#>
#> ================================================================================
#>
#> income 'Household income'
#>
#> --------------------------------------------------------------------------------
#>
#> Storage mode: double
#>
#> Min: 363.4518
#> Max: 15476.1649
#> Mean: 2463.7859
#> Std.Dev.: 1687.8259
#>
codebook(DataHv)
#> ================================================================================
#>
#> vote 'Vote intention'
#>
#> --------------------------------------------------------------------------------
#>
#> Storage mode: integer
#> Measurement: undefined
#> Missing values: 8, 9, 97, 99
#>
#> Values and labels N Valid Total
#>
#> 1 'Conservatives' 49 39.5 16.3
#> 2 'Labour' 37 29.8 12.3
#> 3 'Liberal Democrats' 38 30.6 12.7
#> 8 M 'Don't know' 46 15.3
#> 9 M 'Answer refused' 47 15.7
#> 97 M 'Not applicable' 42 14.0
#> 99 M 'Not asked in survey' 41 13.7
#>
#> ================================================================================
#>
#> region 'Region of residence'
#>
#> --------------------------------------------------------------------------------
#>
#> Storage mode: integer
#> Measurement: undefined
#> Missing values: 97, 99
#>
#> Values and labels N Valid Total
#>
#> 1 'England' 121 45.5 40.3
#> 2 'Scotland' 111 41.7 37.0
#> 3 'Wales' 34 12.8 11.3
#> 99 M 'Not asked in survey' 34 11.3
#>
#> ================================================================================
#>
#> income 'Household income'
#>
#> --------------------------------------------------------------------------------
#>
#> Storage mode: double
#>
#> Min: 363.4518
#> Max: 15476.1649
#> Mean: 2463.7859
#> Std.Dev.: 1687.8259
#>
if (FALSE) {
Write(description(Data),
file="Data-desc.txt")
Write(codebook(Data),
file="Data-cdbk.txt")
}