fac <- factor(c("a", "b", "b", "a"))
fac * 2
#> Warning in Ops.factor(fac, 2): '*' not meaningful for factors
#> [1] NA NA NA NA
attr(fac, "class")
#> [1] "factor"
attr(fac, "class") <- NULL
fac * 2 # Integer type; calculations can be done.
#> [1] 2 4 4 2
#> attr(,"levels")
#> [1] "a" "b"
S3 Objects and the Base Types
Reading Assignments
Please read the following sections from textbook Advanced R https://adv-r.hadley.nz/index.html
- Section 3.3 Attributes – https://adv-r.hadley.nz/vectors-chap.html#attributes
- Section 3.4 S3 atomic vectors – https://adv-r.hadley.nz/vectors-chap.html#s3-atomic-vectors
- Section 3.5 Lists – https://adv-r.hadley.nz/vectors-chap.html#lists
- Section 3.6 Data frames and tibbles – https://adv-r.hadley.nz/vectors-chap.html#tibble
- How to create S3 classes? Chapter 13 in Advanced R – https://adv-r.hadley.nz/s3.html
Attributes
Factors \(\longrightarrow\) Integers
- The class attribute of a factor is “factor”.
- And a factor also has an additional attribute called “levels”.
fac1 <- factor(c("a", "b", "c"))
levels(fac1)
#> [1] "a" "b" "c"
fac2 <- fac1 # replicate fac1
levels(fac2) <- c("c", "b", "a") # reverse the levels
fac1
#> [1] a b c
#> Levels: a b c
fac2
#> [1] c b a
#> Levels: c b a
unclass(fac1)
#> [1] 1 2 3
#> attr(,"levels")
#> [1] "a" "b" "c"
unclass(fac2) # The integer values stay the same
#> [1] 1 2 3
#> attr(,"levels")
#> [1] "c" "b" "a"
Matrices and Arrays \(\longrightarrow\) Vectors
A <- 1:12 # vector
is.matrix(A)
#> [1] FALSE
The dim attribute
dim(A) <- c(3, 4) # Convert to matrix
is.matrix(A)
#> [1] TRUE
dim(A) <- NULL # Convert back to vector
is.matrix(A)
#> [1] FALSE
- Transpose of a matrix: t()
# Create the 3*4 matrix
A <- 1:12
dim(A) <- c(3, 4)
A
#> [,1] [,2] [,3] [,4]
#> [1,] 1 4 7 10
#> [2,] 2 5 8 11
#> [3,] 3 6 9 12
t(A) # Matrix transpose
#> [,1] [,2] [,3]
#> [1,] 1 2 3
#> [2,] 4 5 6
#> [3,] 7 8 9
#> [4,] 10 11 12
t(t(A)) # Transpose t(A)
#> [,1] [,2] [,3] [,4]
#> [1,] 1 4 7 10
#> [2,] 2 5 8 11
#> [3,] 3 6 9 12
identical(A, t(t(A))) # The same
#> [1] TRUE
Data Frame \(\longrightarrow\) List
- The class attribute of data frame is “data.frame”.
df <- data.frame(
  name = c("Alice", "Bob", "Charlie"),
  age = c(25, 30, 35),
  gender = c("Female", "Male", "Male")
)
attributes(df)
#> $names
#> [1] "name" "age" "gender"
#>
#> $class
#> [1] "data.frame"
#>
#> $row.names
#> [1] 1 2 3
attr(df, "class") <- NULL
df
#> $name
#> [1] "Alice" "Bob" "Charlie"
#>
#> $age
#> [1] 25 30 35
#>
#> $gender
#> [1] "Female" "Male" "Male"
#>
#> attr(,"row.names")
#> [1] 1 2 3
attr(df, "class") <- "data.frame"
df
#> name age gender
#> 1 Alice 25 Female
#> 2 Bob 30 Male
#> 3 Charlie 35 Male
- Transpose of a data frame
t(df) # Data frame transpose
#> [,1] [,2] [,3]
#> name "Alice" "Bob" "Charlie"
#> age "25" "30" "35"
#> gender "Female" "Male" "Male"
t(t(df))
#> name age gender
#> [1,] "Alice" "25" "Female"
#> [2,] "Bob" "30" "Male"
#> [3,] "Charlie" "35" "Male"
identical(df, t(t(df))) # No longer the same
#> [1] FALSE
- The transpose function t(), when applied to a data frame, returns a matrix.
- The transpose function t() coerces every column to the same type.
Base Type
- Every S3 object is built on top of a base type.
- An S3 object consists of a base type along with its attributes.
- Having a class attribute turns an object into an S3 object. (Important attribute – class attribute!)
- An S3 object often stores additional information in other attributes (e.g. the levels attribute).
Base Type Summary
S3 Object | Factor | POSIXct | Date | Data Frame | Tibble |
---|---|---|---|---|---|
Base Type | Integer | Double | Double | List | List |
Factors are built on top of integers
- Factors are built on top of integer vectors. In other words, the base type of factor is an integer vector.
- The class attribute of factors, “factor”, makes it behave differently from regular integer vectors.
x <- factor(c("a", "b", "b", "a"))
x
#> [1] a b b a
#> Levels: a b
x * 2
#> Warning in Ops.factor(x, 2): '*' not meaningful for factors
#> [1] NA NA NA NA
unclass(x)*2
#> [1] 2 4 4 2
#> attr(,"levels")
#> [1] "a" "b"
attr(x, "class") <- NULL
x * 2
#> [1] 2 4 4 2
#> attr(,"levels")
#> [1] "a" "b"
identical(c(1L, 2L, 2L, 1L), x)
#> [1] FALSE
attr(x, "levels") <- NULL
identical(c(1L, 2L, 2L, 1L), x)
#> [1] TRUE
x <- c(1L, 2L, 2L, 1L)
attr(x, "levels") <- c("a", "b")
x
#> [1] 1 2 2 1
#> attr(,"levels")
#> [1] "a" "b"
attributes(x)
#> $levels
#> [1] "a" "b"
x * 2 # Calculation can still be done before the class attribute is added back.
#> [1] 2 4 4 2
#> attr(,"levels")
#> [1] "a" "b"
is.factor(x)
#> [1] FALSE
attr(x, "class") <- "factor"
x
#> [1] a b b a
#> Levels: a b
attributes(x)
#> $levels
#> [1] "a" "b"
#>
#> $class
#> [1] "factor"
is.factor(x)
#> [1] TRUE
Remove the class attribute of a factor
x <- factor(c("a", "b", "b", "a")) # Define a factor
x
#> [1] a b b a
#> Levels: a b
attr(x, "class") <- NULL # Modify the class attribute of x from "factor" to NULL, i.e., remove the class attribute
# Equivalently, you can use structure() to remove the class attribute of x
x <- structure(
  x, class = NULL
)
attributes(x) # Retrieve all attributes information of x
#> $levels
#> [1] "a" "b"
str(attributes(x))
#> List of 1
#> $ levels: chr [1:2] "a" "b"
# Retrieve all attributes information of x,
#and then display the attributes in a structured representation.
Dates are built on top of doubles
- Date vectors are built on top of double vectors. In other words, the base type of date vector is a double vector.
- The value of the double represents the number of days since 1970-01-01.
- The class attribute of dates, “Date”, makes it behave differently from regular double vectors.
library(lubridate)
first_day <- "2024-8-26"
typeof(first_day)
#> [1] "character"
attributes(first_day)
#> NULL
is.Date(first_day)
#> [1] FALSE
first_day <- as.Date("2024-8-26")
typeof(first_day)
#> [1] "double"
attributes(first_day)
#> $class
#> [1] "Date"
is.Date(first_day)
#> [1] TRUE
unclass(first_day)
#> [1] 19961
first_day <- 19961 # Double value corresponding to "2024-8-26"
attr(first_day, "class") <- "Date"
typeof(first_day)
#> [1] "double"
attributes(first_day)
#> $class
#> [1] "Date"
is.Date(first_day)
#> [1] TRUE
today <- as.Date("2024-10-22")
n_days <- difftime(today, first_day)
n_weeks <- difftime(today, first_day, units = "weeks")
as.difftime(n_days, units = "days")
#> Time difference of 57 days
as.difftime(n_weeks, units = "weeks")
#> Time difference of 8.142857 weeks
Difftimes are built on top of doubles
- Difftime vectors are built on top of double vectors. In other words, the base type of a difftime vector is a double vector.
- The class attribute of difftimes, “difftime”, together with a “units” attribute, makes it behave differently from regular double vectors.
first_day <- as.Date("2024-08-26")
today <- as.Date("2024-10-22")
difftime(today, first_day)
#> Time difference of 57 days
first_day <- "2024-08-26"
today <- "2024-10-22"
difftime(today, first_day, units = "weeks")
#> Time difference of 8.142857 weeks
Data frames and tibbles are built on top of lists
- Data frames and tibbles are built on top of lists. In other words, the base type of data frames or tibbles is a list.
- The class attribute of data frames or tibbles, “data.frame” or “tbl”, makes them behave differently from regular lists.
is.data.frame(iris)
#> [1] TRUE
class(iris)
#> [1] "data.frame"
attr(iris, "class") <- NULL
# iris
is.data.frame(iris)
#> [1] FALSE
is.list(iris)
#> [1] TRUE
attr(iris, "class") <- "data.frame"
head(iris)
#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#> 1 5.1 3.5 1.4 0.2 setosa
#> 2 4.9 3.0 1.4 0.2 setosa
#> 3 4.7 3.2 1.3 0.2 setosa
#> 4 4.6 3.1 1.5 0.2 setosa
#> 5 5.0 3.6 1.4 0.2 setosa
#> 6 5.4 3.9 1.7 0.4 setosa
Why S3
Functions, which are designed to perform specific tasks, serve as Scattered Tools.
Object-oriented programming (OOP) systems, like S3/S4/R6, serve as Structured Tools, organizing related functions (methods) and data into classes or objects.
S3 is R’s first and simplest OO system.
S3 vs S4 vs R6
- S3 is used for simple, lightweight OOP (e.g., tidyverse packages)
- S4 is used for formal, structured OOP (e.g., Bioconductor packages)
- R6 is used for more complex systems (e.g., Shiny app)
In the R ecosystem, S3 is the most popular object-oriented programming (OOP) system.
Recall: An S3 object is a base type with at least a class attribute (other attributes may be used to store other data).
An S3 object behaves differently from its underlying base type whenever it’s passed to a generic (short for generic function). (Use sloop::ftype() to check if a function is a generic.)
Generics, methods, and dispatch – use the print() generic as an example
- A generic function manages method dispatch: it determines which method to call based on the class of the input. (E.g. print(), summary())
- The method contains the actual implementation for that class. (E.g. print.data.frame(), summary.lm())
library(sloop)
ftype(print) # print() is generic
#> [1] "S3" "generic"
# Print a factor
fac <- factor(c("a", "b", "b", "a"))

fac # Equivalent to print(fac)
#> [1] a b b a
#> Levels: a b
print(fac) # print.factor() -- method used to print the factor
#> [1] a b b a
#> Levels: a b
# Print a data frame
df <- data.frame(
  name = c("Alice", "Bob", "Charlie"),
  age = c(25, 30, 35),
  gender = c("Female", "Male", "Male")
)
df # Equivalent to print(df)
#> name age gender
#> 1 Alice 25 Female
#> 2 Bob 30 Male
#> 3 Charlie 35 Male
print(df) # print.data.frame() -- method used to print the data frame
#> name age gender
#> 1 Alice 25 Female
#> 2 Bob 30 Male
#> 3 Charlie 35 Male
methods("print")
#> [1] print.acf*
#> [2] print.activeConcordance*
#> [3] print.AES*
#> [4] print.all_vars*
#> [5] print.anova*
#> [6] print.any_vars*
#> [7] print.aov*
#> [8] print.aovlist*
#> [9] print.ar*
#> [10] print.Arima*
#> [11] print.arima0*
#> [12] print.AsIs
#> [13] print.aspell*
#> [14] print.aspell_inspect_context*
#> [15] print.bibentry*
#> [16] print.Bibtex*
#> [17] print.browseVignettes*
#> [18] print.by
#> [19] print.changedFiles*
#> [20] print.check_bogus_return*
#> [21] print.check_code_usage_in_package*
#> [22] print.check_compiled_code*
#> [23] print.check_demo_index*
#> [24] print.check_depdef*
#> [25] print.check_details*
#> [26] print.check_details_changes*
#> [27] print.check_doi_db*
#> [28] print.check_dotInternal*
#> [29] print.check_make_vars*
#> [30] print.check_nonAPI_calls*
#> [31] print.check_package_code_assign_to_globalenv*
#> [32] print.check_package_code_attach*
#> [33] print.check_package_code_data_into_globalenv*
#> [34] print.check_package_code_startup_functions*
#> [35] print.check_package_code_syntax*
#> [36] print.check_package_code_unload_functions*
#> [37] print.check_package_compact_datasets*
#> [38] print.check_package_CRAN_incoming*
#> [39] print.check_package_datalist*
#> [40] print.check_package_datasets*
#> [41] print.check_package_depends*
#> [42] print.check_package_description*
#> [43] print.check_package_description_encoding*
#> [44] print.check_package_license*
#> [45] print.check_packages_in_dir*
#> [46] print.check_packages_used*
#> [47] print.check_po_files*
#> [48] print.check_pragmas*
#> [49] print.check_Rd_line_widths*
#> [50] print.check_Rd_metadata*
#> [51] print.check_Rd_xrefs*
#> [52] print.check_RegSym_calls*
#> [53] print.check_S3_methods_needing_delayed_registration*
#> [54] print.check_so_symbols*
#> [55] print.check_T_and_F*
#> [56] print.check_url_db*
#> [57] print.check_vignette_index*
#> [58] print.checkDocFiles*
#> [59] print.checkDocStyle*
#> [60] print.checkFF*
#> [61] print.checkRd*
#> [62] print.checkRdContents*
#> [63] print.checkReplaceFuns*
#> [64] print.checkS3methods*
#> [65] print.checkTnF*
#> [66] print.checkVignettes*
#> [67] print.citation*
#> [68] print.cli_ansi_html_style*
#> [69] print.cli_ansi_string*
#> [70] print.cli_ansi_style*
#> [71] print.cli_boxx*
#> [72] print.cli_diff_chr*
#> [73] print.cli_doc*
#> [74] print.cli_progress_demo*
#> [75] print.cli_rule*
#> [76] print.cli_sitrep*
#> [77] print.cli_spark*
#> [78] print.cli_spinner*
#> [79] print.cli_tree*
#> [80] print.codoc*
#> [81] print.codocClasses*
#> [82] print.codocData*
#> [83] print.col_spec*
#> [84] print.collector*
#> [85] print.colorConverter*
#> [86] print.compactPDF*
#> [87] print.condition
#> [88] print.connection
#> [89] print.CRAN_package_reverse_dependencies_and_views*
#> [90] print.data.frame
#> [91] print.Date
#> [92] print.date_names*
#> [93] print.default
#> [94] print.dendrogram*
#> [95] print.density*
#> [96] print.difftime
#> [97] print.dist*
#> [98] print.Dlist
#> [99] print.DLLInfo
#> [100] print.DLLInfoList
#> [101] print.DLLRegisteredRoutines
#> [102] print.document_context*
#> [103] print.document_position*
#> [104] print.document_range*
#> [105] print.document_selection*
#> [106] print.dplyr_join_by*
#> [107] print.dplyr_sel_vars*
#> [108] print.dummy_coef*
#> [109] print.dummy_coef_list*
#> [110] print.ecdf*
#> [111] print.eigen
#> [112] print.element*
#> [113] print.evaluate_evaluation*
#> [114] print.factanal*
#> [115] print.factor
#> [116] print.family*
#> [117] print.fileSnapshot*
#> [118] print.findLineNumResult*
#> [119] print.flatGridListing*
#> [120] print.formula*
#> [121] print.fseq*
#> [122] print.ftable*
#> [123] print.fun_list*
#> [124] print.function
#> [125] print.getAnywhere*
#> [126] print.ggplot*
#> [127] print.ggplot2_bins*
#> [128] print.ggproto*
#> [129] print.ggproto_method*
#> [130] print.gList*
#> [131] print.glm*
#> [132] print.glue*
#> [133] print.gpar*
#> [134] print.GridCoords*
#> [135] print.GridGrobCoords*
#> [136] print.GridGTreeCoords*
#> [137] print.grob*
#> [138] print.gtable*
#> [139] print.hashtab*
#> [140] print.hcl_palettes*
#> [141] print.hclust*
#> [142] print.help_files_with_topic*
#> [143] print.hexmode
#> [144] print.hms*
#> [145] print.HoltWinters*
#> [146] print.hsearch*
#> [147] print.hsearch_db*
#> [148] print.htest*
#> [149] print.html*
#> [150] print.html_dependency*
#> [151] print.htmltools.selector*
#> [152] print.htmltools.selector.list*
#> [153] print.htmlwidget*
#> [154] print.infl*
#> [155] print.integrate*
#> [156] print.isoreg*
#> [157] print.json*
#> [158] print.key_missing*
#> [159] print.kmeans*
#> [160] print.knitr_kable*
#> [161] print.last_dplyr_warnings*
#> [162] print.Latex*
#> [163] print.LaTeX*
#> [164] print.libraryIQR
#> [165] print.lifecycle_warnings*
#> [166] print.listof
#> [167] print.lm*
#> [168] print.loadings*
#> [169] print.locale*
#> [170] print.loess*
#> [171] print.logLik*
#> [172] print.ls_str*
#> [173] print.medpolish*
#> [174] print.method_table*
#> [175] print.MethodsFunction*
#> [176] print.mtable*
#> [177] print.NativeRoutineList
#> [178] print.news_db*
#> [179] print.nls*
#> [180] print.noquote
#> [181] print.numeric_version
#> [182] print.object_size*
#> [183] print.octmode
#> [184] print.packageDescription*
#> [185] print.packageInfo
#> [186] print.packageIQR*
#> [187] print.packageStatus*
#> [188] print.paged_df*
#> [189] print.pairwise.htest*
#> [190] print.path*
#> [191] print.person*
#> [192] print.pillar*
#> [193] print.pillar_1e*
#> [194] print.pillar_colonnade*
#> [195] print.pillar_ornament*
#> [196] print.pillar_shaft*
#> [197] print.pillar_squeezed_colonnade*
#> [198] print.pillar_tbl_format_setup*
#> [199] print.pillar_vctr*
#> [200] print.pillar_vctr_attr*
#> [201] print.POSIXct
#> [202] print.POSIXlt
#> [203] print.power.htest*
#> [204] print.ppr*
#> [205] print.prcomp*
#> [206] print.princomp*
#> [207] print.proc_time
#> [208] print.purrr_function_compose*
#> [209] print.purrr_function_partial*
#> [210] print.purrr_rate_backoff*
#> [211] print.purrr_rate_delay*
#> [212] print.quosure*
#> [213] print.quosures*
#> [214] print.R6*
#> [215] print.R6ClassGenerator*
#> [216] print.raster*
#> [217] print.Rconcordance*
#> [218] print.Rd*
#> [219] print.recordedplot*
#> [220] print.rel*
#> [221] print.restart
#> [222] print.RGBcolorConverter*
#> [223] print.RGlyphFont*
#> [224] print.rlang:::list_of_conditions*
#> [225] print.rlang_box_done*
#> [226] print.rlang_box_splice*
#> [227] print.rlang_data_pronoun*
#> [228] print.rlang_dict*
#> [229] print.rlang_dyn_array*
#> [230] print.rlang_envs*
#> [231] print.rlang_error*
#> [232] print.rlang_fake_data_pronoun*
#> [233] print.rlang_lambda_function*
#> [234] print.rlang_message*
#> [235] print.rlang_trace*
#> [236] print.rlang_warning*
#> [237] print.rlang_zap*
#> [238] print.rle
#> [239] print.rlib_bytes*
#> [240] print.rlib_error_3_0*
#> [241] print.rlib_trace_3_0*
#> [242] print.roman*
#> [243] print.SavedPlots*
#> [244] print.scalar*
#> [245] print.sessionInfo*
#> [246] print.shiny.tag*
#> [247] print.shiny.tag.env*
#> [248] print.shiny.tag.list*
#> [249] print.shiny.tag.query*
#> [250] print.simple.list
#> [251] print.smooth.spline*
#> [252] print.socket*
#> [253] print.src*
#> [254] print.srcfile
#> [255] print.srcref
#> [256] print.stepfun*
#> [257] print.stl*
#> [258] print.stringr_view*
#> [259] print.StructTS*
#> [260] print.subdir_tests*
#> [261] print.summarize_CRAN_check_status*
#> [262] print.summary.aov*
#> [263] print.summary.aovlist*
#> [264] print.summary.ecdf*
#> [265] print.summary.glm*
#> [266] print.summary.lm*
#> [267] print.summary.loess*
#> [268] print.summary.manova*
#> [269] print.summary.nls*
#> [270] print.summary.packageStatus*
#> [271] print.summary.ppr*
#> [272] print.summary.prcomp*
#> [273] print.summary.princomp*
#> [274] print.summary.table
#> [275] print.summary.warnings
#> [276] print.summaryDefault
#> [277] print.suppress_viewer*
#> [278] print.table
#> [279] print.tables_aov*
#> [280] print.tbl*
#> [281] print.terms*
#> [282] print.theme*
#> [283] print.tidyverse_conflicts*
#> [284] print.tidyverse_logo*
#> [285] print.transform*
#> [286] print.trunc_mat*
#> [287] print.ts*
#> [288] print.tskernel*
#> [289] print.TukeyHSD*
#> [290] print.tukeyline*
#> [291] print.tukeysmooth*
#> [292] print.undoc*
#> [293] print.uneval*
#> [294] print.unit*
#> [295] print.vctrs_bytes*
#> [296] print.vctrs_sclr*
#> [297] print.vctrs_unspecified*
#> [298] print.vctrs_vctr*
#> [299] print.viewport*
#> [300] print.vignette*
#> [301] print.warnings
#> [302] print.xfun_md_viewable*
#> [303] print.xfun_raw_string*
#> [304] print.xfun_record_results*
#> [305] print.xfun_rename_seq*
#> [306] print.xfun_strict_list*
#> [307] print.xgettext*
#> [308] print.xngettext*
#> [309] print.xtabs*
#> see '?methods' for accessing help and source code