# Example 1: import a blank-separated table with read.table().
# Assumes "accounts.txt" is in the working directory and has a header row.
dat <- read.table(
  file = "accounts.txt",
  header = TRUE
)

# Without stringsAsFactors = TRUE, neither column is imported as a factor.
is.factor(dat$account)
#> [1] FALSE
is.factor(dat$bank)
#> [1] FALSE
Import and Export
Reading Assignments
Make sure to read the following sections in the textbook: R Coding Basics, https://www.gastonsanchez.com/R-coding-basics/
Importing Data Tables (Example)
Reading space-separated files
Please download the accounts.txt file.
Example 1. Importing a table from a blank-separated file
Example 2. Limit the number of rows to read in (first 2 rows):
# Read the header line plus only the first 2 data rows.
dat <- read.table(
  file = "accounts.txt",
  header = TRUE,
  nrows = 2
)
Example 3. Skip the first row (no header) and limit the number of rows to read in (4 rows)
# Skip the first line of the file (the header) and read the next 4 rows.
# With header = FALSE the columns get default names.
dat <- read.table(
  file = "accounts.txt",
  header = FALSE,
  skip = 1,
  nrows = 4
)
Example 4. Skip importing the second and third columns
# A colClasses entry of "NULL" drops that column on import, so only the
# first (character) and fourth (numeric) columns are kept.
dat <- read.table(
  file = "accounts.txt",
  header = TRUE,
  colClasses = c(
    "character",
    "NULL",
    "NULL",
    "numeric"
  )
)
Example 5. Use stringsAsFactors = TRUE
- In versions of R < 4.0.0, read.table() and friends convert character strings into factors by default.
- In newer versions of R, you need to specify stringsAsFactors = TRUE to convert character strings into factors, which is convenient when you work on categorical data analysis.
- Using stringsAsFactors = TRUE or as.is = FALSE is equivalent.
# Specify stringsAsFactors = TRUE and check the column types:
# both columns are now imported as factors.
dat <- read.table(
  file = "accounts.txt",
  header = TRUE,
  stringsAsFactors = TRUE
)
is.factor(dat$account)
#> [1] TRUE
is.factor(dat$bank)
#> [1] TRUE

# Equivalently, using as.is = FALSE
dat <- read.table(
  file = "accounts.txt",
  header = TRUE,
  as.is = FALSE
)
is.factor(dat$account)
#> [1] TRUE
is.factor(dat$bank)
#> [1] TRUE
Importing Data Tables (Exercise)
Exercise 1: Download any .txt
data from the weblink and try the read.table()
function and the arguments introduced above.
Exercise 2: Download the PlantGrowth.txt data from the weblink and try the read.table() function with the quote argument.
# quote = "\"'" treats both double and single quotes as quoting characters,
# so they are stripped from the header names and from the values.
dat <- read.table(
  file = "PlantGrowth.txt",
  header = TRUE,
  quote = "\"'"
)
head(dat)
#> weight group
#> 1 4.17 ctrl
#> 2 5.58 ctrl
#> 3 5.18 ctrl
#> 4 6.11 ctrl
#> 5 4.50 ctrl
#> 6 4.61 ctrl

# quote = "" disables quoting: the quote characters stay in the data and the
# quoted header names are converted to X.weight. and X.group.
dat <- read.table(
  file = "PlantGrowth.txt",
  header = TRUE,
  quote = ""
)
head(dat)
#> X.weight. X.group.
#> 1 4.17 "ctrl"
#> 2 5.58 "ctrl"
#> 3 5.18 "ctrl"
#> 4 6.11 "ctrl"
#> 5 4.50 "ctrl"
#> 6 4.61 "ctrl"
Exercise 3: Download the PlantGrowth.txt data from the weblink and try the read.table() function with the quote argument.
# With both quote characters enabled, quotes around names and values are
# removed during import.
dat <- read.table(
  file = "PlantGrowth.txt",
  header = TRUE,
  quote = "\"'"
)
head(dat)
#> weight group
#> 1 4.17 ctrl
#> 2 5.58 ctrl
#> 3 5.18 ctrl
#> 4 6.11 ctrl
#> 5 4.50 ctrl
#> 6 4.61 ctrl

# With quoting disabled, the quote characters are kept as part of the data.
dat <- read.table(
  file = "PlantGrowth.txt",
  header = TRUE,
  quote = ""
)
head(dat)
#> X.weight. X.group.
#> 1 4.17 "ctrl"
#> 2 5.58 "ctrl"
#> 3 5.18 "ctrl"
#> 4 6.11 "ctrl"
#> 5 4.50 "ctrl"
#> 6 4.61 "ctrl"
Exercise 4: Download the airquality.txt data from the weblink and try the read.table() function with the na.strings argument.
### airquality.txt
# na.strings = "NA": cells containing NA are imported as missing values.
dat <- read.table(
  file = "airquality.txt",
  header = TRUE,
  na.strings = "NA"
)
head(dat)
#> Ozone Solar.R Wind Temp Month Day
#> 1 41 190 7.4 67 5 1
#> 2 36 118 8.0 72 5 2
#> 3 12 149 12.6 74 5 3
#> 4 18 313 11.5 62 5 4
#> 5 NA NA 14.3 56 5 5
#> 6 28 NA 14.9 66 5 6
sum(is.na(dat))
#> [1] 44

# na.strings = "": NA is no longer recognised as a missing-value marker,
# so is.na() finds no missing cells even though NA still prints.
dat <- read.table(
  file = "airquality.txt",
  header = TRUE,
  na.strings = ""
)
head(dat)
#> Ozone Solar.R Wind Temp Month Day
#> 1 41 190 7.4 67 5 1
#> 2 36 118 8.0 72 5 2
#> 3 12 149 12.6 74 5 3
#> 4 18 313 11.5 62 5 4
#> 5 NA NA 14.3 56 5 5
#> 6 28 NA 14.9 66 5 6
sum(is.na(dat))
#> [1] 0
Exercise 5: Download the missing_values_dataset.txt data, in which NA, empty strings "", NULL, NaN, and special values such as -99 or 9999 all represent missing values. Use the na.strings argument to identify all of these missing values.
### missing_values_dataset.txt
# Treat every listed marker as a missing value on import.
# NOTE(review): the bare NA is coerced to NA_character_ by c(); writing "NA"
# would be more explicit. The exercise text mentions empty strings "" while
# this vector lists " " -- confirm which marker the data file uses.
dat <- read.table(
  file = "missing_values_dataset.txt",
  header = TRUE,
  na.strings = c(NA, " ", "NULL", "NaN", "-99", "9999")
)
dat
#> Name Age Height Score
#> 1 Alice 25 165 80
#> 2 <NA> 30 175 NA
#> 3 Charlie NA NA 75
#> 4 David 22 NA NA
#> 5 <NA> NA 180 60
sum(is.na(dat))
#> [1] 8
Importing Data Tables – Reading comma-separated files
read.csv() vs read.csv2()
- Use read.csv() when the file uses a comma (,) as the field separator and a period (.) for decimals. That is, import comma-separated values (US and many other countries).
- Use read.csv2() when the file uses a semicolon (;) as the field separator and a comma (,) for decimals. That is, import semicolon-separated values (Europe).
This distinction is important, especially when working with datasets formatted according to different regional standards. The same applies to read.delim() and read.delim2().
Please download the accounts.csv file.
Example 1. We can use read.table(). Or, more conveniently, we can use read.csv().
# using read.table(): the comma separator must be given explicitly
dat <- read.table(
  file = "accounts.csv",
  header = TRUE,
  sep = ","
)

# using read.csv()
dat <- read.csv(file = "accounts.csv")
Exercise 1: Download any .csv
data from the weblink and try the read.csv()
function and the arguments introduced above.
Exporting Tables
write.table()
, write.csv()
Example:
# Export mtcars three ways; row names are left out of every file.

# 1) blank-separated values (the write.table() default separator)
write.table(mtcars, row.names = FALSE, file = "mtcars.txt")

# 2) tab-separated values
write.table(mtcars, row.names = FALSE, sep = "\t", file = "mtcars.tsv")

# 3) comma-separated values
write.csv(mtcars, row.names = FALSE, file = "mtcars.csv")
Exercise 1: What happens if row.names = TRUE, which is the default option for write.table() and write.csv()?
Exercise 2: Load any dataset from an R package. Write a CSV file for the dataset.
Exporting Text (non-tabular data)
Example:
# create a connection to a file
# (assuming output file in working directory)
txt <- file("mytext.txt")

# write contents to the file
# (some_text is assumed to be a character vector defined beforehand)
writeLines(text = some_text, con = txt) # con stands for connection

# release the connection object once it is no longer needed
close(txt)

# Equivalently, write contents to the file
writeLines(text = some_text, con = "mytext.txt")
Note: Calling file() just creates the connection object but it does not open it. The function writeLines() is the one that opens the connection, writes the content to the file mytext.txt, and then closes the connection on exiting.
Sending output with cat()
Recall cat()
and sprintf()
functions.
# summary statistics of mpg (printed to the console)
min(mtcars$mpg)
max(mtcars$mpg)
median(mtcars$mpg)
mean(mtcars$mpg)
sd(mtcars$mpg)

# summary statistics of mpg (stored for export)
mpg_min <- min(mtcars$mpg)
mpg_max <- max(mtcars$mpg)
mpg_med <- median(mtcars$mpg)
mpg_avg <- mean(mtcars$mpg)
mpg_sd <- sd(mtcars$mpg)

# name of output file
outfile <- "mpg-statistics.txt"

# first line of the file (creates or overwrites the file)
cat("Miles per Gallon Statistics\n\n", file = outfile)
# subsequent lines appended to the output file
cat("Minimum:", mpg_min, "\n", file = outfile, append = TRUE)
cat("Maximum:", mpg_max, "\n", file = outfile, append = TRUE)
cat("Median :", mpg_med, "\n", file = outfile, append = TRUE)
cat("Mean :", mpg_avg, "\n", file = outfile, append = TRUE)
cat("Std Dev:", mpg_sd, "\n", file = outfile, append = TRUE)

# name of output file
outfile <- "mpg-statistics 2.txt"

# same report, with sprintf() rounding every value to two decimals
cat("Miles per Gallon Statistics\n\n", file = outfile)
cat(sprintf("Minimum: %0.2f", mpg_min), "\n", file = outfile, append = TRUE)
cat(sprintf("Maximum: %0.2f", mpg_max), "\n", file = outfile, append = TRUE)
cat(sprintf("Median : %0.2f", mpg_med), "\n", file = outfile, append = TRUE)
cat(sprintf("Mean : %0.2f", mpg_avg), "\n", file = outfile, append = TRUE)
cat(sprintf("Std Dev: %0.2f", mpg_sd), "\n", file = outfile, append = TRUE)
Redirecting output with sink()
The sink() function can export R output exactly as it is displayed in R’s console.
Example:
Try the code below and get the output in R’s console.
summary(mtcars[, c("mpg", "hp", "cyl")])
To be able to keep the same output displayed by R, you must use sink(). This function will divert R output to the specified file.
# start diverting console output to a file
sink("mtcars-stats.txt")

# the summary table now goes to the file instead of the console
summary(mtcars[, c("mpg", "hp", "cyl")])

# calling sink() with no arguments ends the diversion
sink()
Do not forget to call sink() again, without specifying any arguments, to stop the sinking process.
Other Exporting in R
- Exporting and importing R data – save() and load()
# keep three columns of mtcars in a new data frame
mtcars3 <- mtcars[, c("mpg", "hp", "cyl")]

# write the object to an .RData file, then restore it under the same name
save(mtcars3, file = "mtcars3.RData")
load("mtcars3.RData")
- Exporting Images – will be discussed in Base R Graphics