# read .csv data file
myData = read.table('data_2x20.csv', header=TRUE)
# same, but explicitly setting the separator to a tab ('\t') and taking the row names from the first column
myData = read.table('data_2x20.csv', header=TRUE, sep="\t", row.names=1 )
# download and read .csv data from a web address
myData = read.table( url('http://www.nlpca.org/data_2x20.csv') , header=TRUE)
A data frame is a special type of list, with class 'data.frame'.
typeof(myData)
"list"
class(myData)
"data.frame"
# show the first lines
head(myData)
height width
sample01 6.576 3.644
sample02 6.379 3.110
sample03 10.542 4.213
sample04 4.543 2.954
sample05 6.092 3.248
sample06 8.804 3.907
# show the last lines
tail(myData)
height width
sample15 6.636 3.293
sample16 9.965 4.176
sample17 7.012 2.990
sample18 9.197 3.977
sample19 9.837 3.872
sample20 10.841 4.197
# get columns headers
colnames(myData)
"height" "width"
# get row headers
rownames(myData)
"sample01" "sample02" "sample03" "sample04" "sample05" "sample06"
# size of data table
dim(myData)
20 2
# show format and structure
str(myData)
'data.frame': 20 obs. of 2 variables:
$ height: num 6.58 6.38 10.54 4.54 6.09 ...
$ width : num 3.64 3.11 4.21 2.95 3.25 ...
# get sum of each column or of each row
colSums(myData, na.rm = TRUE)
height width
150.955 70.257
rowSums(myData, na.rm = TRUE)
sample01 sample02 sample03 sample04
10.220 9.489 14.755 7.497
# get min and max value of each column
apply(myData, 2, min, na.rm = TRUE)
height width
4.543 2.782
apply(myData, 2, max, na.rm = TRUE)
height width
10.841 4.213
# get min value of each row
apply(myData, 1, min, na.rm = TRUE)
sample01 sample02 sample03 sample04 sample05
3.644 3.110 4.213 2.954 3.248
# add product of columns "height" and "width"
myData$prod = myData$height * myData$width
# add group labels ( manual, or using rep() to replicate/repeat values )
myData$group = c("A","A","A","A","A","B","B","B","B","B","C","C","C","C","C","D","D","D","D","D")
myData$group = c(rep("A", 5),rep("B", 5),rep("C", 5),rep("D", 5))
# add new column "subject", initialize by having all values "NA"
myData$subject = NA
myData
height width prod group subject
sample01 6.576 3.644 23.96294 A NA
sample02 6.379 3.110 19.83869 A NA
sample03 10.542 4.213 44.41345 A NA
sample04 4.543 2.954 13.42002 A NA
sample05 6.092 3.248 19.78682 A NA
sample06 8.804 3.907 34.39723 B NA
sample07 6.924 3.543 24.53173 B NA
°°°
# remove column "prod" from dataframe "myData"
# (assigning NULL drops the column; R names are case-sensitive, so the
#  object must be written "myData", not "mydata")
myData$prod = NULL
height width group subject
sample01 6.576 3.644 A NA
sample02 6.379 3.110 A NA
sample03 10.542 4.213 A NA
°°°
# replace all NA with zeros
myData[ is.na(myData) ] = 0
height width group subject
sample01 6.576 3.644 A 0
sample02 6.379 3.110 A 0
sample03 10.542 4.213 A 0
°°°
# replace all zeros with NA
myData[myData == 0] = NA
# get index position of column "width"
which(names(myData)=="width")
2
# get all columns from 1 ("height") to 2 ("width")
datasubset = myData[,1:2]
height width
sample01 6.576 3.644
sample02 6.379 3.110
sample03 10.542 4.213
# get selected columns: 3 ("group"), 1 ("height") and 2 ("width")
datasubset = myData[,c(3,1,2)]
group height width
sample01 A 6.576 3.644
sample02 A 6.379 3.110
sample03 A 10.542 4.213
# read demo .csv data file into a data.frame
myData = read.table( url('http://www.nlpca.org/data_2x20.csv') , header=TRUE)
# converting data.frame to matrix, using as.matrix()
myMatrix = as.matrix(myData)
# check type and class of variable "myData"
class(myData)
"data.frame"
typeof(myData)
"list"
# check type and class of converted variable "myMatrix"
class(myMatrix)
"matrix" "array"   (R >= 4.0.0; older R versions return only "matrix")
typeof(myMatrix)
"double"
!!! Take care that your data.frame contains only numbers. If a data.frame contains any text-based column such as the "group" column, the converted matrix will be completely a text matrix, including all numbers as text-elements (type: "character" instead of "double").
For text-number mixed data.frames,
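use matrix() instead of as.matrix() to keep number-columns numerical, and text-columns as text-character strings; note that the result is not a regular two-dimensional table but a list-matrix (typeof: "list") with one cell per column, each cell holding that column's whole vector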
myMatrix = matrix(myData)
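use data.matrix() instead of as.matrix() to get a pure numerical matrix; text columns are not kept as text but converted to factors and then to their integer codes (e.g. "A" becomes 1, "B" becomes 2), not to NA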
myMatrix = data.matrix(myData)