Subset

Subsetting a dataset (data frame)

# read dataset (tab-separated text file, path relative to working directory)
# read.table's default sep = "" splits on any whitespace, which includes tabs

mydata <- read.table("Downloads/data_2x20.csv", header = TRUE)

# first 3 rows; the empty column index keeps every column

mydata[seq_len(3), ]

height width

sample01 6.576 3.644

sample02 6.379 3.110

sample03 10.542 4.213

# first 2 values of column "height" (returned as a plain vector)

mydata[seq_len(2), "height"]

[1] 6.576 6.379

# every value of column "height"; the empty row index selects all rows

mydata[, "height"]

[1] 6.576 6.379 10.542 4.543 6.092 8.804 6.924 6.736 5.905 7.071

[11] 8.781 9.490 4.572 5.052 6.636 9.965 7.012 9.197 9.837 10.841

# logical condition: keep all rows whose height is greater than 10
# (subset() evaluates the condition inside the data frame, so the column
# can be referred to by its bare name)

subset(mydata, height > 10)

height width

sample03 10.542 4.213

sample20 10.841 4.197