Useful commands in R

R is case sensitive
getwd() - Get current working directory

> getwd()

setwd() - Set current working directory

> setwd("C:/Vinay/Great Lakes/Great Lakes/Course/R/Working Directory")

Vector

Create an ordered vector of 100 elements from 1 to 100

> x <- 1: 100

Check the data type of the object

> class(5); # "numeric"
> class("Vinay"); # "character"

NOTE:

Vector is a 1-dimensional array
Matrix is a 2-dimensional array
Arrays in R can be N-dimensional
Array index starts from 1

Function c for combine

Create a non-ordered vector with function c

> x <- c(3, 2, 1, 4, 5)
> c(3, 2, 1, 4, 5) -> x
> 1/x # 0.3333, 0.5, 1, 0.25, 0.2
> x[1] # 3
> c <- 2 * x #Vector arithmetic
> c # 6, 4, 2, 8, 10

Matrix

Function matrix

M <- matrix(1:9, 3, 3)
M # output below
M[1, 2] # 4
M[-1, -2] # output below. Matrix without the 1st row and the 2nd column
M <- matrix(c(1, 2, 3, 4), nrow = 2, ncol = 2)

Function rowSums, colSums

M <- matrix(c(1, 2, 3, 4), nrow = 2, ncol = 2)
M_rowSums <- rowSums(M); M_colSums <- colSums(M);
M; M_rowSums; M_colSums;

Adding a column using cbind i.e. column bind

M <- matrix(c(1, 2, 3, 4), nrow = 2, ncol = 2)
M; M <- cbind(M, c(5, 6)); M;

Adding a row using rbind i.e. row bind

M <- matrix(c(1, 2, 3, 4), nrow = 2, ncol = 2)
M; M <- rbind(M, c(5, 6)); M;

Matrix index

M <- matrix(c(1:25), nrow = 5, ncol = 5); M;
M[1, 2]; # 6 - element at 1st row and 2nd column
M[1:3, 2:4]; # matrix subsection from 1st to 3rd rows and 2nd to 4th columns
M[,]; # similar to M
M[, 1]; # 1 2 3 4 5 - all row elements from 1st column
M[1, ]; # 1 6 11 16 21 - all column elements from 1 row

Data Frames

Two dimensional objects
Can hold Numeric, Character or Logical values
Within a column the data type is same for all elements
Different columns can have different data types
NOTE: The number of elements in each column should be the same
Function data.frame

type <- c("Compact", "MiniVan"); mileage <- c(50, 40); price <- c(1000.00, 5050.00); no.cyl <- c(2, 3);
cars <- data.frame(type, price, mileage, no.cyl)
cars;
cars[1, 2] #1000
cars[1, 1:3] # Compact 1000 50
cars[, 1] # Compact MiniVan
cars[1, ] # Compact 1000 50 2

Lists

Can hold a variety of objects including matrices, vectors, data frames, lists, etc
NOTE: The components of a list need not have the same length or type
Function list

vector <- c(1:10) # or vector <- 1:10
Matrix <- matrix(1:10, 5, 2)
myList <- list(vector, Matrix); myList;
myList[[1]][2]; # select 2 element from vector in list index 1 - Output = 2
myList[[2]][4]; # select row 4 and column 1 element from matrix in list index 2 - Output = 4
myList[[2]][1, 2]; # select row 1 and column 2 element from matrix in list index 2 - Output = 6
myList[[2]][1:3, 1:2]; # select subsection of matrix starting at row 1 till 3 and column 1 till 2 from matrix in list index 2 - Output as below:

Packages

Function install.packages

Use install.packages("PackageName") to install required packages

Function library

After installing the package, we can call the package in R code by library(PackageName)

Exercises

Create a vector containing elements 1 to 10 and 21 to 25

vector <- c(1:10, 21:25)

Create a matrix using the vector created above

Matrix <- matrix(vector, 5, 3) # the 15 elements fill a 5 x 3 matrix exactly (5 x 5 would recycle values)

Extract a 3 by 3 subset from the matrix created above

Subset <- Matrix[1:3, 1:3]; Subset;

Data Import

Function read.table/read.csv/read.xls

Use read.table("C:/Users/xyz/Desktop/myText.txt", header = TRUE, sep = ",") to read data from text(.txt) file
Use read.csv("C:/Users/xyz/Desktop/myCSV.csv", header = TRUE) to read data from Comma-Separated(.csv) file
Use read.xls("C:/Users/xyz/Desktop/myExcel.xls", header = TRUE) to read data from Excel(.xls) file. NOTE: Requires package gdata

Importing data from other Statistical software - Needs package foreign

read.spss - SPSS
read.dta - STATA
read.ssd - SAS

Working with data

Consider the sample data below saved in myCSV.csv file:
mydata <- read.csv("C:/Users/xyz/Desktop/myCSV.csv", header = TRUE)
Accessing columns

To read Price column data use mydata$Price

Function head

head(mydata) #Gives by default, first 6 rows in the dataset
head(mydata, 10) # First 10 rows in the dataset

Function tail

tail(mydata) #Gives by default, last 6 rows in the dataset
tail(mydata, 10) # Last 10 rows in the dataset

Function dim

dim(mydata) #Dimensions of the dataset. Output is number of rows and columns

Function summary

summary(mydata) #Summary of the dataset. Output contains Min, 1st Quartile, Median, Mean, 3rd Quartile and Max values for each numeric column

Function View

View(mydata) #Another option to see the dataset. NOTE: capital V - R is case sensitive

Function table

table(mydata$Fireplace) #Gives the frequency of value occurrence in a selected column Fireplace

Syntax: table(DatasetVariable$Column)/table(Column) if attach(DatasetVariable)

Subsetting/Filtering data

Use comparison operators: ==, !=, >, <, <=, >=
To select records with Fireplace set to 1, use mydata[mydata$Fireplace == 1, ]

Syntax: DatasetVariable[DatasetVariable$Column == value, ] or DatasetVariable[Column == value, ] if attach(DatasetVariable)

Function which

mydata[which(mydata$Price <= 75000), ] #Select from the dataset only records with Price <= 75,000
mydata[which(mydata$Price <= 75000 & mydata$Bedrooms == 3), ] #Select from the dataset only records with Price <= 75,000 and 3 Bedrooms
Syntax: DatasetVariable[which(DatasetVariable$Columns separated with & filter), ]/DatasetVariable[which(Columns separated with & filter), ] if attach(DatasetVariable)

Exercises

Display only Price, Bedrooms and Age from the dataset

mydata[c(1, 4, 6)]

Help in R

?cbind

Function help

help(cbind)

Square using ^

x <- 1:10
xSquare <- x ^ 2 # x raised to the power 2. Output 1 4 9 16 25 36 49 64 81 100

Log using function log

x <- 1:10
xLog <- log(x) # Output 0.0000000 0.6931472 1.0986123 1.3862944 1.6094379 1.7917595 1.9459101 2.0794415 2.1972246 2.3025851

Function plot

Suppose we wish to see relationship between two variables:

x <- 1:10; y <- 2 * x - 8;
plot(x, y)

Function barplot

barplot(table(mydata$Bedrooms))

Function hist

Used to plot the frequencies of data
hist(mydata$Bedrooms) # Similar to barplot plotted above
hist(mydata$Bedrooms, main = "Bedrooms histogram") # Histogram with title
hist(mydata$Bedrooms, main = "Bedrooms histogram", xlab = "Bedrooms", ylab = "Frequency") # Histogram with title and X-Y axis label
hist(mydata$Bedrooms, main = "Bedrooms histogram", xlab = "Bedrooms", ylab = "Frequency", col = "lightBlue") # Histogram with title, X-Y axis label and color

Function boxplot

In Descriptive statistics, a box plot is a convenient way of graphically depicting groups of numerical data through their quartiles
boxplot(mydata$Price) # Create a default vertical boxplot
boxplot(mydata$Price, horizontal = TRUE) # Create a horizontal boxplot
boxplot(mydata$Price, horizontal = TRUE, main = "Price boxplot", xlab = "Price", col = "lightBlue") # Horizontal boxplot with title, X axis label and color

Package ggplot2

ggplot(mydata, aes(x = Price)) + geom_histogram() # with default 30 bins
ggplot(mydata, aes(x = Price)) + geom_histogram(stat = 'bin', binwidth = 10) # with specified bin size

Paneling Graphics using function par

We can partition the graphics panel

Syntax: par(mfrow = c(nrow, ncol)) # nrow = number of rows, ncol = number of columns
par(mfrow = c(2, 2));
x <- 1:10; y <- 2 * x - 8; plot(x, y); boxplot(mydata$Price, horizontal = TRUE); barplot(table(mydata$Bedrooms)); hist(mydata$Bedrooms, main = "Bedrooms histogram", xlab = "Bedrooms", ylab = "Frequency", col = "lightBlue");

Heat colors

hist(mydata$Bedrooms, main = "Bedrooms histogram", xlab = "Bedrooms", ylab = "Frequency", col = heat.colors(14));

Summary Stats

Function mean

mean(mydata$Price)

Function sd

sd(mydata$Price)

Function var

var(mydata$Price)

Function objects() and ls()

Lists all the objects created in an R session
objects(); ls();

Function rm()

Remove objects or functions
rm(cars); objects();

REFERENCES

https://www.greatlearning.in/great-lakes-pgpba/

R

Search This Blog

Useful commands in R

Comments

Post a Comment