- R is case sensitive
- getwd() - Get current working directory
- > getwd()
- setwd() - Set current working directory
- > setwd("C:/Vinay/Great Lakes/Great Lakes/Course/R/Working Directory")
- Vector
- Create an ordered vector of 100 elements from 1 to 100
- > x <- 1: 100
- Check the data type of the object
- > class(5); # "numeric"
- > class("Vinay"); # "character"
- NOTE:
- Vector is a 1-dimensional array
- Matrix is a 2-dimensional array
- Arrays in R can be N-dimensional
- Array index starts from 1
- Function c for combine
- Create a non-ordered vector with function c
- > x <- c(3, 2, 1, 4, 5)
- > c(3, 2, 1, 4, 5) -> x
- > 1/x # 0.3333, 0.5, 1, 0.25, 0.2
- > x[1] # 3
- > c <- 2 * x #Vector arithmetic
- > c # 6, 4, 2, 8, 10
- Matrix
- Function matrix
- M <- matrix(1:9, 3, 3)
- M # output below
- M[1, 2] # 4
- M[-1, -2] # output below. Matrix without the 1st row and the 2nd column
- M <- matrix(c(1, 2, 3, 4), nrow = 2, ncol = 2)
- Function rowSums, colSums
- M <- matrix(c(1, 2, 3, 4), nrow = 2, ncol = 2)
- M_rowSums <- rowSums(M); M_colSums <- colSums(M);
- M; M_rowSums; M_colSums;
- Adding a column using cbind i.e. column bind
- M <- matrix(c(1, 2, 3, 4), nrow = 2, ncol = 2)
- M; M <- cbind(M, c(5, 6)); M;
- Adding a row using rbind i.e. row bind
- M <- matrix(c(1, 2, 3, 4), nrow = 2, ncol = 2)
- M; M <- rbind(M, c(5, 6)); M;
- Matrix index
- M <- matrix(c(1:25), nrow = 5, ncol = 5); M;
- M[1, 2]; # 6 - element at 1st row and 2nd column
- M[1:3, 2:4]; # matrix subsection from 1st to 3rd rows and 2nd to 4th columns
- M[,]; # similar to M
- M[, 1]; # 1 2 3 4 5 - all row elements from 1st column
- M[1, ]; # 1 6 11 16 21 - all column elements from 1 row
- Data Frames
- Two dimensional objects
- Can hold Numeric, Character or Logical values
- Within a column the data type is same for all elements
- Different columns can have different data types
- NOTE: Number of elements in each column should be same
- Function data.frame
- type <- c("Compact", "MiniVan"); mileage <- c(50, 40); price <- c(1000.00, 5050.00); no.cyl <- c(2, 3);
- cars <- data.frame(type, price, mileage, no.cyl)
- cars;
- cars[1, 2] #1000
- cars[1, 1:3] # Compact 1000 50
- cars[, 1] # Compact MiniVan
- cars[1, ] # Compact 1000 50 2
- Lists
- Can hold variety of objects including matrices, vectors, data frames, lists, etc
- NOTE: The elements of a list need not have the same length or the same type
- Function list
- vector <- c(1:10) # or vector <- 1:10
- Matrix <- matrix(1:10, 5, 2)
- myList <- list(vector, Matrix); myList;
- myList[[1]][2]; # select 2 element from vector in list index 1 - Output = 2
- myList[[2]][4]; # select row 4 and column 1 element from matrix in list index 2 - Output = 4
- myList[[2]][1, 2]; # select row 1 and column 2 element from matrix in list index 2 - Output = 6
- myList[[2]][1:3, 1:2]; # select subsection of matrix starting at row 1 till 3 and column 1 till 2 from matrix in list index 2 - Output as below:
- Packages
- Function install.packages
- Use install.packages("PackageName") to install required packages
- Function library
- After installing the package, we can call the package in R code by library(PackageName)
- Exercises
- Create a vector containing elements 1 to 10 and 21 to 25
- vector <- c(1:10, 21:25)
- Create a matrix using the vector created above
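- Matrix <- matrix(vector, 5, 5) # vector has only 15 elements, so values are recycled to fill the 25 cells; matrix(vector, 5, 3) holds them exactly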
- Extract a 3 by 3 subset from the matrix created above
- Subset <- Matrix[1:3, 1:3]; Subset;
- Data Import
- Function read.table/read.csv/read.xls
- Use read.table("C:/Users/xyz/Desktop/myText.txt", header = TRUE, sep = ",") to read data from text(.txt) file
- Use read.csv("C:/Users/xyz/Desktop/myCSV.csv", header = TRUE) to read data from Comma-Separated(.csv) file
- Use read.xls("C:/Users/xyz/Desktop/myExcel.xls", header = TRUE) to read data from Excel(.xls) file. NOTE: Requires package gdata
- Importing data from other Statistical software - Needs package foreign
- read.spss - SPSS
- read.dta - STATA
- read.ssd - SAS
- Working with data
- Consider the sample data below saved in myCSV.csv file:
- mydata <- read.csv("C:/Users/xyz/Desktop/myCSV.csv", header = TRUE)
- Accessing columns
- To read Price column data use mydata$Price
- Function head
- head(mydata) #Gives by default, first 6 rows in the dataset
- head(mydata, 10) # First 10 rows in the dataset
- Function tail
- tail(mydata) #Gives by default, last 6 rows in the dataset
- tail(mydata, 10) # Last 10 rows in the dataset
- Function dim
- dim(mydata) #Dimensions of the dataset. Output is number of rows and columns
- Function summary
- summary(mydata) #Summary of the dataset. Output contains Min, 1st Quartile, Median, Mean, 3rd Quartile and Max values for each numeric column
- Function View
- View(mydata) #Another option to see the dataset. NOTE: R is case sensitive, so the function is View, not view
- Function table
- table(mydata$Fireplace) #Gives the frequency of value occurrence in a selected column Fireplace
- Syntax: table(DatasetVariable$Column)/table(Column) if attach(DatasetVariable)
- Subsetting/Filtering data
- Use comparison operators: ==, !=, >, <, <=, >=
- To select records with Fireplace set to 1, use mydata[mydata$Fireplace == 1, ]
- Syntax: DatasetVariable[DatasetVariable$Column == Value, ] or DatasetVariable[Column == Value, ] if attach(DatasetVariable)
- Function which
- mydata[which(mydata$Price <= 75000), ] #Select from the dataset only records with Price <= 75,000
- mydata[which(mydata$Price <= 75000 & mydata$Bedrooms == 3), ] #Select from the dataset only records with Price <= 75,000 and 3 Bedrooms
- Syntax: DatasetVariable[which(DatasetVariable$Columns separated with & filter), ]/DatasetVariable[which(Columns separated with & filter), ] if attach(DatasetVariable)
- Exercises
- Display only Price, Bedrooms and Age from the dataset
- mydata[c(1, 4, 6)]
- Help in R
- ?
- ?cbind
- Function help
- help(cbind)
- Square using ^
- x <- 1:10
- xSquare <- x ^ 2 # x raise to the power 2. Output 1 4 9 16 25 36 49 64 81 100
- Log using function log
- x <- 1:10
- xLog <- log(x) # Output 0.0000000 0.6931472 1.0986123 1.3862944 1.6094379 1.7917595 1.9459101 2.0794415 2.1972246 2.3025851
- Function plot
- Suppose we wish to see relationship between two variables:
- x <- 1:10; y <- 2 * x - 8;
- plot(x, y)
- Function barplot
- barplot(table(mydata$Bedrooms))
- Function hist
- Used to plot the frequencies of data
- hist(mydata$Bedrooms) # Similar to barplot plotted above
- hist(mydata$Bedrooms, main = "Bedrooms histogram") # Histogram with title
- hist(mydata$Bedrooms, main = "Bedrooms histogram", xlab = "Bedrooms", ylab = "Frequency") # Histogram with title and X-Y axis label
- hist(mydata$Bedrooms, main = "Bedrooms histogram", xlab = "Bedrooms", ylab = "Frequency", col = "lightBlue") # Histogram with title, X-Y axis label and color
- Function boxplot
- In Descriptive statistics, a box plot is a convenient way of graphically depicting groups of numerical data through their quartiles
- boxplot(mydata$Price) # Create a default vertical boxplot
- boxplot(mydata$Price, horizontal = TRUE) # Create a horizontal boxplot
- boxplot(mydata$Price, horizontal = TRUE, main = "Price boxplot", xlab = "Price", col = "lightBlue") # Horizontal boxplot with title, X axis label and color
- Package ggplot2
- ggplot(mydata, aes(x = Price)) + geom_histogram() # with default 30 bins
- ggplot(mydata, aes(x = Price)) + geom_histogram(stat = 'bin', binwidth = 10) # with specified bin size
- Paneling Graphics using function par
- We can partition the graphics panel
- Syntax: par(mfrow = c(nrow, ncol)) # nrow = number of rows, ncol = number of columns
- par(mfrow = c(2, 2));
- x <- 1:10; y <- 2 * x - 8; plot(x, y); boxplot(mydata$Price, horizontal = TRUE); barplot(table(mydata$Bedrooms)); hist(mydata$Bedrooms, main = "Bedrooms histogram", xlab = "Bedrooms", ylab = "Frequency", col = "lightBlue");
- Heat colors
- hist(mydata$Bedrooms, main = "Bedrooms histogram", xlab = "Bedrooms", ylab = "Frequency", col = heat.colors(14));
- Summary Stats
- Function mean
- mean(mydata$Price)
- Function sd
- sd(mydata$Price)
- Function var
- var(mydata$Price)
- Function objects() and ls()
- Lists all the objects created in an R session
- objects(); ls();
- Function rm()
- Remove objects or functions
- rm(cars); objects();
REFERENCES
Comments
Post a Comment