# is for comments
Input and display
#read files with labels in first row
read.table(filename,header=TRUE) #read a tab or space delimited file
read.table(filename,header=TRUE,sep=',') #read csv files
x <- c(1,2,4,8,16) #create a data vector with specified elements
y <- c(1:10) #create a data vector with elements 1 to 10
n <- 10
x1 <- c(rnorm(n)) #create a n item vector of random normal deviates
y1 <- c(runif(n))+n #create a n item vector of random uniform deviates, with n added to each
z <- rbinom(n,size,prob) #create n samples of size "size" with probability prob from the binomial
vect <- c(x,y) #combine the vectors x and y into one vector of length length(x)+length(y)
mat <- cbind(x,y) #combine x and y into a n x 2 matrix
mat[4,2] #display the 4th row and the 2nd column
mat[3,] #display the 3rd row
mat[,2] #display the 2nd column
subset(dataset,logical) #those objects meeting a logical criterion
subset(data.df,select=variables,logical) #get those objects from a data frame that meet a criterion
data.df[data.df=logical] #yet another way to get a subset
x[order(x$B),] #sort a dataframe by the order of the elements in B
x[rev(order(x$B)),] #sort the dataframe in reverse order
browse.workspace #a Mac menu command that creates a window with information about all variables in the workspace
Moving around
ls() #list the variables in the workspace
rm(x) #remove x from the workspace
rm(list=ls()) #remove all the variables from the workspace
attach(mat) #make the names of the variables in the matrix or data frame available in the workspace
detach(mat) #releases the names (remember to do this each time you attach something)
with(mat, .... ) #a preferred alternative to attach ... detach
new <- old[,-n] #drop the nth column
new <- old[-n,] #drop the nth row
new <- old[,-c(i,j)] #drop the ith and jth column
new <- subset(old,logical) #select those cases that meet the logical condition
complete <- subset(data.df,complete.cases(data.df)) #find those cases with no missing values
new <- old[n1:n2,n3:n4] #select the n1 through n2 rows of variables n3 through n4
beta(a, b)
choose(n, k)
dnorm(x, mean=0, sd=1, log = FALSE) #normal distribution
pnorm(q, mean=0, sd=1, lower.tail = TRUE, log.p = FALSE)
qnorm(p, mean=0, sd=1, lower.tail = TRUE, log.p = FALSE)
rnorm(n, mean=0, sd=1)
dunif(x, min=0, max=1, log = FALSE) #uniform distribution
punif(q, min=0, max=1, lower.tail = TRUE, log.p = FALSE)
qunif(p, min=0, max=1, lower.tail = TRUE, log.p = FALSE)
runif(n, min=0, max=1)
Data manipulation
replace(x, list, values) #remember to assign this to some object i.e., x <- replace(x,x==-9,NA)
#similar to the operation x[x==-9] <- NA
scrub(x, where, min, max, isvalue,newvalue) #a convenient way to change particular values (in psych package)
cut(x, breaks, labels = NULL,
include.lowest = FALSE, right = TRUE, dig.lab = 3, ...)
x.df <- data.frame(x1,x2,x3 ...) #combine different kinds of data into a data frame
as.data.frame()
x <- as.matrix()
scale() #converts a data frame to standardized scores
round(x,n) #rounds the values of x to n decimal places
ceiling(x) #vector of the smallest integers >= x
floor(x) #vector of the largest integers <= x
as.integer(x) #truncates real x to integers (compare to round(x,0))
as.integer(x < cutpoint) #vector of 1 if x is less than cutpoint, 0 otherwise
factor(ifelse(a < cutpoint, "Neg", "Pos")) #is another way to dichotomize and to make a factor for analysis
transform(data.df,variable names = some operation) #can be part of a set up for a data set
x%in%y #tests each element of x for membership in y
y%in%x #tests each element of y for membership in x
all(x%in%y) #true if every element of x is in y (x is a subset of y, not necessarily a proper one)
all(x) # for a vector of logical values, are they all true?
any(x) #for a vector of logical values, is at least one true?
Statistics and transformations
max(x, na.rm=TRUE) #Find the maximum value in the vector x, exclude missing values
min(x, na.rm=TRUE)
mean(x, na.rm=TRUE)
median(x, na.rm=TRUE)
sum(x, na.rm=TRUE)
var(x, na.rm=TRUE) #produces the variance covariance matrix
sd(x, na.rm=TRUE) #standard deviation
mad(x, na.rm=TRUE) #(median absolute deviation)
fivenum(x, na.rm=TRUE) #Tukey fivenumbers min, lowerhinge, median, upper hinge, max
table(x) #frequency counts of entries, ideally the entries are factors(although it works with integers or even reals)
scale(data,scale=FALSE) #centers around the mean but does not scale by the sd)
cumsum(x) #cumulative sum, etc. (takes no na.rm argument; every sum from the first NA onward is NA)
rev(x) #reverse the order of values in x
cor(x,y,use="pair") #correlation matrix for pairwise complete data, use="complete" for complete cases
aov(x~y,data=datafile) #where x and y can be matrices
aov.ex1 = aov(DV~IV,data=data.ex1) #do the analysis of variance or
aov.ex2 = aov(DV~IV1*IV2,data=data.ex2) #do a two way analysis of variance
summary(aov.ex1) #show the summary table
print(model.tables(aov.ex1,"means"),digits=3) #report the means and the number of subjects/cell
boxplot(DV~IV,data=data.ex1) #graphical summary appears in graphics window
lm(x~y,data=dataset) #basic linear model where x and y can be matrices (see plot.lm for plotting options)
power.anova.test(groups = NULL, n = NULL, between.var = NULL,
within.var = NULL, sig.level = 0.05, power = NULL)
power.t.test(n = NULL, delta = NULL, sd = 1, sig.level = 0.05,
power = NULL, type = c("two.sample", "one.sample", "paired"),
alternative = c("two.sided", "one.sided"),strict = FALSE)
Regression, the linear model, factor analysis and principal components analysis (PCA)
t(X) #transpose of X
X %*% Y #matrix multiply X by Y
solve(A) #inverse of A
solve(A,B) #solves A %*% X = B for X, i.e. (inverse of A) %*% B (may be used for linear regression)
data frames are needed for regression
factanal() (see also fa in the psych package)
princomp() (see principal in the psych package)
Useful additional commands
colSums (x, na.rm = FALSE, dims = 1)
rowSums (x, na.rm = FALSE, dims = 1)
colMeans(x, na.rm = FALSE, dims = 1)
rowMeans(x, na.rm = FALSE, dims = 1)
rowsum(x, group, reorder = TRUE, ...) #finds row sums for each level of a grouping variable
apply(X, MARGIN, FUN, ...) #applies the function (FUN) to either rows (1) or columns (2) on object X
apply(x,1,min) #finds the minimum for each row
apply(x,2,max) #finds the maximum for each column
max.col(x) #another way to find which column has the maximum value for each row
z=apply(x,1,which.min) #tells the column with the minimum value for every row
par(mfrow=c(nrow,mcol)) #number of rows and columns to graph
par(ask=TRUE) #ask for user input before drawing a new graph
par(omi=c(0,0,1,0) ) #set the size of the outer margins
mtext("some global title",3,outer=TRUE,line=1,cex=1.5) #note that we seem to need to add the global title last
#cex = character expansion factor
boxplot(x,main="title") #boxplot (box and whiskers)
title( "some title") #add a title to the first graph
hist() #histogram
par(mfrow=c(1,1)) #change the graph window back to one figure
plot(PA,NAF,pch=symb[group],col=colors[group],bg=colors[condit],cex=1.5,main="Positive vs. Negative Affect by Film condition")
abline(a, b, untf = FALSE, ...)
abline(h=, untf = FALSE, ...)
abline(v=, untf = FALSE, ...)
abline(coef=, untf = FALSE, ...)
abline(reg=, untf = FALSE, ...)
identify(eatar,eanta,labels=labels(energysR[,1]) ) #dynamically puts names on the plots
pairs() #SPLOM (scatter plot Matrix)
pairs.panels () #SPLOM on lower off diagonal, histograms on diagonal, correlations on upper off diagonal
#not standard R, but in the psych package
matplot ()
biplot ()
plot(table(x)) #plot the frequencies of levels in x
x= recordPlot() #save the current plot device output in the object x
replayPlot(x) #replot object x
dev.control #various control functions for printing/saving graphic files
pdf(height=6, width=6) #create a pdf file for output
dev.off() #close the pdf file created with pdf
layout(mat) #specify where multiple graphs go on the page
#experiment with the magic code from Paul Murrell to do fancy graphic location
# Split the device into a 2 x 6 grid of cells: figures 1-3 each span two cells
# of the top row, figures 4 and 5 each span two cells of the bottom row, and a
# 0 marks a cell that is left empty.
layout(rbind(c(1, 1, 2, 2, 3, 3),
             c(0, 4, 4, 5, 5, 0)))
# Fill the five regions in order: set up an empty plot (type = "n" draws no
# points) and write the figure number at the plotted point (1, i).
for (i in 1:5) {
  plot(i, type = "n")
  text(1, i, paste("Plot", i), cex = 4)
}
To generate random samples from a variety of distributions
sample(x, size, replace = FALSE, prob = NULL) #samples with or without replacement
Working with Dates
date <- strptime(as.character(date), "%m/%d/%y") #change the date field to an internal form for time
#see ?formats and ?POSIXlt
month= months(date) #see also weekdays, julian
