One thing that I’ve always wanted is a function that runs the corrplot function on all the numerical variables of a data set.

Since I’m still new to R, this is all I could come up with. It definitely needs some more work, but this’ll have to do for now:

#' Plot a correlation matrix of all numeric-coercible columns of a data set
#'
#' Filters `data` down to columns that can take part in a correlation
#' analysis, computes pairwise correlations and their p-values, and draws the
#' lower triangle with `corrplot::corrplot()`, marking the pairs whose
#' p-value exceeds `sig`.
#'
#' Columns are filtered in three steps, each of which prints how many
#' columns it removed:
#' 1. columns of class character are dropped;
#' 2. columns that are entirely `NA` after `as.numeric(as.character(x))` are
#'    dropped (this includes logical and date columns); the remaining
#'    non-numeric columns, e.g. factors with numeric labels, are replaced by
#'    their numeric version;
#' 3. columns whose standard deviation is zero or `NA` are dropped. A single
#'    `NA` value makes `sd()` return `NA`, so such columns are dropped too.
#'
#' @param data A data frame (anything `as.data.frame()` accepts).
#' @param shape Passed to `corrplot()` as `method`.
#' @param sig Passed to `corrplot()` as `sig.level`.
#' @param insign Passed to `corrplot()` as `insig`.
#' @return Whatever `corrplot()` returns; the function is called for the plot.
explData <- function(data, shape = "square", sig = 0.05, insign = "pch") {
  # Attached as in the original version so callers that rely on corrplot
  # being on the search path afterwards keep working. The previously loaded
  # dplyr and ggvis were never used and are no longer required.
  library(corrplot)

  # Normalise tibbles/matrices so column extraction yields plain vectors.
  data <- as.data.frame(data)

  # ---- 1. Remove character columns ----
  # Test the column *class*; comparing the values to "character" was a bug.
  is_chr <- vapply(data, is.character, logical(1))
  data <- data[, !is_chr, drop = FALSE]
  print(paste(
    sum(is_chr), "columns with class character were deleted.",
    sep = " "
  ))

  # ---- 2. Coerce what can be coerced, drop what cannot ----
  # Coercion failures are expected here; they are exactly what is detected.
  coerced <- lapply(
    data,
    function(column) suppressWarnings(as.numeric(as.character(column)))
  )
  not_coercible <- vapply(
    coerced,
    function(column) all(is.na(column)),
    logical(1)
  )
  # Store the numeric version of convertible non-numeric columns; without
  # this, factor columns would reach sd()/cor() and fail there.
  to_convert <- !not_coercible & !vapply(data, is.numeric, logical(1))
  if (any(to_convert)) {
    data[to_convert] <- coerced[to_convert]
  }
  data <- data[, !not_coercible, drop = FALSE]
  print(paste(
    sum(not_coercible),
    "factor columns were deleted. Could not coerce into integers.",
    sep = " "
  ))

  # ---- 3. Remove columns without usable variance ----
  column_sd <- vapply(data, stats::sd, numeric(1))
  no_variance <- is.na(column_sd) | column_sd == 0
  data <- data[, !no_variance, drop = FALSE]
  print(paste(
    sum(no_variance),
    "columns with standard deviation equaling zero, were deleted.",
    sep = " "
  ))

  n_vars <- ncol(data)
  if (n_vars == 0L) {
    stop("No usable numeric columns remain after filtering.", call. = FALSE)
  }

  # ---- 4. Matrix of p-values for every pair of variables ----
  # Preallocated with the same dimnames as the correlation matrix. The
  # diagonal stays 0 (a variable against itself) and each pair is tested
  # once and mirrored, since the test is symmetric in its arguments.
  var_names <- colnames(data)
  p_values <- matrix(
    0,
    nrow = n_vars, ncol = n_vars,
    dimnames = list(var_names, var_names)
  )
  for (i in seq_len(n_vars - 1L)) {
    for (j in seq(i + 1L, n_vars)) {
      p <- stats::cor.test(data[[i]], data[[j]])$p.value
      p_values[i, j] <- p
      p_values[j, i] <- p
    }
  }

  # Matrix of correlation coefficients.
  corrs <- stats::cor(data)

  # ---- 5. Plot, marking statistically insignificant relationships ----
  corrplot(
    corrs,
    p.mat = p_values, sig.level = sig, method = shape,
    type = "lower", order = "FPC", addrect = 2, insig = insign
  )
}
#______________END!_____________________

### Like this:

Like Loading...

*Related*