## CIwR_rf.R
## (c)2007 by Jim Porzak, Loyalty Matrix, Inc. 
## Licensed for training purposes only.

## Attach randomForest. library() stops with an error right here if the
## package is not installed, whereas require() would only warn, return FALSE,
## and let the script fail later at the first randomForest call.
library(randomForest)
## Every relative path used below ("Data/...", the saved .rda/.RF files)
## is resolved against this project directory.
setwd("c:/Projects/CIwR/R")
## Show which data files are available.
dir("Data")

## Load the training set; the MembID column becomes the row names.
Members <- read.delim("Data/MemberTrainingSet.txt", row.names = "MembID")
str(Members)
summary(Members)

## Drop the first/last check-in day columns before modelling.
Members <- subset(Members, select = -c(FirstCkInDay, LastCkInDay))

## Recode missing "days since" values to the sentinel 999
## (presumably "no such event on record" -- confirm with the data owner).
Members$DaysSinceLastUse <- replace(
  Members$DaysSinceLastUse, is.na(Members$DaysSinceLastUse), 999
)
Members$DaysSinceLastExtra <- replace(
  Members$DaysSinceLastExtra, is.na(Members$DaysSinceLastExtra), 999
)

## Impute whatever NAs remain with rfImpute(), using Status as the response,
## then keep a copy on disk so later runs can restart from the imputed data.
Members <- rfImpute(Status ~ ., data = Members)
summary(Members)
save(Members, file = "MemberTrainingSetImputed.rda")

#load("MemberTrainingSetImputed.rda")        ## Restart here

## Earlier variants of the fit, kept for reference:
#Members.rf <- randomForest(Status ~ ., data = Members, importance = TRUE, proximity = TRUE)
#Members.rf <- randomForest(Members[-1], Members$Status, data = Members, mtry = 4, importance = TRUE, proximity = TRUE)

## Fit the forest exactly once. The script previously ran this identical call
## twice in a row, doubling the run time (proximity = TRUE is the expensive
## part) only to overwrite the first result.
## The x/y interface is used: column 1 (Status) is the response and all other
## columns are predictors, so no `data =` argument is needed.
Members.rf <- randomForest(Members[-1], Members$Status,
                           mtry = 3, ntree = 500,
                           importance = TRUE, proximity = TRUE)

## Print the fitted forest, then save it so the diagnostics below can be
## re-run without refitting.
Members.rf
save(Members.rf, file = "MembersTraining.RF")
#load("MembersTraining.RF")
## Diagnostic plots for the fitted forest.
## Default plot of the forest object, drawn with solid lines.
plot(Members.rf, lty = 1)
## Variable importance (the forest was fitted with importance = TRUE).
varImpPlot(Members.rf)

## Multi-dimensional scaling of the proximity matrix, points grouped by
## Status: first in 2 dimensions, then in 3.
MDSplot(Members.rf, Members[["Status"]], k = 2)
MDSplot(Members.rf, Members[["Status"]], k = 3)

## Margins of the forest's predictions, with a reference line at zero.
plot(margin(Members.rf, Members[["Status"]]))
abline(h = 0)

## Sanity check: predicting the training rows themselves should be
## close to perfect.
Members.pred <- predict(Members.rf, newdata = Members[-1])
## Confusion table: actual Status (column 1) in rows, predictions in columns.
table(Members[[1]], Members.pred)

## Predictions on the test set should be ~ OOB errors
MembersTest <- read.delim("Data/MemberTestSet.txt", row.names = "MembID")
str(MembersTest)
summary(MembersTest)
## Apply the same preprocessing as for the training set: drop the check-in
## day columns and recode missing "days since" values to 999.
MembersTest <- subset(MembersTest, select = -c(FirstCkInDay, LastCkInDay))
MembersTest$DaysSinceLastUse[is.na(MembersTest$DaysSinceLastUse)] <- 999
MembersTest$DaysSinceLastExtra[is.na(MembersTest$DaysSinceLastExtra)] <- 999
## NOTE(review): rfImpute() is driven by the response, so the test labels
## influence the imputed test predictors here. That can make the test error
## look better than it would on truly unlabeled data -- confirm this is
## acceptable, or impute without using Status.
MembersTest <- rfImpute(Status ~ ., data = MembersTest)
save(MembersTest, file = "MemberTestSetImputed.rda")
## Column 1 is Status; everything else is passed to the forest as predictors.
MembersTest.pred <- predict(Members.rf, MembersTest[-1])
## Confusion table: actual Status in rows, predicted Status in columns.
ct <- table(MembersTest[[1]], MembersTest.pred)
## Per-class error = share of each row that falls off the diagonal.
## Written with rowSums()/diag() so it works for any number of classes
## (for two classes it yields the same values as the former hard-coded
## ct[1,2]/sum(ct[1,]) and ct[2,1]/sum(ct[2,])).
cbind(ct, class.error = (rowSums(ct) - diag(ct)) / rowSums(ct))

## Look at partial dependence plots
## One partialPlot() call per predictor, each evaluated over the training
## predictors (Members without its first column). After every numeric
## predictor a blue horizontal reference line is drawn at zero.
## NOTE(review): the variable names are passed unquoted; partialPlot appears
## to capture them unevaluated, so confirm behaviour and axis labels before
## refactoring these calls into a loop or helper.
partialPlot(Members.rf, Members[-1], MembDays)
abline(h=0, col = "blue")

partialPlot(Members.rf, Members[-1], TotalPaid)
abline(h=0, col = "blue")

partialPlot(Members.rf, Members[-1], DaysSinceLastUse)
abline(h=0, col = "blue")

partialPlot(Members.rf, Members[-1], TotalUses)
abline(h=0, col = "blue")

partialPlot(Members.rf, Members[-1], MonthlyAmt)
abline(h=0, col = "blue")

partialPlot(Members.rf, Members[-1], NumUses1st30d)
abline(h=0, col = "blue")

partialPlot(Members.rf, Members[-1], NumUsesLast30d)
abline(h=0, col = "blue")

partialPlot(Members.rf, Members[-1], Age)
abline(h=0, col = "blue")

## Gender gets no zero reference line (presumably because it is categorical
## -- confirm).
partialPlot(Members.rf, Members[-1], Gender)

# Scoring
## Turn the first column of the forest's vote matrix into an integer score.
## Assuming the votes are fractions in [0, 1], scores run from 1 to 10; the
## 9.99999 multiplier keeps a vote fraction of exactly 1 in bin 10 rather
## than spilling into an 11th bin.
## NOTE(review): this treats the first class level as the "at-risk" class --
## confirm against levels(Members$Status).
AtRiskScore <- 1 + floor(Members.rf$votes[, 1] * 9.99999)
barplot(
  table(AtRiskScore),
  main = "Distribution of At-Risk Scores",
  ylab = "# Members",
  col = "yellow"
)

#Members.p <- classCenter(Members[-1], Members[1], Members.rf$prox)
## Class counts of the training response, for comparison with the scores.
summary(Members$Status)
