#!/usr/bin/env Rscript
# Copyright (C) 2018 Ryan Kavanagh <rkavanagh@cs.cmu.edu>
#
# Processes a CSV file into a format that Canvas will accept.
# Also spits out statistics on the grades and pretty plots.
library(matrixStats)
library(tidyr)
library(ggplot2)
# Command-line handling: the first trailing argument is the CSV file to
# process; the script aborts if no argument was given.
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 1) {
  stop("supply csv filename as cli arg")
}
HW <- args[[1]]
# Columns that identify a student rather than hold a grade; every other
# column of the CSV is treated as a grade column.
metadata <- c("Student", "ID", "Section", "SIS Login ID")
# check.names = FALSE keeps headers containing spaces (e.g. "SIS Login ID")
# exactly as written instead of mangling them into syntactic names.
hwdata <- read.csv(HW, check.names = FALSE)
# drop = FALSE keeps a data frame even when the CSV has a single grade
# column; without it the selection collapses to a vector and rowSums() fails.
grades <- hwdata[, setdiff(names(hwdata), metadata), drop = FALSE]
# No na.rm here: a row with any missing grade gets an NA total.
totals <- rowSums(grades)
hwdata["total"] <- totals
# Re-select so that the new "total" column is part of the grade columns.
grades <- hwdata[, setdiff(names(hwdata), metadata), drop = FALSE]
gradesMatrix <- as.matrix(grades)
# Summary statistics, one row per column of gradesMatrix; missing values
# are ignored in every statistic.
stats <- data.frame(
  mean = colMeans(gradesMatrix, na.rm = TRUE),
  sd = colSds(gradesMatrix, na.rm = TRUE),
  median = colMedians(gradesMatrix, na.rm = TRUE),
  max = colMaxs(gradesMatrix, na.rm = TRUE),
  min = colMins(gradesMatrix, na.rm = TRUE)
)
print("Statistics")
print(stats)

# Print the grade rows in a random order; gradesMatrix holds only the
# non-metadata columns.
print("Raw anonymized grades")
shuffled_rows <- sample(nrow(gradesMatrix))
print(gradesMatrix[shuffled_rows, ])
# Width of the interval spanned by x, i.e. largest value minus smallest.
myrange <- function(x) {
  diff(range(x))
}
# Histogram bin-width rules: Scott, Freedman-Diaconis, and a range-based one.
# Scott's normal-reference rule for a histogram bin width:
#   h = 3.49 * sd(x) * n^(-1/3)
# The width must shrink as the sample grows, hence the division by the
# cube root of the number of observations.
scottbw <- function(x) {
  3.49 * sd(x) / length(x)^(1 / 3)
}
# Freedman-Diaconis rule for a histogram bin width: twice the interquartile
# range divided by the cube root of the number of observations.
fdrbw <- function(x) {
  n_obs <- length(x)
  spread <- IQR(x)
  2 * spread / n_obs^(1 / 3)
}
# Range-based bin width: 1 when the values span at most 5, 2 when they span
# at most 10, and otherwise one sixth of the span.
mybw <- function(x) {
  span <- myrange(x)
  if (span > 10) {
    return(span / 6)
  }
  if (span > 5) 2 else 1
}
# Write one histogram per grade column (including "total") to
# "<input>_plot.pdf". Each facet gets its own x axis, and the bin width is
# chosen by mybw() from the values being binned.
pdf(paste0(HW, "_plot.pdf"))
grade_histograms <- ggplot(gather(grades), aes(value)) +
  geom_histogram(binwidth = mybw) +
  facet_wrap(~key, scales = "free_x") +
  # Lower x limit fixed at -1; NA leaves the upper limit to the data.
  xlim(c(-1, NA))
# Explicit print(): a ggplot object is only drawn when printed, and relying
# on top-level auto-printing leaves the PDF empty when the script is run
# through source().
print(grade_histograms)
dev.off()
# When exactly two CLI arguments were given, write "<input>_totals.csv":
# the metadata columns minus "SIS Login ID", plus the per-row totals under
# a column named after the second argument. Missing values become empty
# cells.
if (length(args) == 2) {
  totals_column <- args[2]
  totals_path <- paste0(HW, "_totals.csv")

  output <- hwdata[metadata]
  output["SIS Login ID"] <- NULL
  output[totals_column] <- totals

  write.csv(output, file = totals_path, na = "", row.names = FALSE)
}

# Show any warnings recorded so far.
warnings()