# cgit page header: summary | refs | log | blame | commit | diff
# path: root/analyze_grades.r
# blob: 6b6409928fb02540c5d50eb87fce0f58b681ff82 (plain) (tree)






































































                                                                                      
#!/usr/bin/env Rscript
# Copyright (C) 2018 Ryan Kavanagh <rkavanagh@cs.cmu.edu>
#
# Processes a CSV file into a format that Canvas will accept.
# Also spits out statistics on the grades and pretty plots.

library(matrixStats)
library(tidyr)
library(ggplot2)

# Arguments given after the script name; the first one is the CSV file to
# process.
args <- commandArgs(trailingOnly = TRUE)

# Abort when no CSV filename was supplied.
if (length(args) < 1) {
    stop("supply csv filename as cli arg")
}

HW <- args[[1]]

# Columns of the CSV that are not grades; every other column is treated as
# a grade.
metadata <- c("Student", "ID","Section", "SIS Login ID")

# check.names = FALSE keeps the column headers exactly as they appear in the
# CSV (e.g. "SIS Login ID" with spaces) so they can be matched against
# `metadata`.
hwdata <- read.csv(HW, check.names = FALSE)

# Per-student total over all grade columns. drop = FALSE keeps the selection
# a data frame even when the CSV has a single grade column; without it the
# selection collapses to a plain vector and rowSums() fails.
# rowSums() is called without na.rm, so a student with any missing grade
# gets an NA total.
grades <- hwdata[, setdiff(names(hwdata), metadata), drop = FALSE]
totals <- rowSums(grades)
hwdata["total"] <- totals

# Re-select so that `grades` also contains the new "total" column.
grades <- hwdata[, setdiff(names(hwdata), metadata), drop = FALSE]

# Matrix form of the grades for the column-wise summary functions below.
gradesMatrix <- as.matrix(grades)

# One row of summary statistics per column of `grades`; missing values are
# ignored in every statistic.
stats <- data.frame(
    "mean" = colMeans(gradesMatrix, na.rm = TRUE),
    "sd" = colSds(gradesMatrix, na.rm = TRUE),
    "median" = colMedians(gradesMatrix, na.rm = TRUE),
    "max" = colMaxs(gradesMatrix, na.rm = TRUE),
    "min" = colMins(gradesMatrix, na.rm = TRUE)
)

print("Statistics")
print(stats)

# The rows are printed in a random order, so a row's position in the output
# does not correspond to its position in the input file.
print("Raw anonymized grades")
print(gradesMatrix[sample(nrow(gradesMatrix)), ])

# Spread of x: its largest value minus its smallest value.
# Missing values are ignored; otherwise a single NA would make the result NA
# and break the `if` comparisons in callers that test the spread.
myrange <- function(x) max(x, na.rm = TRUE) - min(x, na.rm = TRUE)

                                        # Binwidth
# Scott's normal reference rule for the histogram bin width:
#   h = 3.49 * sd(x) / n^(1/3),  with n = length(x).
# The width must shrink as n grows, hence the division by n^(1/3).
scottbw <- function(x) 3.49 * sd(x) / length(x)^(1/3)
# Freedman-Diaconis rule for the histogram bin width:
#   h = 2 * IQR(x) / n^(1/3),  with n = length(x).
fdrbw <- function(x) {
    n <- length(x)
    2 * IQR(x) / n^(1/3)
}

# Histogram bin width chosen from the spread of x (as computed by myrange):
#   spread <= 5   -> 1
#   spread <= 10  -> 2
#   otherwise     -> spread / 6
mybw <- function(x) {
    spread <- myrange(x)
    if (spread <= 5) {
        return(1)
    }
    if (spread <= 10) {
        return(2)
    }
    spread / 6
}

# Write histograms of the grades, one panel per column, to
# "<csv filename>_plot.pdf".
pdf(paste0(HW, "_plot.pdf"))
# gather() stacks all columns into key/value pairs: `key` holds the column
# name (used for faceting) and `value` the grade (the histogram variable).
# The bin width is computed by mybw() from the values being binned.
histograms <- ggplot(gather(grades), aes(value)) +
    geom_histogram(binwidth = mybw) +
    facet_wrap(~key, scales = "free_x") +
    xlim(c(-1, NA))
# Print explicitly: a ggplot object is only drawn when it is printed, and
# auto-printing of top-level expressions does not happen when this file is
# source()d, which would leave the PDF empty.
print(histograms)
dev.off()

# When exactly two arguments were given, write "<csv filename>_totals.csv"
# containing the metadata columns (without "SIS Login ID") plus the totals;
# the second argument is used as the name of the totals column.
if (length(args) == 2) {
    output <- hwdata[metadata]
    output[["SIS Login ID"]] <- NULL
    output[args[2]] <- totals
    totals_file <- paste0(HW, "_totals.csv")
    # Missing values are written as empty cells instead of "NA".
    write.csv(output, file = totals_file, na = "", row.names = FALSE)
}

# Print any warnings R has recorded.
warnings()