diff options
Diffstat (limited to '')
-rwxr-xr-x | analyze_grades.r | 71 |
1 files changed, 71 insertions, 0 deletions
#!/usr/bin/env Rscript
# Copyright (C) 2018 Ryan Kavanagh <rkavanagh@cs.cmu.edu>
#
# Processes a CSV file into a format that Canvas will accept.
# Also spits out statistics on the grades and pretty plots.
#
# Provenance: recovered from the diff that added analyze_grades.r
# (new file, mode 100755, blob 6b64099).
#
# Usage: analyze_grades.r GRADES.csv [TOTAL_COLUMN_NAME]
#   GRADES.csv         Input file. Every column that is not listed in
#                      `metadata` below is treated as a numeric grade column.
#   TOTAL_COLUMN_NAME  Optional. When exactly two arguments are given,
#                      "<GRADES.csv>_totals.csv" is written with each row's
#                      total stored under this column name.
# Histograms of every grade column (and of the total) are always written to
# "<GRADES.csv>_plot.pdf".

library(matrixStats)
library(tidyr)
library(ggplot2)

args <- commandArgs(trailingOnly = TRUE)

if (length(args) == 0) {
  stop("supply csv filename as cli arg")
}

HW <- args[1]

# Columns that identify a student rather than hold a grade.
metadata <- c("Student", "ID", "Section", "SIS Login ID")

# check.names = FALSE keeps headers such as "SIS Login ID" verbatim instead of
# letting read.csv() rewrite them into syntactic names.
hwdata <- read.csv(HW, check.names = FALSE)

# drop = FALSE keeps a data frame even when the CSV has a single grade column;
# without it the subset collapses to a vector and rowSums() fails.
grades <- hwdata[, setdiff(names(hwdata), metadata), drop = FALSE]

# No na.rm here: a row with any missing grade gets an NA total, which is later
# written out as an empty cell.
totals <- rowSums(grades)
hwdata["total"] <- totals

# Re-select so that the freshly added "total" column is analysed as well.
grades <- hwdata[, setdiff(names(hwdata), metadata), drop = FALSE]

gradesMatrix <- as.matrix(grades)

# One row per grade column, one column per summary statistic.
stats <- data.frame(
  "mean" = colMeans(gradesMatrix, na.rm = TRUE),
  "sd" = colSds(gradesMatrix, na.rm = TRUE),
  "median" = colMedians(gradesMatrix, na.rm = TRUE),
  "max" = colMaxs(gradesMatrix, na.rm = TRUE),
  "min" = colMins(gradesMatrix, na.rm = TRUE)
)

print("Statistics")
print(stats)

# The matrix carries no metadata columns; shuffling the rows additionally
# hides the original row order. drop = FALSE keeps the matrix layout (and the
# column names) even when there is only one row.
print("Raw anonymized grades")
print(gradesMatrix[sample(nrow(gradesMatrix)), , drop = FALSE])

# Spread of the observed values, ignoring missing ones.
myrange <- function(x) diff(range(x, na.rm = TRUE))

# Binwidth
# Scott's normal reference rule: h = 3.49 * sd * n^(-1/3).
scottbw <- function(x) 3.49 * sd(x) / length(x)^(1 / 3)
# Freedman-Diaconis rule: h = 2 * IQR * n^(-1/3).
fdrbw <- function(x) 2 * IQR(x) / length(x)^(1 / 3)

# Bin width actually used for the histograms: unit-wide bins for very narrow
# ranges, width 2 for ranges up to 10, otherwise six bins across the range.
mybw <- function(x) {
  spread <- myrange(x)
  if (spread <= 5) {
    1
  } else if (spread <= 10) {
    2
  } else {
    spread / 6
  }
}

long_grades <- pivot_longer(
  grades,
  cols = everything(),
  names_to = "key",
  values_to = "value"
)

pdf(paste0(HW, "_plot.pdf"))
# Explicit print(): a ggplot object is only drawn when printed, and relying on
# top-level autoprint leaves the PDF empty if this file is source()d.
print(
  ggplot(long_grades, aes(value)) +
    geom_histogram(binwidth = mybw) +
    facet_wrap(~key, scales = "free_x") +
    xlim(c(-1, NA))
)
dev.off()

if (length(args) == 2) {
  missing_metadata <- setdiff(metadata, names(hwdata))
  if (length(missing_metadata) > 0) {
    stop(
      "csv is missing metadata column(s): ",
      paste(missing_metadata, collapse = ", ")
    )
  }

  # Keep the identifying columns except the login ID, then append the totals
  # under the caller-supplied column name.
  output <- hwdata[metadata]
  output["SIS Login ID"] <- NULL
  output[args[2]] <- totals
  write.csv(
    output,
    file = paste0(HW, "_totals.csv"),
    na = "",
    row.names = FALSE
  )
}

# Show any warnings collected while the script ran.
warnings()