1 files changed, 122 insertions, 0 deletions
diff --git a/bin/copyright.awk b/bin/copyright.awk
new file mode 100755
index 0000000..371a9c2
--- /dev/null
+++ b/bin/copyright.awk
@@ -0,0 +1,122 @@
+#!/usr/bin/gawk -f
+# Copyright (C) 2013 Ryan Kavanagh <rak@debian.org>
+# Given a series of lines in the format
+#  Copyright (c) NNNN, MMMM-MMMM, ..., NNNN John Smith <jsmith@example.org>
+# group years and emails by person.
+
+{
+    match($0, /.*Copyright.*[0-9][,]? +/);
+    DATE_LENGTH = RLENGTH;
+    match($0, /<.*>/);
+    EMAIL_START = RSTART;
+    if (RLENGTH != -1) {
+        NAME = substr($0, DATE_LENGTH + 1, EMAIL_START - DATE_LENGTH - 2);
+        EMAIL = substr($0, EMAIL_START);
+    } else {
+        # No email on this line
+        NAME = substr($0, DATE_LENGTH + 1);
+    }
+    match($0, /.*Copyright +\([cC]\) +/);
+    DATE_START = RLENGTH + 1;
+    YEARS = substr($0, DATE_START, DATE_LENGTH - DATE_START);
+    gsub(/, +/, " ", YEARS);
+    gsub(/,/, " ", YEARS);
+    people_years[NAME] = people_years[NAME] " " YEARS;
+    if (EMAIL_LENGTH != -1) {
+        email_pattern = "/.*" EMAIL ".*/";
+        if (!(NAME in people_emails)) {
+            people_emails[NAME] = EMAIL;
+        } else if (!match(people_emails[NAME], EMAIL)) {
+            people_emails[NAME] = people_emails[NAME] "," EMAIL;
+        }
+    }
+} END {
+    for (person in people_years) {
+        delete years_array;
+        split(people_years[person], years_array); 
+        # Split any hyphenated years;
+        for (year in years_array) {
+            if (years_array[year] ~ /[0-9]+-[0-9]+/) {
+                delete split_year;
+                split(years_array[year], split_year, /-/);
+                years_array[year] = split_year[1];
+                if (split_year[1] != split_year[2]) {
+                    # Make sure it isn't some crappy input like 2012-2012
+                    for (j = 1; j <= split_year[2] - split_year[1]; j++) {
+                        years_array[length(years_array) + 1] = \
+                                   years_array[year] + j;
+                    }
+                }
+            }
+        }
+        # Sort the years
+        asort(years_array);
+        # Delete any duplicates:
+        for (i = 1; i <= length(years_array); i++) {
+            if (i > 1 && years_array[i-1] == years_array[i]) {
+                # Delete years_array[i-1] instead of years_array[i] so that we
+                # can still check the next year with ease
+                delete years_array[i-1];
+            }
+        }
+        # Final sort
+        asort(years_array);
+        # Remove duplicates and generate year string
+        year_string = "";
+        # Force AWK to access the years in order
+        added_hyphen = 0;
+        for (i = 1; i <= length(years_array); i++) {
+            if (i > 1) {
+                if (years_array[i - 1] != years_array[i]) {
+                    # added_hyphen tracks if the last character in the string is
+                    # a hyphen
+                    if ((!added_hyphen) && (years_array[i - 1] == years_array[i] - 1)) {
+                        # year_string isn't terminated by a hyphen, and the year
+                        # at i-1 is one less than the current one
+                        year_string = year_string "-";
+                        added_hyphen = 1;
+                    } else if (added_hyphen && (years_array[i - 1] != years_array[i] - 1)) {
+                        # The string is terminated by a hyphen, but the current
+                        # year does not immediately follow the preceeding
+                        # one
+                        year_string = year_string years_array[i-1] ", " years_array[i];
+                        added_hyphen = 0;
+                    } else if (!added_hyphen) {
+                        year_string = year_string ", " years_array[i];
+                    }
+                }
+            } else {
+                year_string = years_array[i];
+            }
+        }
+        # We've added a hyphen, but run out of years to check, terminate it
+        if (added_hyphen) {
+            year_string = year_string years_array[length(years_array)];
+        }
+        final_line[years_array[length(years_array)]][length(years_array)][person] = \
+            "Copyright (C) " year_string "\t" person " " people_emails[person];
+    }
+    # We can't sort the years indices with asorti because we want a numerical,
+    # not lexicographic sort of the indices.
+    j = 0;
+    delete years_sorted;
+    for (i in final_line) years_sorted[j++] = i+0;
+    n_years_entries = asort(years_sorted);
+    # And output the lines with the most recent contributor first
+    for (y = n_years_entries; y >= 1; y--) {
+        # Sort the contributors with most recent contribution in year
+        # by_year[y] by number of years contributed:
+        j = 0;
+        delete contributions_sorted;
+        for (i in final_line[years_sorted[y]]) contributions_sorted[j++] = i+0;
+        n_contrib_entries = asort(contributions_sorted);
+        for (c = n_contrib_entries; c >= 1; c--) {
+            # Finally, sort by contributor name
+            asorti(final_line[years_sorted[y]][contributions_sorted[c]], by_person);
+            # And output the lines in alphabetical order by person name
+            for (n = 1; n <= length(by_person); n++) {
+                print final_line[years_sorted[y]][contributions_sorted[c]][by_person[n]];
+            }
+        }
+    }
+}