aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRyan Kavanagh <rak@debian.org>2013-07-25 08:21:09 -0400
committerRyan Kavanagh <rak@debian.org>2013-07-25 08:45:10 -0400
commit13ed5e9a51a8720d40d8b8d55aeb0d14e04da2d9 (patch)
tree07fd4ab63572a1ebd358cc8943c562b8e98144d2
parentAdded my jackd connect_usb script for my USB soundcard (diff)
Added AWK script to sort and join copyright lines
Diffstat (limited to '')
-rwxr-xr-xbin/copyright.awk122
1 files changed, 122 insertions, 0 deletions
diff --git a/bin/copyright.awk b/bin/copyright.awk
new file mode 100755
index 0000000..371a9c2
--- /dev/null
+++ b/bin/copyright.awk
@@ -0,0 +1,122 @@
+#!/usr/bin/gawk -f
+# Copyright (C) 2013 Ryan Kavanagh <rak@debian.org>
+# Given a series of lines in the format
+# Copyright (c) NNNN, MMMM-MMMM, ..., NNNN John Smith <jsmith@example.org>
+# group years and emails by person.
+
+# Per-line rule: split one copyright statement into its year list, the
+# holder's name and (optionally) an e-mail address, and accumulate them per
+# holder in people_years[] and people_emails[].
+{
+    # Greedy match up to the whitespace that follows the last digit (with an
+    # optional comma) after "Copyright"; its length marks where the name
+    # begins.
+    found_years = match($0, /.*Copyright.*[0-9][,]? +/);
+    DATE_LENGTH = RLENGTH;
+    # Length of the "Copyright (c) " prefix; the year list starts right after.
+    found_marker = match($0, /.*Copyright +\([cC]\) +/);
+    DATE_START = RLENGTH + 1;
+    if (!found_years || !found_marker) {
+        # Not a parseable copyright statement.  Skip it instead of deriving
+        # substr() offsets from RLENGTH == -1; stay quiet for blank lines.
+        if ($0 ~ /[^ \t]/) {
+            warning = "copyright.awk: skipping unparseable line " NR ": " $0;
+            print warning > "/dev/stderr";
+        }
+        next;
+    }
+
+    # Reset EMAIL on every record so that a line without an address cannot
+    # inherit the address seen on an earlier line.
+    EMAIL = "";
+    if (match($0, /<.*>/)) {
+        EMAIL_START = RSTART;
+        # The "- 2" drops the single separator character before "<".
+        NAME = substr($0, DATE_LENGTH + 1, EMAIL_START - DATE_LENGTH - 2);
+        EMAIL = substr($0, EMAIL_START);
+    } else {
+        # No email on this line
+        NAME = substr($0, DATE_LENGTH + 1);
+    }
+
+    # Turn the comma separated year list into a blank separated one.
+    YEARS = substr($0, DATE_START, DATE_LENGTH - DATE_START);
+    gsub(/, */, " ", YEARS);
+    people_years[NAME] = people_years[NAME] " " YEARS;
+
+    if (EMAIL != "") {
+        if (!(NAME in people_emails)) {
+            people_emails[NAME] = EMAIL;
+        } else if (!index(people_emails[NAME], EMAIL)) {
+            # index() compares literally; using the address as a regex would
+            # give "." and "+" their metacharacter meaning.
+            people_emails[NAME] = people_emails[NAME] "," EMAIL;
+        }
+    }
+}
+
+# Once all input is read: build one output line per holder with the years
+# collapsed into ranges, then print the lines ordered by most recent year
+# (descending), number of distinct years (descending) and holder name
+# (ascending).
+END {
+    for (person in people_years) {
+        n_years = split(people_years[person], years_array);
+
+        # Expand hyphenated ranges.  Only the tokens produced by split() are
+        # inspected; the expanded years are appended behind them.
+        n_tokens = n_years;
+        for (t = 1; t <= n_tokens; t++) {
+            if (years_array[t] ~ /[0-9]+-[0-9]+/) {
+                delete split_year;
+                split(years_array[t], split_year, /-/);
+                years_array[t] = split_year[1];
+                # Runs zero times for input such as 2012-2012 or a reversed
+                # range, leaving just the first year.
+                for (j = 1; j <= split_year[2] - split_year[1]; j++) {
+                    years_array[++n_years] = split_year[1] + j;
+                }
+            }
+        }
+
+        # Sort, then copy each distinct year once.  Copying into a second
+        # array keeps the loop bound fixed, so duplicates at the end of the
+        # list are removed too.
+        n_years = asort(years_array);
+        delete unique_years;
+        n_unique = 0;
+        for (i = 1; i <= n_years; i++) {
+            if (i == 1 || years_array[i] != years_array[i - 1]) {
+                unique_years[++n_unique] = years_array[i];
+            }
+        }
+
+        # Generate the year string: every run of consecutive years becomes
+        # "first-last", runs are joined with ", ".
+        year_string = "";
+        separator = "";
+        i = 1;
+        while (i <= n_unique) {
+            run_end = i;
+            while (run_end < n_unique && \
+                   unique_years[run_end + 1] == unique_years[run_end] + 1) {
+                run_end++;
+            }
+            year_string = year_string separator unique_years[i];
+            if (run_end > i) {
+                year_string = year_string "-" unique_years[run_end];
+            }
+            separator = ", ";
+            i = run_end + 1;
+        }
+
+        # Only append the address part when there is one, so holders without
+        # an address do not get a trailing blank.
+        line = "Copyright (C) " year_string "\t" person;
+        if (person in people_emails) {
+            line = line " " people_emails[person];
+        }
+        # Keyed by most recent year, then number of distinct years, then name.
+        final_line[unique_years[n_unique]][n_unique][person] = line;
+    }
+
+    # We can't sort the years indices with asorti because we want a numerical,
+    # not lexicographic sort of the indices.
+    n_keys = 0;
+    delete years_sorted;
+    for (last_year in final_line) {
+        years_sorted[++n_keys] = last_year + 0;
+    }
+    n_years_entries = asort(years_sorted);
+
+    # And output the lines with the most recent contributor first
+    for (y = n_years_entries; y >= 1; y--) {
+        # Among holders whose latest year is years_sorted[y], those with the
+        # most years contributed come first.
+        n_keys = 0;
+        delete contributions_sorted;
+        for (year_count in final_line[years_sorted[y]]) {
+            contributions_sorted[++n_keys] = year_count + 0;
+        }
+        n_contrib_entries = asort(contributions_sorted);
+        for (c = n_contrib_entries; c >= 1; c--) {
+            # Finally, sort by contributor name and print alphabetically
+            n_people = asorti(final_line[years_sorted[y]][contributions_sorted[c]], by_person);
+            for (n = 1; n <= n_people; n++) {
+                print final_line[years_sorted[y]][contributions_sorted[c]][by_person[n]];
+            }
+        }
+    }
+}