#!/usr/bin/gawk -f
# Copyright (C) 2013 Ryan Kavanagh
#
# Origin: bin/copyright.awk, commit 13ed5e9a51a8720d40d8b8d55aeb0d14e04da2d9
# (2013-07-25), "Added AWK script to sort and join copyright lines".
#
# Given a series of lines in the format
#     Copyright (c) NNNN, MMMM-MMMM, ..., NNNN John Smith [<email>]
# group years and emails by person and print one merged line per person:
#     Copyright (C) <collapsed years>TAB<name>[ <email>[,<email>...]]
#
# Output order: most recent contribution year first, then number of distinct
# years contributed (descending), then name (ascending).
#
# Requires gawk >= 4.0 (arrays of arrays, asort/asorti, "/dev/stderr").

# Report a problem on stderr so that stdout only ever carries merged lines.
function warn(message)
{
    print "copyright.awk: " message > "/dev/stderr";
}

# Parse one input line and accumulate its years and email address into the
# global arrays people_years[name] and people_emails[name].
# Returns 1 if the line was recorded, 0 if it was skipped.
function record_line(line,    date_end, years_start, years, name, email)
{
    if (line ~ /^[ \t]*$/)
        return 0;

    # Everything up to and including the last year and the blanks after it.
    # The pattern starts with ".*", so RLENGTH is also the end position.
    if (!match(line, /.*Copyright.*[0-9][,]? +/)) {
        warn("skipping unrecognised line: " line);
        return 0;
    }
    date_end = RLENGTH;

    # Everything up to the first year.
    if (!match(line, /.*Copyright +\([cC]\) +/)) {
        warn("skipping line without a (c) marker: " line);
        return 0;
    }
    years_start = RLENGTH + 1;

    years = substr(line, years_start, date_end - years_start);
    # Years are later split on whitespace, so commas only need to vanish
    gsub(/,/, " ", years);

    if (match(line, /<.*>/)) {
        name = substr(line, date_end + 1, RSTART - date_end - 1);
        email = substr(line, RSTART);
    } else {
        # No email on this line; make sure none leaks in from an earlier one
        name = substr(line, date_end + 1);
        email = "";
    }
    # Trim so that "Name <a@b>" and "Name  <a@b>" map to the same person
    sub(/[ \t]+$/, "", name);
    sub(/[ \t]+$/, "", email);

    people_years[name] = people_years[name] " " years;
    if (email != "") {
        if (!(name in people_emails)) {
            people_emails[name] = email;
        } else if (!index(people_emails[name], email)) {
            # index(), not match(): an address is literal text, not a regex
            people_emails[name] = people_emails[name] "," email;
        }
    }
    return 1;
}

# Expand a whitespace-separated list of years and hyphenated ranges into the
# array `out` (indices 1..n) holding the distinct years in ascending numeric
# order. Tokens that are neither NNNN nor NNNN-MMMM are ignored. A reversed
# range such as 2012-2010 contributes only its first year.
# Returns n, the number of distinct years.
function expand_years(year_list, out,    tokens, n_tokens, t, bounds, first, last, y, seen)
{
    delete out;
    delete seen;    # forces array type even when no token is accepted
    n_tokens = split(year_list, tokens);
    for (t = 1; t <= n_tokens; t++) {
        if (tokens[t] !~ /^[0-9]+(-[0-9]+)?$/)
            continue;
        if (tokens[t] ~ /-/) {
            split(tokens[t], bounds, /-/);
            first = bounds[1] + 0;
            last = bounds[2] + 0;
        } else {
            first = last = tokens[t] + 0;
        }
        # Using the year as the index makes duplicates collapse for free
        seen[first] = first;
        for (y = first + 1; y <= last; y++)
            seen[y] = y;
    }
    return asort(seen, out);
}

# Turn the sorted, distinct years in years[1..n] into a display string in
# which every run of consecutive years is written FIRST-LAST and the pieces
# are joined with ", " (e.g. 2010, 2011, 2012, 2014 -> "2010-2012, 2014").
function collapse_years(years, n,    i, run_start, piece, result)
{
    result = "";
    i = 1;
    while (i <= n) {
        run_start = i;
        while (i < n && years[i + 1] == years[i] + 1)
            i++;
        piece = (i > run_start) ? years[run_start] "-" years[i] : years[run_start];
        result = (result == "") ? piece : result ", " piece;
        i++;
    }
    return result;
}

{
    record_line($0);
}

END {
    # final_line[latest year][number of distinct years][person] = output line
    for (person in people_years) {
        n_years = expand_years(people_years[person], years_array);
        if (n_years == 0) {
            warn("no usable years for: " person);
            continue;
        }
        line = "Copyright (C) " collapse_years(years_array, n_years) "\t" person;
        if (person in people_emails)
            line = line " " people_emails[person];
        final_line[years_array[n_years]][n_years][person] = line;
    }

    # We can't sort the year indices with asorti because we want a numerical,
    # not lexicographic, sort of the indices.
    delete latest_years;
    n_latest = 0;
    for (latest in final_line)
        latest_years[++n_latest] = latest + 0;
    n_latest = asort(latest_years);

    # Output the lines with the most recent contributor first
    for (y = n_latest; y >= 1; y--) {
        # Among people whose latest contribution is latest_years[y], those
        # with the most years contributed come first
        delete counts;
        n_counts = 0;
        for (count in final_line[latest_years[y]])
            counts[++n_counts] = count + 0;
        n_counts = asort(counts);

        for (c = n_counts; c >= 1; c--) {
            # Finally, alphabetical order by person name
            n_people = asorti(final_line[latest_years[y]][counts[c]], by_person);
            for (p = 1; p <= n_people; p++)
                print final_line[latest_years[y]][counts[c]][by_person[p]];
        }
    }
}