diff --git a/CONTRIBUTORS.json b/CONTRIBUTORS.json
new file mode 100644
index 000000000..5c50c1db3
--- /dev/null
+++ b/CONTRIBUTORS.json
@@ -0,0 +1,143 @@
+{
+  "forked_at": "2017-11-27T11:53:17-05:00",
+  "excluded_fork": null,
+  "pre_fork_contributors_by_name": null,
+  "contributors_by_name": {
+    "Alex Balashov": {
+      "emails": [
+        "alexander_balashov@hms.harvard.edu"
+      ],
+      "names": [
+        "Alex Balashov",
+        "Alexander Balashov"
+      ]
+    },
+    "Alexander Veit": {
+      "emails": [
+        "53857412+alexander-veit@users.noreply.github.com"
+      ],
+      "names": [
+        "Alexander Veit"
+      ]
+    },
+    "Andrea Cosolo": {
+      "emails": [
+        "58441550+andreacosolo@users.noreply.github.com"
+      ],
+      "names": [
+        "Andrea Cosolo"
+      ]
+    },
+    "Andy Schroeder": {
+      "emails": [
+        "andrew_schroeder@hms.harvard.edu"
+      ],
+      "names": [
+        "Andy Schroeder",
+        "aschroed"
+      ]
+    },
+    "Carl Vitzthum": {
+      "emails": [
+        "carl.vitzthum@gmail.com",
+        "carl_vitzthum@hms.harvard.edu"
+      ],
+      "names": [
+        "Carl Vitzthum"
+      ]
+    },
+    "David Michaels": {
+      "emails": [
+        "david_michaels@hms.harvard.edu",
+        "dmichaels@gmail.com",
+        "105234079+dmichaels-harvard@users.noreply.github.com"
+      ],
+      "names": [
+        "David Michaels",
+        "dmichaels-harvard"
+      ]
+    },
+    "Douglas Rioux": {
+      "emails": [
+        "douglas_rioux@hms.harvard.edu",
+        "58236592+drio18@users.noreply.github.com"
+      ],
+      "names": [
+        "Douglas Rioux",
+        "drio18"
+      ]
+    },
+    "Eric Berg": {
+      "emails": [
+        "Eric_Berg@hms.harvard.edu"
+      ],
+      "names": [
+        "Eric Berg"
+      ]
+    },
+    "Jeremy Johnson": {
+      "emails": [
+        "jeremy@softworks.com.my"
+      ],
+      "names": [
+        "Jeremy Johnson",
+        "j1z0"
+      ]
+    },
+    "Kent M Pitman": {
+      "emails": [
+        "netsettler@users.noreply.github.com",
+        "kent_pitman@hms.harvard.edu"
+      ],
+      "names": [
+        "Kent M Pitman",
+        "Kent Pitman"
+      ]
+    },
+    "Koray K\u0131rl\u0131": {
+      "emails": [
+        "KorayKirli@users.noreply.github.com",
+        "koray_kirli@hms.harvard.edu"
+      ],
+      "names": [
+        "Koray K\u0131rl\u0131"
+      ]
+    },
+    "Luisa Mercado": {
+      "emails": [
+        "42242797+Lmercadom@users.noreply.github.com"
+      ],
+      "names": [
+        "Lmercadom",
+        "Luisa Mercado"
+      ]
+    },
+    "Sarah Reiff": {
+      "emails": [
+        "sarah_reiff@hms.harvard.edu"
+      ],
+      "names": [
+        "Sarah",
+        "Sarah Reiff"
+      ]
+    },
+    "Soo Lee": {
+      "emails": [
+        "duplexa@gmail.com"
+      ],
+      "names": [
+        "Soo Lee",
+        "SooLee"
+      ]
+    },
+    "Will Ronchetti": {
+      "emails": [
+        "wrr33@cornell.edu"
+      ],
+      "names": [
+        "Will Ronchetti",
+        "William Ronchetti"
+      ]
+    }
+  }
+}
diff --git a/dcicutils/contribution_scripts.py b/dcicutils/contribution_scripts.py
new file mode 100644
index 000000000..99d818a8d
--- /dev/null
+++ b/dcicutils/contribution_scripts.py
@@ -0,0 +1,39 @@
+import argparse
+
+from dcicutils.command_utils import script_catch_errors, ScriptFailure
+from .contribution_utils import Contributions, PROJECT_HOME
+
+
+EPILOG = __doc__
+
+
+def show_contributors(repo, exclude_fork=None, verbose=False, save_contributors=False, test=False):
+    contributions = Contributions(repo=repo, exclude_fork=exclude_fork, verbose=verbose)
+    if save_contributors:
+        contributions.save_contributor_data()
+    contributions.show_repo_contributors(error_class=ScriptFailure if test else None)
+
+
+def show_contributors_main(*, simulated_args=None):
+    parser = argparse.ArgumentParser(  # noqa - PyCharm wrongly thinks the formatter_class is specified wrong here.
+        description=(f"Show authors of a specified repository, which will be presumed"
+                     f" to have been cloned as a subdirectory of $PROJECT_HOME ({PROJECT_HOME})"),
+        epilog=EPILOG,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    parser.add_argument('repo', default=None,
+                        help="name of repository to show contributors for")
+    parser.add_argument('--exclude', '-x', default=None,
+                        help="name of repository that repo was forked from, whose contributors to exclude")
+    parser.add_argument('--save-contributors', '-s', action="store_true", default=False,
+                        help="whether to store contributor data to CONTRIBUTORS.json")
+    parser.add_argument('--test', '-t', action="store_true", default=False,
+                        help="whether to treat this as a test, erring if a cache update is needed")
+    parser.add_argument('--verbose', '-v', action="store_true", default=False,
+                        help="whether to do verbose output while working")
+    args = parser.parse_args(args=simulated_args)
+
+    with script_catch_errors():
+
+        show_contributors(repo=args.repo, exclude_fork=args.exclude, verbose=args.verbose,
+                          save_contributors=args.save_contributors, test=args.test)
diff --git a/dcicutils/contribution_utils.py b/dcicutils/contribution_utils.py
new file mode 100644
index 000000000..41dd84a9f
--- /dev/null
+++ b/dcicutils/contribution_utils.py
@@ -0,0 +1,532 @@
+import datetime
+import git
+import io
+import json
+import os
+import re
+
+from collections import defaultdict
+from dcicutils.diff_utils import DiffManager
+from dcicutils.lang_utils import n_of
+from dcicutils.misc_utils import PRINT, ignored, environ_bool
+from typing import Dict, List, Optional, Set, Type
+
+
+DEBUG_CONTRIBUTIONS = environ_bool("DEBUG_CONTRIBUTIONS")
+
+GITHUB_USER_REGEXP = re.compile('(?:[0-9]+[+])?(.*)[@]users[.]noreply[.]github[.]com')
+PROJECT_HOME = os.environ.get('PROJECT_HOME', os.path.dirname(os.path.abspath(os.curdir)))
+
+
+class GitAnalysis:
+
+    @classmethod
+    def find_repo(cls, repo_name: str) -> git.Repo:
+        repo_path = os.path.join(PROJECT_HOME, repo_name)
+        repo = git.Repo(repo_path)
+        return repo
+
+    @classmethod
+    def git_commits(cls, repo_name) -> List[git.Commit]:
+        repo = cls.find_repo(repo_name)
+        commit: git.Commit
+        for commit in repo.iter_commits():
+            yield cls.json_for_commit(commit)
+
+    @classmethod
+    def json_for_actor(cls, actor: git.Actor) -> Dict:
+        return {
+            "name": actor.name,
+            "email": actor.email,
+        }
+
+    @classmethod
+    def json_for_commit(cls, commit: git.Commit) -> Dict:
+        return {
+            'commit': commit.hexsha,
+            'date': commit.committed_datetime.isoformat(),
+            'author': cls.json_for_actor(commit.author),
+            'coauthors': [cls.json_for_actor(co_author) for co_author in commit.co_authors],
+            'message': commit.message,
+        }
+
+
+class Contributor:
+
+    @classmethod
+    def create(cls, *, author: git.Actor) -> 'Contributor':
+        return Contributor(email=author.email, name=author.name)
+
+    def __init__(self, *, email: Optional[str] = None, name: Optional[str] = None,
+                 emails: Optional[Set[str]] = None, names: Optional[Set[str]] = None,
+                 primary_name: Optional[str] = None):
+        # Both email and name are required keyword arguments, though name is allowed to be None,
+        # even though email is not. The primary_name is not required, and defaults to None, so will
+        # be heuristically computed based on available names.
+        if not email and not emails:
+            raise ValueError("One of email= or emails= is required.")
+        if email and emails:
+            raise ValueError("Only one of email= and emails= may be provided.")
+        if not emails:
+            emails = {email}
+        if name and names:
+            raise ValueError("Only one of name= and names= may be provided.")
+        if name and not names:
+            names = {name}
+        self.emails: Set[str] = emails
+        self.names: Set[str] = names or set()
+        self._primary_name = primary_name
+        if primary_name:
+            self.names.add(primary_name)
+
+    def __str__(self):
+        maybe_primary = f" {self._primary_name!r}" if self._primary_name else ""
+        emails = ",".join(sorted(map(repr, self.emails), key=lambda x: x.lower()))
+        names = ",".join(sorted(map(repr, self.names), key=lambda x: x.lower()))
+        return f"<{self.__class__.__name__}{maybe_primary} emails={emails} names={names} {id(self)}>"
+
+    def copy(self):
+        return Contributor(emails=self.emails.copy(), names=self.names.copy(), primary_name=self._primary_name)
+
+    @property
+    def primary_name(self):
+        if self._primary_name:
+            return self._primary_name
+        return sorted(self.names, key=self.name_variation_spelling_sort_key, reverse=True)[0]
+
+    def set_primary_name(self, primary_name: str):
+        self.names.add(primary_name)
+        self._primary_name = primary_name
+
+    def notice_mention_as(self, *, email: Optional[str] = None, name: Optional[str] = None):
+        if email is not None and email not in self.emails:
+            self.emails.add(email)
+        if name is not None and name not in self.names:
+            self.names.add(name)
+
+    def as_dict(self):
+        # Note that the sort is case-sensitive alphabetic just because that's easiest here.
+        # Making it be case-insensitive would require a special case for non-strings,
+        # and all we really care about is some easy degree of determinism for testing.
+        data = {
+            "emails": sorted(self.emails),
+            "names": sorted(self.names),
+        }
+        return data
+
+    @classmethod
+    def from_dict(cls, data: Dict, *, key: Optional[str] = None) -> 'Contributor':
+        emails = data["emails"]
+        names = data["names"]
+        contributor = Contributor(email=emails[0], name=names[0])
+        contributor.emails = set(emails)
+        contributor.names = set(names)
+        if key:
+            contributor.set_primary_name(key)
+        return contributor
+
+    @classmethod
+    def name_variation_spelling_sort_key(cls, name):
+        return (
+            ' ' in name,  # we prefer names like 'jane doe' over jdoe123
+            len(name),  # longer names are usually more formal; william smith vs will smith
+            name,  # this names by alphabetical order not because one is better, but to make sort results deterministic
+        )
+
+
+ContributorIndex = Optional[Dict[str, Contributor]]
+
+
+class BasicContributions(GitAnalysis):
+
+    VERBOSE = False
+
+    def __init__(self, *, repo: Optional[str] = None,
+                 exclude_fork: Optional[str] = None,
+                 verbose: Optional[bool] = None):
+        self.email_timestamps: Dict[str, datetime.datetime] = {}
+        self.name_timestamps: Dict[str, datetime.datetime] = {}
+        self.exclude_fork: Optional[str] = exclude_fork
+        if not repo:
+            # Doing it this way gets around an ambiguity about '/foo/' vs '/foo' since both
+            # os.path.join('/foo/', 'bar') and os.path.join('/foo', 'bar') yield '/foo/bar',
+            # and from there one can do os.path.basename(os.path.dirname(...)) to get 'foo' out.
+            cache_file = os.path.join(os.path.abspath(os.path.curdir), self.CONTRIBUTORS_CACHE_FILE)
+            dir = os.path.dirname(cache_file)
+            repo = os.path.basename(dir)
+        self.repo: str = repo
+        self.excluded_contributions = None
+        self.forked_at: Optional[datetime.datetime] = None
+        self.contributors_by_name: Optional[ContributorIndex] = None
+        self.contributors_by_email: Optional[ContributorIndex] = None
+        self.pre_fork_contributors_by_email: Optional[ContributorIndex] = None
+        self.pre_fork_contributors_by_name: Optional[ContributorIndex] = None
+        self.loaded_contributor_data = None
+        self.cache_discrepancies: Optional[dict] = None
+        self.verbose = self.VERBOSE if verbose is None else verbose
+
+    CONTRIBUTORS_CACHE_FILE = 'CONTRIBUTORS.json'
+
+    def contributors_json_file(self):
+        """
+        Returns the name of the CONTRIBUTORS.json file for the repo associated with this class.
+        """
+        return os.path.join(PROJECT_HOME, self.repo, self.CONTRIBUTORS_CACHE_FILE)
+
+    def existing_contributors_json_file(self):
+        """
+        Returns the name of the CONTRIBUTORS.json file for the repo associated with this class if that file exists,
+        or None if there is no such file.
+        """
+        file = self.contributors_json_file()
+        if os.path.exists(file):
+            return file
+        else:
+            return None
+
+    @classmethod
+    def notice_reference_time(cls, key: str, timestamp: datetime.datetime, timestamps: Dict[str, datetime.datetime]):
+        reference_timestamp: datetime.datetime = timestamps.get(key)
+        if not reference_timestamp:
+            timestamps[key] = timestamp
+        elif timestamp > reference_timestamp:
+            timestamps[key] = timestamp
+
+    def email_reference_time(self, email):
+        return self.email_timestamps.get(email)
+
+    def name_reference_time(self, name):
+        return self.name_timestamps.get(name)
+
+    @classmethod
+    def contributor_values_as_dicts(cls, contributor_index: Optional[ContributorIndex]):
+        if contributor_index is None:
+            return None
+        else:
+            return {
+                key: contributor.as_dict()
+                for key, contributor in contributor_index.items()
+            }
+
+    @classmethod
+    def contributor_values_as_objects(cls, contributor_index: Optional[Dict]):
+        if contributor_index is None:
+            return None
+        else:
+            return {
+                key: Contributor.from_dict(value, key=key)
+                for key, value in contributor_index.items()
+            }
+
+    def checkpoint_state(self):
+        return self.as_dict()
+
+    def as_dict(self):
+        data = {
+            "forked_at": self.forked_at.isoformat() if self.forked_at else None,
+            "excluded_fork": self.exclude_fork,
+            "pre_fork_contributors_by_name": self.contributor_values_as_dicts(self.pre_fork_contributors_by_name),
+            "contributors_by_name": self.contributor_values_as_dicts(self.contributors_by_name),
+        }
+        return data
+
+    def save_contributor_data(self, filename: Optional[str] = None) -> str:
+        if filename is None:
+            filename = self.contributors_json_file()
+        with io.open(filename, 'w') as fp:
+            PRINT(json.dumps(self.as_dict(), indent=2), file=fp)
+        return filename
+
+    def repo_contributor_names(self, with_email=False):
+        for name, contributor in self.contributors_by_name.items():
+            if with_email:
+                yield f"{name} ({', '.join([self.pretty_email(email) for email in contributor.emails])})"
+            else:
+                yield name
+
+    @classmethod
+    def pretty_email(cls, email):
+        m = GITHUB_USER_REGEXP.match(email)
+        if m:
+            user_name = m.group(1)
+            return f"{user_name}@github"
+        else:
+            return email
+
+    @classmethod
+    def get_contributors_json_from_file_cache(cls, filename):
+        try:
+            with io.open(filename, 'r') as fp:
+                data = json.load(fp)
+        except Exception:
+            PRINT(f"Error while reading data from {filename!r}.")
+            raise
+        return data
+
+
+class Contributions(BasicContributions):
+
+    def __init__(self, *, repo: Optional[str] = None,
+                 exclude_fork: Optional[str] = None,
+                 verbose: Optional[bool] = None):
+        super().__init__(repo=repo, exclude_fork=exclude_fork, verbose=verbose)
+        existing_contributor_data_file = self.existing_contributors_json_file()
+        if existing_contributor_data_file:
+            # This will set .loaded_contributor_data and other values from CONTRIBUTORS.json
+            self.load_contributors_from_json_file_cache(existing_contributor_data_file)
+
+        if exclude_fork and not self.excluded_contributions:
+            self.excluded_contributions = Contributions(repo=exclude_fork)
+        checkpoint1 = self.checkpoint_state()
+        self.reconcile_contributors_with_github_log()
+        checkpoint2 = self.checkpoint_state()
+
+        def list_to_dict_normalizer(*, label, item):
+            ignored(label)
+            if isinstance(item, list):
+                return {elem: elem for elem in item}
+            else:
+                return item
+
+        if existing_contributor_data_file:
+            diff_manager = DiffManager(label="contributors")
+            contributors1 = checkpoint1['contributors_by_name']
+            contributors2 = checkpoint2['contributors_by_name']
+            diffs = diff_manager.diffs(contributors1, contributors2, normalizer=list_to_dict_normalizer)
+            added = diffs.get('added')
+            changed = diffs.get('changed')
+            removed = diffs.get('removed')
+            self.cache_discrepancies = cache_discrepancies = {}
+            if added:
+                cache_discrepancies['to_add'] = added
+            if changed:
+                cache_discrepancies['to_change'] = changed
+            if removed:
+                cache_discrepancies['to_remove'] = removed
+
+    def load_contributors_from_json_file_cache(self, filename):
+        self.loaded_contributor_data = data = self.get_contributors_json_from_file_cache(filename)
+        self.load_from_dict(data)
+
+        if DEBUG_CONTRIBUTIONS:  # pragma: no cover - debugging only
+            PRINT("After load_contributors_from_json_file_cache...")
+            PRINT(f"{n_of(self.pre_fork_contributors_by_name, 'pre-fork contributor by name')}")
+            PRINT(f"{n_of(self.pre_fork_contributors_by_email, 'pre-fork contributor by email')}")
+            PRINT(f"{n_of(self.contributors_by_name, 'contributor by name')}")
+            PRINT(f"{n_of(self.contributors_by_email, 'contributor by email')}")
+
+    def reconcile_contributors_with_github_log(self):
+        """
+        Rummages the GitHub log entries for contributors we don't know about.
+        That data is merged against our existing structures.
+        """
+        if DEBUG_CONTRIBUTIONS:  # pragma: no cover - debugging only
+            PRINT("Reconciling with git log.")
+        excluded_fork_contributors_by_email = (self.excluded_contributions.contributors_by_email
+                                               if self.excluded_contributions
+                                               else {})
+        # excluded_fork_contributors_by_name = (self.excluded_contributions.contributors_by_name
+        #                                       if self.excluded_contributions
+        #                                       else {})
+        fork_contributor_emails = set(excluded_fork_contributors_by_email.keys())
+
+        post_fork_contributors_seen = defaultdict(lambda: [])
+
+        contributors_by_email: ContributorIndex = self.contributors_by_email or {}
+        git_repo = self.find_repo(repo_name=self.repo)
+
+        def notice_author(*, author: git.Actor, date: datetime.datetime):
+            if self.forked_at:
+                if date < self.forked_at:
+                    return
+                    # raise Exception("Commits are out of order.")
+            elif author.email not in fork_contributor_emails:
+                PRINT(f"Forked {self.repo} at {date} by {author.email}")
+                self.forked_at = date
+
+            if self.forked_at and date >= self.forked_at:
+                if author.email in fork_contributor_emails:
+                    # PRINT(f"Post-fork contribution from {author.email} ({date})")
+                    post_fork_contributors_seen[author.email].append(date)
+                self.notice_reference_time(key=author.email, timestamp=date, timestamps=self.email_timestamps)
+                self.notice_reference_time(key=author.name, timestamp=date, timestamps=self.name_timestamps)
+
+                contributor_by_email = contributors_by_email.get(author.email)
+                if contributor_by_email:  # already exists, so update it
+                    contributor_by_email.notice_mention_as(email=author.email, name=author.name)
+                else:  # need to create it new
+                    contributor_by_email = Contributor.create(author=author)
+                    contributors_by_email[author.email] = contributor_by_email
+            else:
+                # print("skipped")
+                pass
+
+        n = 0
+        for commit in reversed(list(git_repo.iter_commits())):
+            n += 1
+            commit_date = commit.committed_datetime
+            notice_author(author=commit.author, date=commit_date)
+            for co_author in commit.co_authors:
+                notice_author(author=co_author, date=commit_date)
+        if DEBUG_CONTRIBUTIONS:
+            PRINT(f"{n_of(n, 'commit')} processed.")
+
+        for email, dates in post_fork_contributors_seen.items():
+            when = str(dates[0].date())
+            if len(dates) > 1:
+                when += f" to {dates[-1].date()}"
+            PRINT(f"{n_of(dates, 'post-fork commit')} seen for {email} ({when}).")
+
+        contributors_by_name: ContributorIndex = {}
+
+        for contributor_by_email in contributors_by_email.values():
+            self.traverse(root=contributor_by_email,
+                          cursor=contributor_by_email,
+                          contributors_by_email=contributors_by_email,
+                          contributors_by_name=contributors_by_name)
+            for name in list(contributor_by_email.names):
+                contributors_by_name[name] = contributor_by_email
+            for email in list(contributor_by_email.emails):
+                contributors_by_email[email] = contributor_by_email
+
+        # Note that the name table is somewhat unified, merging related cells, but the email table isn't.
+        # In part that's because I was lazy, but also we don't really need the email table to be so careful.
+        # It's the human names that really matter.
+        # if not self.pre_fork_contributors_by_name:
+        #     self.pre_fork_contributors_by_name = excluded_fork_contributors_by_name
+
+        # if not self.pre_fork_contributors_by_email:
+        #     self.pre_fork_contributors_by_email = excluded_fork_contributors_by_email
+
+        self.contributors_by_name = self.contributor_index_by_primary_name(contributors_by_name)
+        self.contributors_by_email = contributors_by_email
+
+        if DEBUG_CONTRIBUTIONS:  # pragma: no cover - debugging only
+            PRINT("After reconcile_contributors_with_github_log...")
+            PRINT(f"{n_of(self.pre_fork_contributors_by_name, 'pre-fork contributor by name')}")
+            PRINT(f"{n_of(self.pre_fork_contributors_by_email, 'pre-fork contributor by email')}")
+            PRINT(f"{n_of(self.contributors_by_name, 'contributor by name')}")
+            PRINT(f"{n_of(self.contributors_by_email, 'contributor by email')}")
+
+    @classmethod
+    def contributor_index_by_primary_name(cls, contributors_by_name: ContributorIndex) -> ContributorIndex:
+        """
+        Given a by-name contributor index:
+
+        * Makes sure that all contributors have only one name, indexed by the contributor's primary name
+        * Sorts the resulting index using a case-insensitive alphabetic sort
+
+        and then returns the result.
+
+        :param contributors_by_name: a contributor index indexed by human name
+        :return: a contributor index
+        """
+        seen = set()
+        nicknames_seen = set()
+        contributor_items = []
+        contributors = {}
+        for name, contributor in contributors_by_name.items():
+            if contributor not in seen:
+                for nickname in contributor.names:
+                    if nickname in nicknames_seen:
+                        raise Exception(f"Name improperly shared between {contributor}"
+                                        f" and {contributors_by_name[nickname]}")
+                    nicknames_seen.add(nickname)
+                contributor_items.append((contributor.primary_name, contributor))
+                seen.add(contributor)
+        for name, contributor in sorted(contributor_items,
+                                        # Having selected the longest names, now sort names ignoring case
+                                        key=lambda pair: pair[0].lower()):
+            contributors[name] = contributor
+        return contributors
+
+    @classmethod
+    def traverse(cls,
+                 root: Contributor,
+                 cursor: Optional[Contributor],
+                 contributors_by_email: ContributorIndex,
+                 contributors_by_name: ContributorIndex,
+                 seen: Optional[Set[Contributor]] = None):
+        if seen is None:
+            seen = set()
+        if cursor in seen:  # It's slightly possible that a person has a name of None that slipped in. Ignore that.
+            return
+        seen.add(cursor)
+        for name in list(cursor.names):
+            root.names.add(name)
+        for email in list(cursor.emails):
+            root.emails.add(email)
+        for name in list(cursor.names):
+            contributor = contributors_by_name.get(name)
+            if contributor and contributor not in seen:
+                cls.traverse(root=root, cursor=contributor, contributors_by_email=contributors_by_email,
+                             contributors_by_name=contributors_by_name, seen=seen)
+        for email in list(cursor.emails):
+            contributor = contributors_by_email.get(email)
+            if contributor and contributor not in seen:
+                cls.traverse(root=root, cursor=contributor, contributors_by_email=contributors_by_email,
+                             contributors_by_name=contributors_by_name, seen=seen)
+
+    def load_from_dict(self, data: Dict):
+        forked_at: Optional[str] = data.get('forked_at')
+        excluded_fork = data.get('excluded_fork')
+        self.forked_at: Optional[datetime.datetime] = (None
+                                                       if forked_at is None
+                                                       else datetime.datetime.fromisoformat(forked_at))
+        self.exclude_fork = excluded_fork
+
+        fork_contributors_by_name_json = data.get('pre_fork_contributors_by_name') or {}
+        fork_contributors_by_name = self.contributor_values_as_objects(fork_contributors_by_name_json)
+        self.pre_fork_contributors_by_name = fork_contributors_by_name
+        fork_contributors_by_email_json = data.get('pre_fork_contributors_by_email') or {}
+        if fork_contributors_by_email_json:
+            self.pre_fork_contributors_by_email = self.contributor_values_as_objects(fork_contributors_by_email_json)
+        else:
+            self.pre_fork_contributors_by_email = self.by_email_from_by_name(fork_contributors_by_name)
+
+        contributors_by_name_json = data.get('contributors_by_name', {})
+        self.contributors_by_name = contributors_by_name = self.contributor_values_as_objects(contributors_by_name_json)
+        contributors_by_email_json = data.get('contributors_by_email', {})
+        if contributors_by_email_json:
+            self.contributors_by_email = self.contributor_values_as_objects(contributors_by_email_json)
+        else:
+            self.contributors_by_email = self.by_email_from_by_name(contributors_by_name)
+
+    @classmethod
+    def by_email_from_by_name(cls, contributors_by_email_json):
+        result = {}
+        seen = set()
+        for email_key, entry in contributors_by_email_json.items():
+            ignored(email_key)
+            seen_key = id(entry)
+            if seen_key in seen:
+                continue
+            seen.add(seen_key)
+            for email in entry.get("emails", []) if isinstance(entry, dict) else entry.emails:
+                if result.get(email):
+                    raise Exception(f"email address {email} is used more than once.")
+                result[email] = entry
+        return result
+
+    def show_repo_contributors(self, analyze_discrepancies: bool = True, with_email: bool = True,
+                               error_class: Optional[Type[BaseException]] = None):
+        for author_name in self.repo_contributor_names(with_email=with_email):
+            PRINT(author_name)
+        if analyze_discrepancies:
+            file = self.existing_contributors_json_file()
+            if not file:
+                message = f"Need to create a {self.CONTRIBUTORS_CACHE_FILE} file for {self.repo}."
+                if error_class:
+                    raise error_class(message)
+                else:
+                    PRINT(message)
+            elif self.cache_discrepancies:
+                message = "There are contributor cache discrepancies."
+                PRINT(f"===== {message.rstrip('.').upper()} =====")
+                for action, items in self.cache_discrepancies.items():
+                    action: str
+                    PRINT(f"{action.replace('_', ' ').title()}:")
+                    for item in items:
+                        PRINT(f" * {item}")
+                if error_class:
+                    raise error_class(message)
diff --git a/dcicutils/license_utils.py b/dcicutils/license_utils.py
index b2124a43d..698379470 100644
--- a/dcicutils/license_utils.py
+++ b/dcicutils/license_utils.py
@@ -2,10 +2,11 @@
 import datetime
 import io
 import json
-import logging
+# import logging
 import os
 import re
 import subprocess
+import warnings
 
 try:
     import piplicenses
@@ -30,8 +31,8 @@
 from dcicutils.misc_utils import PRINT, get_error_message, local_attrs
 
 
-logging.basicConfig()
-logger = logging.getLogger(__name__)
+# logging.basicConfig()
+# logger = logging.getLogger(__name__)
 
 _FRAMEWORK = 'framework'
 _LANGUAGE = 'language'
@@ -270,7 +271,7 @@ def report(message):
             if analysis:
                 analysis.miscellaneous.append(message)
             else:
-                logger.warning(message)
+                warnings.warn(message)
         parsed = cls.parse_simple_license_file(filename=filename)
         if check_license_title:
             license_title = parsed['license-title']
@@ -488,11 +489,12 @@ class MyOrgLicenseChecker(LicenseChecker):
         cls.analyze_license_file(analysis=analysis)
         cls.show_unacceptable_licenses(analysis=analysis)
         if analysis.unexpected_missing:
-            logger.warning(there_are(analysis.unexpected_missing, kind='unexpectedly missing license', punctuate=True))
+            warnings.warn(there_are(analysis.unexpected_missing, kind='unexpectedly missing license', punctuate=True))
         if analysis.no_longer_missing:
-            logger.warning(there_are(analysis.no_longer_missing, kind='no-longer-missing license', punctuate=True))
+            # This is not so major as to need a warning, but it's still something that should show up somewhere.
+            PRINT(there_are(analysis.no_longer_missing, kind='no-longer-missing license', punctuate=True))
         for message in analysis.miscellaneous:
-            logger.warning(message)
+            warnings.warn(message)
         if analysis.unacceptable:
             raise LicenseAcceptabilityCheckFailure(unacceptable_licenses=analysis.unacceptable)
 
@@ -702,6 +704,11 @@ class C4InfrastructureLicenseChecker(LicenseChecker):
         # Ref: https://github.com/getsentry/responses/blob/master/LICENSE
         'responses',
 
+        # This seems to get flagged sometimes, but is not the pypi snovault library, it's what our dcicsnovault
+        # calls itself internally.. In any case, it's under MIT license and OK.
+        # Ref: https://github.com/4dn-dcic/snovault/blob/master/LICENSE.txt
+        'snovault',
+
         # PyPi identifies the supervisor library license as "BSD-derived (http://www.repoze.org/LICENSE.txt)"
         # Ref: https://pypi.org/project/supervisor/
         # In fact, though, the license is a bit more complicated, though apparently still permissive.
@@ -710,7 +717,7 @@ class C4InfrastructureLicenseChecker(LicenseChecker):
 
         # This seems to be a BSD-3-Clause-Modification license.
         # Ref: https://github.com/Pylons/translationstring/blob/master/LICENSE.txt
-        'translationstring',
+        # 'translationstring',
 
         # This seems to be a BSD-3-Clause-Modification license.
         # Ref: https://github.com/Pylons/venusian/blob/master/LICENSE.txt
diff --git a/dcicutils/qa_checkers.py b/dcicutils/qa_checkers.py
index 61a79cc77..291135bd1 100644
--- a/dcicutils/qa_checkers.py
+++ b/dcicutils/qa_checkers.py
@@ -8,6 +8,7 @@
 from collections import defaultdict
 from typing import Optional, List, Dict, Type
 from typing_extensions import Literal
+from .contribution_utils import Contributions
 from .lang_utils import conjoined_list, n_of, there_are
 from .misc_utils import PRINT, remove_prefix, remove_suffix, getattr_customized, CustomizableProperty
 
@@ -436,3 +437,11 @@ def summarize(problems):
             detail += f"\n In {file}, {summarize(matches)}."
         message = f"{n_of(n, 'problem')} detected:" + detail
         raise AssertionError(message)
+
+
+class ContributionsChecker:
+
+    @classmethod
+    def validate(cls):
+        contributions = Contributions()  # no repo specified, so use current directory
+        contributions.show_repo_contributors(error_class=AssertionError)
diff --git a/docs/source/dcicutils.rst b/docs/source/dcicutils.rst
index b18d0a952..7fdaba7ea 100644
--- a/docs/source/dcicutils.rst
+++ b/docs/source/dcicutils.rst
@@ -37,6 +37,20 @@ command_utils
    :members:
 
 
+contribution_scripts
+^^^^^^^^^^^^^^^^^^^^
+
+.. automodule:: dcicutils.contribution_scripts
+   :members:
+
+
+contribution_utils
+^^^^^^^^^^^^^^^^^^
+
+.. automodule:: dcicutils.contribution_utils
+   :members:
+
+
 creds_utils
 ^^^^^^^^^^^
 
diff --git a/poetry.lock b/poetry.lock
index da9155f6b..d428e79a8 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -70,18 +70,18 @@ lxml = ["lxml"]
 
 [[package]]
 name = "boto3"
-version = "1.26.101"
+version = "1.28.11"
 description = "The AWS SDK for Python"
 category = "main"
 optional = false
 python-versions = ">= 3.7"
 files = [
-    {file = "boto3-1.26.101-py3-none-any.whl", hash = "sha256:5f5279a63b359ba8889e9a81b319e745b14216608ffb5a39fcbf269d1af1ea83"},
-    {file = "boto3-1.26.101.tar.gz", hash = "sha256:670ae4d1875a2162e11c6e941888817c3e9cf1bb9a3335b3588d805b7d24da31"},
+    {file = "boto3-1.28.11-py3-none-any.whl", hash = "sha256:e24460d50001b517c6734dcf1c879feb43aa2062d88d9bdbb8703c986cb05941"},
+    {file = "boto3-1.28.11.tar.gz", hash = "sha256:0fe7a35cf0041145c8eefebd3ae2ddf41baed62d7c963e5042b8ed8c297f648f"},
 ]
 
 [package.dependencies]
-botocore = ">=1.29.101,<1.30.0"
+botocore = ">=1.31.11,<1.32.0"
 jmespath = ">=0.7.1,<2.0.0"
 s3transfer = ">=0.6.0,<0.7.0"
 
@@ -386,14 +386,14 @@ xray = ["mypy-boto3-xray (>=1.18.18)"]
 
 [[package]]
 name = "botocore"
-version = "1.29.101"
+version = "1.31.11"
 description = "Low-level, data-driven core of boto 3."
 category = "main"
 optional = false
 python-versions = ">= 3.7"
 files = [
-    {file = "botocore-1.29.101-py3-none-any.whl", hash = "sha256:60c7a7bf8e2a288735e507007a6769be03dc24815f7dc5c7b59b12743f4a31cf"},
-    {file = "botocore-1.29.101.tar.gz", hash = "sha256:7bb60d9d4c49500df55dfb6005c16002703333ff5f69dada565167c8d493dfd5"},
+    {file = "botocore-1.31.11-py3-none-any.whl", hash = "sha256:d3cbffe554c9a1ba2ac6973734c43c21b8e7985a2ac4a4c31a09811b8029445c"},
+    {file = "botocore-1.31.11.tar.gz", hash = "sha256:b17ff973bb70b02b227928c2abe4992f1cfc46d13aee0228516c8f32572b88c6"},
 ]
 
 [package.dependencies]
@@ -402,7 +402,7 @@ python-dateutil = ">=2.1,<3.0.0"
 urllib3 = ">=1.25.4,<1.27"
 
 [package.extras]
-crt = ["awscrt (==0.16.9)"]
+crt = ["awscrt (==0.16.26)"]
 
 [[package]]
 name = "botocore-stubs"
@@ -820,14 +820,14 @@ smmap = ">=3.0.1,<6"
 
 [[package]]
 name = "gitpython"
-version = "3.1.31"
+version = "3.1.32"
 description = "GitPython is a Python library used to interact with Git repositories"
 category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "GitPython-3.1.31-py3-none-any.whl", hash = "sha256:f04893614f6aa713a60cbbe1e6a97403ef633103cdd0ef5eb6efe0deb98dbe8d"},
-    {file = "GitPython-3.1.31.tar.gz", hash = "sha256:8ce3bcf69adfdf7c7d503e78fd3b1c492af782d58893b650adb2ac8912ddd573"},
+    {file = "GitPython-3.1.32-py3-none-any.whl", hash = "sha256:e3d59b1c2c6ebb9dfa7a184daf3b6dd4914237e7488a1730a6d8f6f5d0b4187f"},
+    {file = "GitPython-3.1.32.tar.gz", hash = "sha256:8d9b8cb1e80b9735e8717c9362079d3ce4c6e5ddeebedd0361b228c3a67a62f6"},
 ]
 
 [package.dependencies]
diff --git a/pyproject.toml b/pyproject.toml
index d79690d4f..2715c6ac4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "dcicutils"
-version = "7.6.0.2b3"
+version = "7.6.0.2b7"
 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources"
 authors = ["4DN-DCIC Team <support@4dnucleome.org>"]
 license = "MIT"
@@ -80,15 +80,18 @@ pytest-runner = ">=5.1"
 
 [tool.poetry.scripts]
 publish-to-pypi = "dcicutils.scripts.publish_to_pypi:main"
-
+show-contributors = "dcicutils.contribution_scripts:show_contributors_main"
 
 [tool.pytest.ini_options]
 addopts = "--basetemp=/tmp/pytest"
 redis_exec = "/usr/local/bin/redis-server"
 filterwarnings = [
+    # TODO: These next two are about use of the Version class:
     "ignore:distutils Version classes are deprecated. Use packaging.version instead.:DeprecationWarning",
+    "ignore:Setuptools is replacing distutils.:UserWarning",
     # This is a pip-licenses problem (still present in 3.5.5):
     "ignore:pkg_resources is deprecated as an API:DeprecationWarning",
+    "ignore:Boto3 will no longer support Python 3.7 starting December 13, 2023.*:",
 ]
 markers = [
     "working: test should work",
diff --git a/test/test_contribution_scripts.py b/test/test_contribution_scripts.py
new file mode 100644
index 000000000..8fa040c1e
--- /dev/null
+++ b/test/test_contribution_scripts.py
@@ -0,0 +1,63 @@
+import pytest
+
+from dcicutils import contribution_scripts as contribution_scripts_module
+from dcicutils.command_utils import ScriptFailure
+from dcicutils.contribution_scripts import show_contributors, show_contributors_main
+from unittest import mock
+
+
+class MockContributions:
+
+    def __init__(self, repo, exclude_fork=None, verbose=False):
+        self.repo = repo
+        self.exclude_fork = exclude_fork
+        self.verbose = verbose
+
+
+def test_show_contributors():
+    with mock.patch.object(contribution_scripts_module, "Contributions") as mock_contributions:
+        contributions_object = mock.MagicMock()
+        mock_contributions.return_value = contributions_object
+
+        show_contributors(repo='my-repo')
+
+        assert contributions_object.save_contributor_data.call_count == 0
+
+        mock_contributions.assert_called_once_with(repo='my-repo', exclude_fork=None, verbose=False)
+        contributions_object.show_repo_contributors.assert_called_once_with(error_class=None)
+
+        mock_contributions.reset_mock()
+        contributions_object = mock.MagicMock()
+        mock_contributions.return_value = contributions_object
+
+        show_contributors(repo='another-repo', exclude_fork='whatever', save_contributors=True, verbose=True, test=True)
+
+        mock_contributions.assert_called_once_with(repo='another-repo', exclude_fork='whatever', verbose=True)
+        contributions_object.show_repo_contributors.assert_called_once_with(error_class=ScriptFailure)
+        contributions_object.save_contributor_data.assert_called_once_with()
+
+
+def test_show_contributors_main():
+
+    with mock.patch.object(contribution_scripts_module, "show_contributors") as mock_show_contributors:
+
+        with pytest.raises(SystemExit) as exc:
+
+            show_contributors_main(simulated_args=['some-repo'])
+
+        assert exc.value.code == 0
+
+        mock_show_contributors.assert_called_once_with(repo='some-repo', exclude_fork=None, verbose=False,
+                                                       save_contributors=False, test=False)
+
+        mock_show_contributors.reset_mock()
+
+        with pytest.raises(SystemExit) as exc:
+
+            show_contributors_main(simulated_args=['my-repo', '--exclude', 'their-repo',
+                                                   '--save-contributors', '--test', '--verbose'])
+
+        assert exc.value.code == 0
+
+        mock_show_contributors.assert_called_once_with(repo='my-repo', exclude_fork='their-repo',
+                                                       save_contributors=True, test=True, verbose=True)
diff --git a/test/test_contribution_utils.py b/test/test_contribution_utils.py
new file mode 100644
index 000000000..fee2def01
--- /dev/null
+++ b/test/test_contribution_utils.py
@@ -0,0 +1,630 @@
+import contextlib
+import datetime
+import git
+import io
+import json
+import os
+import pytest
+import re
+
+from dcicutils import contribution_utils as contribution_utils_module
+from dcicutils.contribution_utils import Contributor, BasicContributions, Contributions, GitAnalysis
+from dcicutils.misc_utils import make_counter, file_contents  # , override_environ
+from dcicutils.qa_utils import MockId, MockFileSystem, printed_output
+from typing import List, Optional
+from unittest import mock
+
+
+class MockGitActor:
+    def __init__(self, name: Optional[str], email: str):
+        self.name = name
+        self.email = email
+
+
+class MockGitCommit:
+    def __init__(self, hexsha: str, committed_datetime: str, author: dict,
+                 message: str, co_authors: Optional[List[dict]] = None):
+        self.hexsha = hexsha
+        self.committed_datetime = datetime.datetime.fromisoformat(committed_datetime)
+        self.author = MockGitActor(**author)
+        self.co_authors = [MockGitActor(**co_author) for co_author in (co_authors or [])]
+        self.message = message
+
+
+class MockGitRepo:
+
+    def __init__(self, name, path, mocked_commits=None):
+        self.name = name
+        self.path = path
+        self.mocked_commits = mocked_commits or []
+
+    def iter_commits(self):
+        for json_data in self.mocked_commits:
+            yield MockGitCommit(**json_data)
+
+
+class MockGitModule:
+
+    def __init__(self, mocked_commits=None):
+        if mocked_commits is None:
+            mocked_commits = {}
+        self._mocked_commits = mocked_commits
+
+    def Repo(self, path):
+        repo_name = os.path.basename(path)
+        return MockGitRepo(name=repo_name, path=path, mocked_commits=self._mocked_commits.get(repo_name, []))
+
+    Actor = MockGitActor
+
+
+SAMPLE_USER_HOME = "/home/jdoe"
+SAMPLE_PROJECT_HOME = f"{SAMPLE_USER_HOME}/repos"
+SAMPLE_FOO_HOME = f"{SAMPLE_PROJECT_HOME}/foo"
+
+
+@contextlib.contextmanager
+def git_context(project_home=SAMPLE_PROJECT_HOME, mocked_commits=None):
+    with mock.patch.object(contribution_utils_module, "git", MockGitModule(mocked_commits=mocked_commits)):
+        with mock.patch.object(contribution_utils_module, "PROJECT_HOME", project_home):
+            yield
+
+
+def test_git_analysis_find_repo():
+
+    with git_context():
+        foo_repo = GitAnalysis.find_repo("foo")
+        assert isinstance(foo_repo, MockGitRepo)
+        assert foo_repo.name == 'foo'
+        assert foo_repo.path == SAMPLE_FOO_HOME
+        assert foo_repo.mocked_commits == []
+
+
+def test_git_analysis_git_commits():
+
+    with git_context(mocked_commits={"foo": [
+        {
+            "hexsha": "aaaa",
+            "committed_datetime": "2020-01-01 01:23:45",
+            "author": {"name": "Jdoe", "email": "jdoe@foo"},
+            "message": "something"
+        },
+        {
+            "hexsha": "bbbb",
+            "committed_datetime": "2020-01-02 12:34:56",
+            "author": {"name": "Sally", "email": "ssmith@foo"},
+            "message": "something else"
+        }
+    ]}):
+        foo_repo = GitAnalysis.find_repo('foo')
+        foo_commits = list(foo_repo.iter_commits())
+        assert len(foo_commits) == 2
+        assert all(isinstance(commit, MockGitCommit) for commit in foo_commits)
+        assert GitAnalysis.json_for_actor(foo_commits[0].author) == {'name': 'Jdoe', 'email': 'jdoe@foo'}
+        assert GitAnalysis.json_for_actor(foo_commits[1].author) == {'name': 'Sally', 'email': 'ssmith@foo'}
+        assert foo_commits[0].hexsha == 'aaaa'
+        assert foo_commits[1].hexsha == 'bbbb'
+
+        assert [GitAnalysis.json_for_commit(commit) for commit in foo_commits] == list(GitAnalysis.git_commits('foo'))
+
+
+def test_git_analysis_iter_commits_scenario():  # Tests .iter_commits, .json_for_commit, .json_for_actor, .git_commits
+
+    with git_context(mocked_commits={"foo": [
+        {
+            "hexsha": "aaaa",
+            "committed_datetime": "2020-01-01T01:23:45-05:00",
+            "author": {"name": "Jdoe", "email": "jdoe@foo"},
+            "message": "something"
+        },
+        {
+            "hexsha": "bbbb",
+            "committed_datetime": "2020-01-02T12:34:56-05:00",
+            "author": {"name": "Sally", "email": "ssmith@foo"},
+            "co_authors": [{"name": "William Simmons", "email": "bill@someplace"}],
+            "message": "something else"
+        }
+    ]}):
+        foo_repo = GitAnalysis.find_repo("foo")
+        foo_commits_as_json = [GitAnalysis.json_for_commit(commit) for commit in foo_repo.iter_commits()]
+        assert foo_commits_as_json == [
+            {
+                'author': {'email': 'jdoe@foo', 'name': 'Jdoe'},
+                'coauthors': [],
+                'commit': 'aaaa',
+                'date': '2020-01-01T01:23:45-05:00',
+                'message': 'something'
+            },
+            {
+                'author': {'email': 'ssmith@foo', 'name': 'Sally'},
+                'coauthors': [{'email': 'bill@someplace', 'name': 'William Simmons'}],
+                'commit': 'bbbb',
+                'date': '2020-01-02T12:34:56-05:00',
+                'message': 'something else'
+            }
+        ]
+
+        assert foo_commits_as_json == list(GitAnalysis.git_commits('foo'))
+
+
+def test_contributor_create():
+
+    # This should execute without error
+    c1 = Contributor(name="John Doe", email="john@whatever")
+
+    with mock.patch.object(git, "Actor", MockGitActor):
+
+        c2 = Contributor.create(author=git.Actor(name="John Doe", email="john@whatever"))
+
+    assert c1.as_dict() == c2.as_dict()
+
+
+def test_contributor_str():
+
+    with mock.patch.object(contribution_utils_module, "id", MockId(1000)):
+
+        john = Contributor(names={"John Doe", "jdoe"}, email="john@whatever")
+
+        assert str(john) == "<Contributor emails='john@whatever' names='jdoe','John Doe' 1000>"
+
+        jdoe = Contributor(names={"John Doe", "jdoe"}, primary_name="jdoe", email="john@whatever")
+
+        assert str(jdoe) == "<Contributor 'jdoe' emails='john@whatever' names='jdoe','John Doe' 1001>"
+
+        # Edge cases...
+
+        with pytest.raises(ValueError) as exc:
+            Contributor()
+        assert str(exc.value) == "One of email= or emails= is required."
+
+        with pytest.raises(ValueError) as exc:
+            Contributor(email='foo@bar', emails={'foo@bar'})
+        assert str(exc.value) == 'Only one of email= and emails= may be provided.'
+
+        with pytest.raises(ValueError) as exc:
+            Contributor(name='John', names={'John'}, email="foo@bar")
+        assert str(exc.value) == 'Only one of name= and names= may be provided.'
+
+
+def test_contributor_set_primary_name():
+
+    john = Contributor(names={"John Doe", "jdoe"}, email="john@whatever")
+    assert john.primary_name == "John Doe"
+
+    john.names.add("John Q Doe")
+    assert john.primary_name == "John Q Doe"
+
+    john.set_primary_name("John Doe")
+    assert john.primary_name == "John Doe"
+
+    john.names.add("John Quincy Doe")
+    assert john.primary_name == "John Doe"
+
+    assert "Quincy" not in john.names
+    john.set_primary_name("Quincy")
+    assert john.primary_name == "Quincy"
+    assert "Quincy" in john.names
+
+
+def test_contributor_copy():
+
+    john = Contributor(name="john", email="john@foo")
+    jack = john.copy()
+
+    assert john.names == {"john"}
+    assert john.emails == {"john@foo"}
+    assert john.primary_name == "john"
+
+    jack.names.add("jack")
+    jack.emails.add("jack@bar")
+    jack.set_primary_name("jack")
+
+    assert john.names == {"john"}
+    assert john.emails == {"john@foo"}
+    assert john.primary_name == "john"
+
+    assert jack.names == {"john", "jack"}
+    assert jack.emails == {"john@foo", "jack@bar"}
+    assert jack.primary_name == "jack"
+
+
+def test_contributor_primary_name():
+
+    email_counter = make_counter()
+
+    def make_contributor(names, primary_name=None):
+        some_email = f"user{email_counter()}@something.foo"  # unique but we don't care about the specific value
+        return Contributor(names=set(names), email=some_email, primary_name=primary_name)
+
+    assert make_contributor({"John Doe", "jdoe"}).primary_name == "John Doe"
+    assert make_contributor({"jdoe", "John Doe"}).primary_name == "John Doe"
+    assert make_contributor({"jdoe", "John Doe", "jdoe123456789"}).primary_name == "John Doe"
+    assert make_contributor({"jdoe123456789", "John Doe", "jdoe"}).primary_name == "John Doe"
+
+    assert make_contributor({"jdoe123456789", "John Doe", "John Q Doe", "jdoe"}).primary_name == "John Q Doe"
+    assert make_contributor({"jdoe123456789", "John Q Doe", "John Doe", "jdoe"}).primary_name == "John Q Doe"
+
+    assert make_contributor({"John Doe", "jdoe"}, primary_name="jdoe").primary_name == "jdoe"
+    assert make_contributor({"jdoe", "John Doe"}, primary_name="jdoe").primary_name == "jdoe"
+    assert make_contributor({"jdoe", "John Doe", "jdoe123456789"}, primary_name="jdoe").primary_name == "jdoe"
+    assert make_contributor({"jdoe123456789", "John Doe", "jdoe"}, primary_name="jdoe").primary_name == "jdoe"
+
+
+def test_contributor_from_dict():
+
+    joe_json = {'names': ["Joe"], "emails": ["joe@wherever"]}
+
+    joe_obj = Contributor.from_dict(joe_json)
+
+    assert isinstance(joe_obj, Contributor)
+    assert list(joe_obj.emails) == joe_json['emails']
+    assert list(joe_obj.names) == joe_json['names']
+
+    assert joe_obj.as_dict() == joe_json
+
+    joe_obj2 = Contributor.from_dict(joe_json, key="Joseph")
+    assert joe_obj.names != joe_obj2.names
+    assert joe_obj.names | {'Joseph'} == joe_obj2.names
+
+
+def test_contributor_notice_mention_as():
+
+    john = Contributor(name="john", email="john@foo")
+
+    assert john.names == {"john"}
+    assert john.emails == {"john@foo"}
+
+    john.notice_mention_as(name="jack", email="john@foo")
+
+    assert john.names == {"john", "jack"}
+    assert john.emails == {"john@foo"}
+
+    john.notice_mention_as(name="john", email="jack@bar")
+
+    assert john.names == {"john", "jack"}
+    assert john.emails == {"john@foo", "jack@bar"}
+
+    john.notice_mention_as(name="john", email="john@foo")
+
+    assert john.names == {"john", "jack"}
+    assert john.emails == {"john@foo", "jack@bar"}
+
+
+def test_contributor_as_dict():
+
+    john = Contributor(name="john", email="john@foo")
+
+    assert john.as_dict() == {
+        "names": ["john"],
+        "emails": ["john@foo"]
+    }
+
+    john.notice_mention_as(name="jack", email="john@foo")
+
+    assert john.as_dict() == {
+        "names": ["jack", "john"],
+        "emails": ["john@foo"]
+    }
+
+    john.notice_mention_as(name="Richard", email="john@foo")
+
+    assert john.as_dict() == {
+        # As it happens, our sort is case-sensitive. See notes in source, but that's just because it was easiest.
+        # This test is just to notice on a simple example if that changes, since bigger examples could be harder
+        # to debug. -kmp 27-Jul-2023
+        "names": ["Richard", "jack", "john"],
+        "emails": ["john@foo"]
+    }
+
+
+def test_contributor_values_as_objects():
+
+    assert Contributions.contributor_values_as_objects(None) is None
+
+    idx_json = {'jdoe': {'names': ['jdoe'], 'emails': ['jdoe@somewhere']}}
+    idx = Contributions.contributor_values_as_objects(idx_json)
+    jdoe = idx['jdoe']
+
+    assert isinstance(jdoe, Contributor)
+    assert jdoe.names == set(idx_json['jdoe']['names'])
+    assert jdoe.emails == set(idx_json['jdoe']['emails'])
+
+
+def test_contributor_values_as_dicts():
+
+    assert Contributions.contributor_values_as_dicts(None) is None
+
+    jdoe = Contributor(names={'jdoe'}, emails={'jdoe@somewhere'})
+    idx = {'jdoe': jdoe}
+    idx_json = Contributions.contributor_values_as_dicts(idx)
+    jdoe_json = idx_json['jdoe']
+
+    assert isinstance(jdoe_json, dict)
+    assert set(jdoe_json['names']) == jdoe.names
+    assert set(jdoe_json['emails']) == jdoe.emails
+
+
+def test_contributor_index_by_primary_name():
+
+    john = Contributor(names={"John Doe", "jdoe"}, emails={"jdoe@a.foo"})
+    jane = Contributor(names={"jsmith", "Jane Smith"}, emails={"jsmith@b.foo"})
+
+    idx1 = {
+        "John Doe": john,
+        "Jane Smith": jane,
+        "jdoe": john,
+        "jsmith": jane,
+    }
+
+    assert list(Contributions.contributor_index_by_primary_name(idx1).items()) == [
+        ("Jane Smith", jane),
+        ("John Doe", john),
+    ]
+
+    idx2 = {
+        "jdoe": john,
+        "jsmith": jane,
+    }
+
+    assert list(Contributions.contributor_index_by_primary_name(idx2).items()) == [
+        ("Jane Smith", jane),
+        ("John Doe", john),
+    ]
+
+    idx3 = {
+        "John Doe": john,
+        "jdoe": john.copy(),  # if the info is here twice, it must be in a shared pointer, not a separate object
+        "jsmith": jane,
+    }
+
+    with pytest.raises(Exception) as exc:
+        # This should notice the improper duplication.
+        Contributions.contributor_index_by_primary_name(idx3)
+
+    assert str(exc.value).startswith("Name improperly shared")
+
+
+def test_contributions_by_name_from_by_email():
+
+    email_a = "a@somewhere"
+    email_alpha = "alpha@somewhere"
+
+    email_b = "b@somewhere"
+    email_beta = "beta@somewhere"
+
+    name_ajones = "ajones"
+    name_art = "Art Jones"
+
+    name_bsmith = "bsmith"
+    name_betty = "Betty Smith"
+
+    art_jones = Contributor(names={name_art, name_ajones}, emails={email_a, email_alpha})
+    betty_smith = Contributor(names={name_betty, name_bsmith}, emails={email_b, email_beta})
+
+    # We should really just need one entry per each person
+
+    by_name_contributors_short = {
+        name_art: art_jones,
+        name_betty: betty_smith,
+    }
+
+    assert Contributions.by_email_from_by_name(by_name_contributors_short) == {
+        email_a: art_jones,
+        email_alpha: art_jones,
+        email_b: betty_smith,
+        email_beta: betty_smith,
+    }
+
+    # It's OK for there to be more than one entry, as long as the entries share a single contributor object
+    # (or contributor json dictionary)
+
+    by_name_contributors = {
+        name_ajones: art_jones,
+        name_art: art_jones,
+        name_bsmith: betty_smith,
+        name_betty: betty_smith,
+    }
+
+    assert Contributions.by_email_from_by_name(by_name_contributors) == {
+        email_a: art_jones,
+        email_alpha: art_jones,
+        email_b: betty_smith,
+        email_beta: betty_smith,
+    }
+
+    # It works for the targets of the dictionary to be either Contributors or JSON represented Contributors.
+
+    art_jones_json = art_jones.as_dict()
+    betty_smith_json = betty_smith.as_dict()
+
+    by_name_json = {
+        name_ajones: art_jones_json,
+        name_art: art_jones_json,
+        name_bsmith: betty_smith_json,
+        name_betty: betty_smith_json,
+    }
+
+    assert Contributions.by_email_from_by_name(by_name_json) == {
+        email_a: art_jones_json,
+        email_alpha: art_jones_json,
+        email_b: betty_smith_json,
+        email_beta: betty_smith_json,
+    }
+
+    # You can't have conflicts between email addresses. That's supposed to have been resolved earlier by unification.
+
+    by_name_json_with_dups = {
+        "Art Jones": {"names": ["Art Jones"], "emails": ["ajones@somewhere"]},
+        "Arthur Jones": {"names": ["Arthur Jones"], "emails": ["ajones@somewhere"]}
+    }
+
+    with pytest.raises(Exception) as exc:
+        Contributions.by_email_from_by_name(by_name_json_with_dups)
+    assert str(exc.value) == "email address ajones@somewhere is used more than once."
+
+
+def test_contributions_traverse_terminal_node():
+
+    mariela = Contributor(names={'mari', 'mariela'}, emails={'mariela@somewhere', 'mari@elsewhere'})
+    mari = Contributor(names={'mari'}, emails={'mariela@somewhere', 'mari@elsewhere'})
+
+    seen = {mari, mariela}
+    originally_seen = seen.copy()
+
+    Contributions.traverse(root=mariela, contributors_by_name={}, contributors_by_email={},
+                           # We're testing the cursor=None case
+                           seen=seen, cursor=mari)
+
+    assert seen == originally_seen
+
+
+def test_contributions_pretty_email():
+
+    assert Contributions.pretty_email('joe@foo.com') == 'joe@foo.com'
+    assert Contributions.pretty_email('joe@users.noreply.github.com') == 'joe@github'
+    assert Contributions.pretty_email('12345+joe@users.noreply.github.com') == 'joe@github'
+
+
+def test_notice_reference_time():
+
+    timestamps = {}
+    timestamp0 = datetime.datetime(2020, 7, 1, 12, 0, 0)
+    timestamp1 = datetime.datetime(2020, 7, 1, 12, 0, 1)
+    timestamp2 = datetime.datetime(2020, 7, 1, 12, 0, 2)
+    key = 'joe'
+
+    Contributions.notice_reference_time(key=key, timestamp=timestamp1, timestamps=timestamps)
+    assert timestamps == {key: timestamp1}
+
+    Contributions.notice_reference_time(key=key, timestamp=timestamp0, timestamps=timestamps)
+    assert timestamps == {key: timestamp1}
+
+    Contributions.notice_reference_time(key=key, timestamp=timestamp2, timestamps=timestamps)
+    assert timestamps == {key: timestamp2}
+
+
+def test_existing_contributors_json_file():
+
+    mfs = MockFileSystem()
+
+    contributions = BasicContributions(repo='foo')  # don't need all the git history, etc. loaded for this test
+
+    with mfs.mock_exists_open_remove():
+
+        assert contributions.existing_contributors_json_file() is None
+
+        cache_file = contributions.contributors_json_file()
+        print("cache_file=", cache_file)
+        with io.open(cache_file, 'w') as fp:
+            fp.write('something')
+
+        assert contributions.existing_contributors_json_file() == cache_file
+
+
+def test_contributions_email_reference_time():
+
+    contributions = BasicContributions()
+    now = datetime.datetime.now()
+    then = now - datetime.timedelta(seconds=10)
+    contributions.email_timestamps = {'foo@somewhere': now, 'bar@elsewhere': then}
+
+    assert contributions.email_reference_time('foo@somewhere') == now
+    assert contributions.email_reference_time('bar@elsewhere') == then
+    assert contributions.email_reference_time('baz@anywhere') is None
+
+
+def test_contributions_name_reference_time():
+
+    contributions = BasicContributions()
+    now = datetime.datetime.now()
+    then = now - datetime.timedelta(seconds=10)
+    contributions.name_timestamps = {'foo': now, 'bar': then}
+
+    assert contributions.name_reference_time('foo') == now
+    assert contributions.name_reference_time('bar') == then
+    assert contributions.name_reference_time('baz') is None
+
+
+def test_file_cache_error_reporting():
+
+    mfs = MockFileSystem()
+    with mfs.mock_exists_open_remove():
+
+        with io.open(Contributions.CONTRIBUTORS_CACHE_FILE, 'w') as fp:
+            fp.write("{bad json}")
+
+        with printed_output() as printed:
+            with pytest.raises(json.JSONDecodeError) as exc:
+                Contributions.get_contributors_json_from_file_cache(Contributions.CONTRIBUTORS_CACHE_FILE,)
+            assert re.match("Expecting.*line 1 column 2.*", str(exc.value))
+            assert printed.lines == ["Error while reading data from 'CONTRIBUTORS.json'."]
+
+
+def test_save_contributor_data():
+
+    mfs = MockFileSystem()
+
+    with mfs.mock_exists_open_remove_abspath_getcwd_chdir():
+
+        contributions = BasicContributions()
+        cache_file = contributions.save_contributor_data()
+
+        assert file_contents(cache_file) == (
+            '{\n'
+            '  "forked_at": null,\n'
+            '  "excluded_fork": null,\n'
+            '  "pre_fork_contributors_by_name": null,\n'
+            '  "contributors_by_name": null\n'
+            '}\n'
+        )
+
+        cache_file_2 = contributions.save_contributor_data('some.file')
+        assert cache_file_2 == 'some.file'
+        assert file_contents(cache_file_2) == (
+            '{\n'
+            '  "forked_at": null,\n'
+            '  "excluded_fork": null,\n'
+            '  "pre_fork_contributors_by_name": null,\n'
+            '  "contributors_by_name": null\n'
+            '}\n'
+        )
+
+
+def test_repo_contributor_names():
+
+    contributions = BasicContributions()
+    tony = Contributor(names={"Tony", "Anthony"}, emails={"tony@foo"})
+    juan = Contributor(names={"Juan"}, emails={"juan@foo"})
+    contributions.contributors_by_name = {
+        "Tony": tony,
+        "Juan": juan,
+    }
+    assert list(contributions.repo_contributor_names(with_email=False)) == [
+        "Tony",
+        "Juan",
+    ]
+    assert list(contributions.repo_contributor_names(with_email=True)) == [
+        "Tony (tony@foo)",
+        "Juan (juan@foo)",
+    ]
+    # We assume de-duplication has already occurred, so something like this will happen.
+    # This behavior is not a feature, but then again, there's no reason to check/redo what should be done earlier.
+    contributions.contributors_by_name = {
+        "Tony": tony,
+        "Juan": juan,
+        "Anthony": tony,
+    }
+    assert list(contributions.repo_contributor_names(with_email=True)) == [
+        "Tony (tony@foo)",
+        "Juan (juan@foo)",
+        "Anthony (tony@foo)",
+    ]
+    # Note, too, that it's trusting the key because our save code puts the primary_key in the dictionary key,
+    # and if it's been edited by a human, we'll prefer that.
+    contributions.contributors_by_name = {
+        "Tony": tony,
+        "John": juan,
+    }
+    assert list(contributions.repo_contributor_names(with_email=True)) == [
+        "Tony (tony@foo)",
+        "John (juan@foo)",
+    ]
diff --git a/test/test_license_utils.py b/test/test_license_utils.py
index ea7d7666f..670b5b0e5 100644
--- a/test/test_license_utils.py
+++ b/test/test_license_utils.py
@@ -11,7 +11,7 @@
     LicenseFrameworkRegistry, LicenseFramework, PythonLicenseFramework, JavascriptLicenseFramework,
     LicenseAnalysis, LicenseChecker, LicenseStatus, LicenseFileParser,
     LicenseCheckFailure, LicenseOwnershipCheckFailure, LicenseAcceptabilityCheckFailure,
-    logger as license_logger,
+    warnings as license_utils_warnings_module,
 )
 from dcicutils.misc_utils import ignored, file_contents
 from dcicutils.qa_utils import printed_output, MockFileSystem
@@ -326,7 +326,8 @@ def checker(printed, license_warnings):
                                  ' Bar License: library2, library8, libraryA, libraryB',
                                  ' Baz License: library4, library7, library9',
                                  ' Big-Org-Approved: libraryB',
-                                 ' Misc-Copyleft: libraryD']
+                                 ' Misc-Copyleft: libraryD',
+                                 'There is 1 no-longer-missing license: library1.']
 
         javascript_failure = None
         for warning in license_warnings:
@@ -342,7 +343,7 @@ def checker(printed, license_warnings):
 
         assert edited_license_warnings == [
             "There is 1 unexpectedly missing license: library6.",
-            "There is 1 no-longer-missing license: library1.",
+            # "There is 1 no-longer-missing license: library1.",
         ]
 
     do_it()
@@ -371,7 +372,7 @@ def check_license_checker_full_scenario_failing(*, perturb_setup, checker,
                       file=fp)
                 print(license_text_for_testing, file=fp)
 
-            with mock.patch.object(license_logger, 'warning') as mock_license_logger:
+            with mock.patch.object(license_utils_warnings_module, 'warn') as mock_license_logger:
 
                 license_warnings = []
 
@@ -520,7 +521,7 @@ def test_license_checker_full_scenario_succeeding():
 
         with mock.patch.object(PythonLicenseFramework, "get_dependencies") as mock_get_dependencies:
 
-            with mock.patch.object(license_logger, 'warning') as mock_license_logger:
+            with mock.patch.object(license_utils_warnings_module, 'warn') as mock_license_logger:
 
                 license_warnings = []
 
@@ -655,7 +656,7 @@ def test_validate_simple_license_file():
 
     with mfs.mock_exists_open_remove():
 
-        with mock.patch.object(license_logger, 'warning') as mock_license_logger:
+        with mock.patch.object(license_utils_warnings_module, 'warn') as mock_license_logger:
 
             license_warnings = []
 
diff --git a/test/test_misc.py b/test/test_misc.py
index f77253cc6..b790b8d6a 100644
--- a/test/test_misc.py
+++ b/test/test_misc.py
@@ -2,7 +2,7 @@
 import pytest
 
 from dcicutils.license_utils import C4PythonInfrastructureLicenseChecker
-from dcicutils.qa_checkers import DocsChecker, DebuggingArtifactChecker, ChangeLogChecker
+from dcicutils.qa_checkers import DocsChecker, DebuggingArtifactChecker, ChangeLogChecker, ContributionsChecker
 
 from .conftest_settings import REPOSITORY_ROOT_DIR
 
@@ -48,3 +48,8 @@ class MyChangeLogChecker(ChangeLogChecker):
 def test_license_compatibility():
 
     C4PythonInfrastructureLicenseChecker.validate()
+
+
+@pytest.mark.static
+def test_contributions():
+    ContributionsChecker.validate()
diff --git a/test/test_qa_checkers.py b/test/test_qa_checkers.py
index 9f0d0ca85..f1aa706c3 100644
--- a/test/test_qa_checkers.py
+++ b/test/test_qa_checkers.py
@@ -1,12 +1,18 @@
+import io
+import json
 import os
 import pytest
 
+from dcicutils import contribution_utils as contribution_utils_module
+from dcicutils.contribution_utils import BasicContributions
 from dcicutils.misc_utils import lines_printed_to, remove_prefix
 from dcicutils.qa_checkers import (
     DebuggingArtifactChecker, DocsChecker, ChangeLogChecker, VersionChecker, confirm_no_uses, find_uses,
+    ContributionsChecker
 )
 from dcicutils.qa_utils import MockFileSystem, printed_output
 from unittest import mock
+from .test_contribution_utils import git_context, SAMPLE_PROJECT_HOME
 from .conftest_settings import TEST_DIR
 
 
@@ -252,7 +258,7 @@ def test_debugging_artifact_checker():
                 assert isinstance(exc.value, AssertionError)
                 assert str(exc.value) == "1 problem detected:\n In foo/bar.py, 1 call to print."
 
-                assert printed.lines == []  # We might at some point print the actual problems, but we don't now.
+                assert printed.lines == []  # We might at some point print the actual problems, but we don't know.
 
                 with lines_printed_to("foo/bar.py") as out:
                     out('x = 1')
@@ -262,4 +268,60 @@ def test_debugging_artifact_checker():
                 assert isinstance(exc.value, AssertionError)
                 assert str(exc.value) == "1 problem detected:\n In foo/bar.py, 1 active use of pdb.set_trace."
 
-                assert printed.lines == []  # We might at some point print the actual problems, but we don't now.
+                assert printed.lines == []  # We might at some point print the actual problems, but we don't know.
+
+
+@mock.patch.object(contribution_utils_module, "PROJECT_HOME", SAMPLE_PROJECT_HOME)
+def test_contribution_checker():
+
+    print()  # start on a fresh line
+    some_repo_name = 'foo'
+    mfs = MockFileSystem()
+    with mfs.mock_exists_open_remove_abspath_getcwd_chdir():
+        # Predict which cache file we'll need, so we can make it ahead of time.
+        contributions_cache_file = BasicContributions(repo=some_repo_name).contributors_json_file()
+        print(f"contributions_cache_file={contributions_cache_file}")
+        os.chdir(os.path.join(SAMPLE_PROJECT_HOME, some_repo_name))
+        print(f"working dir={os.getcwd()}")
+        with io.open(contributions_cache_file, 'w') as fp:
+            cache_data = {
+                "forked_at": "2015-01-01T12:34:56-05:00",
+                "excluded_fork": None,
+                "pre_fork_contributors_by_name": None,
+                "contributors_by_name": {
+                    "John Smith": {
+                        "emails": ["jsmith@somewhere"],
+                        "names": ["John Smith"],
+                    }
+                }
+            }
+            json.dump(cache_data, fp=fp)
+        mocked_commits = {
+            some_repo_name: [
+                {
+                    "hexsha": "aaaa",
+                    "committed_datetime": "2016-01-01T01:23:45-05:00",
+                    "author": {"name": "John Smith", "email": "jsmith@somewhere"},
+                    "message": "something"
+                },
+                {
+                    "hexsha": "bbbb",
+                    "committed_datetime": "2017-01-02T12:34:56-05:00",
+                    "author": {"name": "Sally", "email": "ssmith@elsewhere"},
+                    "message": "something else"
+                }
+            ]
+        }
+        with git_context(mocked_commits=mocked_commits):
+            with printed_output() as printed:
+                with pytest.raises(AssertionError) as exc:
+                    ContributionsChecker.validate()
+                assert str(exc.value) == "There are contributor cache discrepancies."
+                assert printed.lines == [
+                    "John Smith (jsmith@somewhere)",
+                    "Sally (ssmith@elsewhere)",
+                    "===== THERE ARE CONTRIBUTOR CACHE DISCREPANCIES =====",
+                    "To Add:",
+                    " * contributors.Sally.emails.ssmith@elsewhere",
+                    " * contributors.Sally.names.Sally",
+                ]