import os
from os import path

from git import Repo, FetchInfo, RemoteReference, NULL_TREE

# Permissions for directories created under the working directory.
ACCESS_RIGHTS = 0o700


def diff_size(diff):
    """Return the net size change, in bytes, represented by *diff*.

    Compares the sizes of the two blobs involved:
      * deletion -> negative size of the removed blob
      * new file -> size of the added blob
      * modification -> a_blob.size - b_blob.size
    Returns 0 (with a warning printed) when *diff* is falsy.
    """
    if not diff:
        print("Could not get diff size because the diff was undefined")
        return 0
    if diff.b_blob is None and diff.deleted_file:
        # This is a deletion, so return negative the size of the original.
        return diff.a_blob.size * -1
    if diff.a_blob is None and diff.new_file:
        # This is a new file, so return the size of the new value.
        return diff.b_blob.size
    # Otherwise just return the size a-b
    return diff.a_blob.size - diff.b_blob.size


def get_change_data(git_commit):
    """Aggregate insertion/deletion/line/size statistics for one commit.

    Returns a dict with keys 'insertions', 'deletions', 'lines' and 'size'
    summed over every file touched by *git_commit*.
    """
    total_insertions = 0
    total_deletions = 0
    total_lines = 0
    total_size = 0

    # Determine the parent of the commit to diff against.
    # If no parent, this is the first commit, so use the empty tree.
    # Then create a mapping of path to diff for each file changed.
    parent = git_commit.parents[0] if git_commit.parents else None
    if parent:
        diffs = {diff.a_path: diff for diff in git_commit.diff(parent)}
    else:
        # Root commit: diff against the empty tree so every file shows up.
        # (Previously `diffs` was left undefined here, causing a NameError.)
        diffs = {diff.a_path: diff for diff in git_commit.diff(NULL_TREE)}

    for objpath, stats in git_commit.stats.files.items():
        # Select the diff for the path in the stats
        diff = diffs.get(objpath)

        # If the path is not in the dictionary, it's because it was
        # renamed, so search through the b_paths for the current name.
        # (Previously this compared against the `os.path` module — a bug
        # that made the lookup always fail.)
        if diff is None:
            diff = next(
                (d for d in diffs.values()
                 if d.renamed and d.b_path == objpath),
                None,
            )

        # Update the stats with the additional information
        insertions = stats['insertions']
        deletions = stats['deletions']
        lines = stats['lines']
        size = diff_size(diff)

        total_insertions += insertions
        total_deletions += deletions
        total_lines += lines
        total_size += size

    return {'insertions': total_insertions,
            'deletions': total_deletions,
            'lines': total_lines,
            'size': total_size}


def get_commits(git_commit):
    """Yield *git_commit* and every ancestor commit, breadth-first.

    Each commit is yielded at most once even when the history merges,
    tracked via a set of already-seen hexshas.
    """
    processed = set()
    yield git_commit  # return the first commit itself

    parents = set(git_commit.parents)
    next_parents = set()
    while parents:  # iterate
        for parent in parents:  # go over all parents
            if parent.hexsha not in processed:
                yield parent  # return each parent
                processed.add(parent.hexsha)
                next_parents.update(parent.parents)  # for the next iteration
        parents = set(next_parents)
        next_parents = set()  # start again


if __name__ == "__main__":
    workdir = "/tmp/repos"
    repo_url = "http://bot:bot@192.168.50.5:7990/scm/tes/test_repo.git"

    repo_dir = path.join(workdir, "test_repo")
    if not path.isdir(repo_dir):
        os.makedirs(repo_dir, ACCESS_RIGHTS)

    bare_repo = Repo.init(repo_dir, bare=True)
    # Repo.remote() raises ValueError when the remote is missing, so test
    # the `remotes` list instead of calling remote() as a truthiness check.
    if not bare_repo.remotes:
        bare_repo.create_remote('origin', repo_url)

    updates = list(bare_repo.remote().fetch())

    if updates:
        # Filter updates so that all items refer to "remote heads",
        # i.e. not e.g. tags.
        remote_heads = [i for i in updates
                        if isinstance(i.ref, RemoteReference)]
        for info in remote_heads:
            print("Fetching commits for {}".format(info))
            # Get the commits
            for commit in get_commits(info.commit):
                changes = get_change_data(commit)
                print("Commit '{}' changes {}".format(commit.hexsha, changes))