script to get statistics from git repo
This commit is contained in:
parent
3d3de17eb0
commit
814cd775bc
2 changed files with 87 additions and 19 deletions
1
scripts/metrics/.gitignore
vendored
Normal file
1
scripts/metrics/.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
tmp
|
|
@ -13,14 +13,16 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# generate statistics on the llvm github repository
|
||||
|
||||
import csv
|
||||
from collections import OrderedDict
|
||||
import datetime
|
||||
import git
|
||||
import re
|
||||
import os
|
||||
from typing import Dict, Optional, List
|
||||
|
||||
import random
|
||||
import string
|
||||
|
||||
REVISION_REGEX = re.compile(
|
||||
r'^Differential Revision: https://reviews\.llvm\.org/(.*)$',
|
||||
|
@ -30,10 +32,13 @@ REVERT_REGEX = re.compile(r'^Revert "(.+)"')
|
|||
|
||||
class MyCommit:
|
||||
|
||||
SALT = ''.join(random.choices(
|
||||
string.ascii_lowercase + string.ascii_uppercase + string.digits, k=16))
|
||||
|
||||
def __init__(self, commit: git.Commit):
|
||||
self.chash = commit.hexsha # type: str
|
||||
self.author = commit.author.email # type: str
|
||||
self.commiter = commit.committer.email # type:str
|
||||
self.author = hash(commit.author.email + MyCommit.SALT) # type: int
|
||||
self.commiter = hash(commit.committer.email + MyCommit.SALT) # type:int
|
||||
self.summary = commit.summary # type: str
|
||||
self.date = datetime.datetime.fromtimestamp(
|
||||
commit.committed_date) # type: datetime.datetime
|
||||
|
@ -61,13 +66,30 @@ class MyCommit:
|
|||
def __str__(self):
|
||||
return self.chash
|
||||
|
||||
@property
def was_reverted(self) -> bool:
    """True when ``self.reverted_by`` is set, i.e. is not ``None``."""
    reverting_commit = self.reverted_by
    return reverting_commit is not None
|
||||
|
||||
@property
def was_reviewed(self) -> bool:
    """True when ``self.phab_revision`` is set, i.e. is not ``None``."""
    revision = self.phab_revision
    return revision is not None
|
||||
|
||||
@property
def is_revert(self) -> bool:
    """True when ``self.reverts`` is set, i.e. is not ``None``."""
    reverted_target = self.reverts
    return reverted_target is not None
|
||||
|
||||
@property
def week(self) -> str:
    """Return the ISO week of ``self.date`` as a sortable key like ``2019-w07``.

    The year is the ISO year that belongs to the ISO week number, not the
    calendar year, so days around New Year land in the correct week
    (2019-12-30 is ``2020-w01``).  The week number is zero-padded to two
    digits so that sorting the keys as strings yields chronological order.
    """
    iso = self.date.isocalendar()
    # iso[0] is the ISO year, iso[1] the ISO week number (1..53).
    return '{}-w{:02d}'.format(iso[0], iso[1])
|
||||
|
||||
|
||||
class RepoStats:
|
||||
|
||||
def __init__(self):
|
||||
self.commit_by_hash = dict() # type: Dict[str, MyCommit]
|
||||
self.commit_by_summary = dict() # type: Dict[str, List[MyCommit]]
|
||||
self.commit_by_day = dict() # type: Dict[datetime.date, List[MyCommit]]
|
||||
self.commit_by_week = dict() # type: Dict[str, List[MyCommit]]
|
||||
self.commit_by_author = dict() # type: Dict[int, List[MyCommit]]
|
||||
|
||||
def parse_repo(self, git_dir: str, maxage: datetime.datetime):
|
||||
repo = git.Repo(git_dir)
|
||||
|
@ -76,10 +98,12 @@ class RepoStats:
|
|||
break
|
||||
mycommit = MyCommit(commit)
|
||||
self.commit_by_hash[mycommit.chash] = mycommit
|
||||
self.commit_by_summary.setdefault(mycommit.summary, [])
|
||||
self.commit_by_summary[mycommit.summary].append(mycommit)
|
||||
self.commit_by_day.setdefault(mycommit.day, [])
|
||||
self.commit_by_day[mycommit.day].append(mycommit)
|
||||
self.commit_by_summary.setdefault(mycommit.summary, [])\
|
||||
.append(mycommit)
|
||||
self.commit_by_week.setdefault(mycommit.week, []).append(mycommit)
|
||||
self.commit_by_author.setdefault(mycommit.author, [])\
|
||||
.append(mycommit)
|
||||
|
||||
print('Read {} commits'.format(len(self.commit_by_hash)))
|
||||
|
||||
def find_reverts(self):
|
||||
|
@ -100,22 +124,21 @@ class RepoStats:
|
|||
# TODO: try weekly stats, they might be smoother
|
||||
# https://stackoverflow.com/questions/2600775/how-to-get-week-number-in-python
|
||||
def dump_daily_stats(self):
|
||||
fieldnames = ["day", "num_commits", "num_reverts", "percentage_reverts",
|
||||
fieldnames = ["week", "num_commits", "num_reverts", "percentage_reverts",
|
||||
"num_reviewed", "percentage_reviewed"]
|
||||
csvfile = open('llvm-project-daily.csv', 'w')
|
||||
csvfile = open('tmp/llvm-project-weekly.csv', 'w')
|
||||
writer = csv.DictWriter(csvfile, fieldnames=fieldnames,
|
||||
dialect=csv.excel)
|
||||
writer.writeheader()
|
||||
for day in sorted(self.commit_by_day.keys()):
|
||||
commits = self.commit_by_day[day]
|
||||
for week in sorted(self.commit_by_week.keys()):
|
||||
commits = self.commit_by_week[week]
|
||||
num_commits = len(commits)
|
||||
num_reverts = len([c for c in commits if c.reverts is not None])
|
||||
num_reverts = len([c for c in commits if c.is_revert])
|
||||
percentage_reverts = 100.0*num_reverts / num_commits
|
||||
num_reviewed = len([c for c in commits
|
||||
if c.phab_revision is not None])
|
||||
num_reviewed = len([c for c in commits if c.was_reviewed])
|
||||
percentage_reviewed = 100*num_reviewed / (num_commits - num_reverts)
|
||||
writer.writerow({
|
||||
"day": day,
|
||||
"week": week,
|
||||
"num_commits": num_commits,
|
||||
"num_reverts": num_reverts,
|
||||
"percentage_reverts": percentage_reverts,
|
||||
|
@ -126,24 +149,68 @@ class RepoStats:
|
|||
def dump_overall_stats(self):
    """Print repository-wide commit, revert, review and authorship figures.

    Every commit stored in ``self.commit_by_hash`` is inspected and the
    absolute counts plus the matching percentages are printed to stdout.
    A percentage whose denominator is zero (no commits at all, no reviewed
    commits, or only reviewed commits) is printed as ``0.00`` instead of
    raising ``ZeroDivisionError``.
    """

    def percent(part, whole):
        # An empty population has no meaningful share; report 0 instead of
        # crashing halfway through the report.
        return 100 * part / whole if whole else 0.0

    commits = list(self.commit_by_hash.values())
    num_commits = len(commits)

    num_reverts = sum(1 for c in commits if c.is_revert)
    print('Number of commits: {}'.format(num_commits))
    print('Number of reverts: {}'.format(num_reverts))
    print('percentage of reverts: {:0.2f}'.format(
        percent(num_reverts, num_commits)))

    num_reviewed = sum(1 for c in commits if c.was_reviewed)
    print('Number of reviewed commits: {}'.format(num_reviewed))
    print('percentage of reviewed commits: {:0.2f}'.format(
        percent(num_reviewed, num_commits)))

    # Split the reverted commits by whether they had been reviewed.
    num_reviewed_reverted = sum(
        1 for c in commits if c.was_reviewed and c.was_reverted)
    num_not_reviewed_reverted = sum(
        1 for c in commits if not c.was_reviewed and c.was_reverted)
    print('Number of reviewed that were reverted: {}'.format(
        num_reviewed_reverted))
    print('Number of NOT reviewed that were reverted: {}'.format(
        num_not_reviewed_reverted))
    print('percentage of reviewed that were reverted: {:0.2f}'.format(
        percent(num_reviewed_reverted, num_reviewed)))
    print('percentage of NOT reviewed that were reverted: {:0.2f}'.format(
        percent(num_not_reviewed_reverted, num_commits - num_reviewed)))

    num_foreign_committer = sum(
        1 for c in commits if c.author != c.commiter)
    print('Number of commits where author != committer: {}'.format(
        num_foreign_committer))
    print('Percentage of commits where author != committer: {:0.2f}'.format(
        percent(num_foreign_committer, num_commits)))
|
||||
|
||||
def dump_author_stats(self):
    """Print the number of authors and write per-author statistics as CSV.

    One row per key of ``self.commit_by_author`` is written to
    ``tmp/llvm-project-authors.csv`` (relative to the current working
    directory).  Each row holds the author key, the number of commits, how
    many of those commits were reverted and how many were reviewed, and
    both figures as a percentage of the author's commits.
    """
    print('Number of authors: {}'.format(len(self.commit_by_author)))
    fieldnames = ["author", "num_commits", "num_reverts", "percentage_reverts",
                  "num_reviewed", "percentage_reviewed"]
    # Create the output directory on demand so a fresh checkout does not
    # fail with FileNotFoundError.
    os.makedirs('tmp', exist_ok=True)
    # newline='' lets the csv module control line endings; the context
    # manager guarantees the file is flushed and closed.
    with open('tmp/llvm-project-authors.csv', 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames,
                                dialect=csv.excel)
        writer.writeheader()
        for author, commits in self.commit_by_author.items():
            num_commits = len(commits)
            num_reverts = sum(1 for c in commits if c.was_reverted)
            num_reviewed = sum(1 for c in commits if c.was_reviewed)
            writer.writerow({
                "author": author,
                "num_commits": num_commits,
                "num_reverts": num_reverts,
                "percentage_reverts": 100 * num_reverts / num_commits,
                "num_reviewed": num_reviewed,
                "percentage_reviewed": 100 * num_reviewed / num_commits,
            })
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Timezone-aware (UTC) cut-off handed to parse_repo as ``maxage``.
    max_age = datetime.datetime(2019, 10, 1, tzinfo=datetime.timezone.utc)
    # TODO: make the path configurable, and `git clone/pull`
    repo_dir = os.path.expanduser('~/git/llvm-project')

    rs = RepoStats()
    rs.parse_repo(repo_dir, max_age)
    rs.find_reverts()
    # Emit the reports in the same order as before.
    rs.dump_daily_stats()
    rs.dump_overall_stats()
    rs.dump_author_stats()
|
||||
|
|
Loading…
Reference in a new issue