Always use the CommitCache, and make it a singleton (#78203)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/78203
Approved by: https://github.com/soulitzer
John Clow
2022-07-07 12:34:55 -07:00
committed by PyTorch MergeBot
parent da549f58d5
commit 62bf807113
4 changed files with 25 additions and 12 deletions
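In short, call sites now share a single lazily created commit-data cache through a module-level accessor instead of constructing CommitDataCache themselves. A minimal usage sketch of the new pattern, using only names introduced in the diff below (the commit hash is a placeholder):

    # Call sites no longer build the cache directly; they share one
    # lazily created instance through the module-level accessor.
    from common import get_commit_data_cache, features_to_dict

    commit_hash = "abc1234"            # placeholder hash, for illustration only
    cache = get_commit_data_cache()    # creates _CommitDataCache on first call, reuses it afterwards
    item = cache.get(commit_hash)      # looks the commit up in the shared cache
    features = features_to_dict(item)  # plain dict, as consumed by CommitList.gen_commit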

scripts/release_notes/.gitignore (new file, 1 addition)

@@ -0,0 +1 @@
+results/*

scripts/release_notes/categorize.py

@@ -1,12 +1,12 @@
 import argparse
 import os
 import textwrap
-from common import categories, topics, CommitDataCache
+from common import categories, topics, get_commit_data_cache
 from commitlist import CommitList
 class Categorizer:
     def __init__(self, path, category='Uncategorized'):
-        self.cache = CommitDataCache()
+        self.cache = get_commit_data_cache()
         self.commits = CommitList.from_existing(path)
         # Special categories: 'Uncategorized'

scripts/release_notes/commitlist.py

@@ -2,9 +2,10 @@ import argparse
 from common import run, topics, get_features
 from collections import defaultdict
 import os
+from pathlib import Path
 import csv
 import pprint
-from common import CommitDataCache
+from common import get_commit_data_cache, features_to_dict
 import re
 import dataclasses
 from typing import List
@@ -68,6 +69,7 @@ class CommitList:
     def write_to_disk(self):
         path = self.path
         rows = self.commits
+        os.makedirs(Path(path).parent, exist_ok=True)
         with open(path, 'w') as csvfile:
             writer = csv.writer(csvfile)
             for commit in rows:
@@ -81,7 +83,8 @@ class CommitList:
     @staticmethod
     def gen_commit(commit_hash):
-        features = get_features(commit_hash, return_dict=True)
+        feature_item = get_commit_data_cache().get(commit_hash)
+        features = features_to_dict(feature_item)
         category, topic = CommitList.categorize(features)
         a1, a2, a3 = (features["accepters"] + ("", "", ""))[:3]
         return Commit(commit_hash, category, topic, features["title"], features["author"], a1, a2, a3)
@@ -253,7 +256,7 @@ def to_markdown(commit_list, category):
             return commit.title
         return match.group(1)
-    cdc = CommitDataCache()
+    cdc = get_commit_data_cache()
     lines = [f'\n## {category}\n']
     for topic in topics:
         lines.append(f'### {topic}\n')

scripts/release_notes/common.py

@@ -1,5 +1,5 @@
 from collections import namedtuple
-from os.path import expanduser
+from pathlib import Path
 import locale
 import subprocess
 import re
@@ -132,7 +132,7 @@ def parse_pr_number(body, commit_hash, title):
 def get_ghstack_token():
     pattern = 'github_oauth = (.*)'
-    with open(expanduser('~/.ghstackrc'), 'r+') as f:
+    with open(Path('~/.ghstackrc').expanduser(), 'r+') as f:
         config = f.read()
         matches = re.findall(pattern, config)
         if len(matches) == 0:
@@ -190,7 +190,7 @@ def github_data(pr_number):
     return labels, author, accepters
-def get_features(commit_hash, return_dict=False):
+def get_features(commit_hash):
     title, body, files_changed = (
         commit_title(commit_hash),
         commit_body(commit_hash),
@@ -202,16 +202,25 @@ def get_features(commit_hash, return_dict=False):
     if pr_number is not None:
         labels, author, accepters = github_data(pr_number)
     result = Features(title, body, pr_number, files_changed, labels, author, accepters)
-    if return_dict:
-        return features_to_dict(result)
     return result
-class CommitDataCache:
-    def __init__(self, path='results/data.json'):
+_commit_data_cache = None
+def get_commit_data_cache(path='results/data.json'):
+    global _commit_data_cache
+    if _commit_data_cache is None:
+        _commit_data_cache = _CommitDataCache(path)
+    return _commit_data_cache
+class _CommitDataCache:
+    def __init__(self, path):
         self.path = path
         self.data = {}
         if os.path.exists(path):
             self.data = self.read_from_disk()
+        else:
+            os.makedirs(Path(path).parent, exist_ok=True)
     def get(self, commit):
         if commit not in self.data.keys():