# NOTE: reconstructed from a collapsed patch. Besides the CLI code below, these
# lines of the patch also create dev/archery/MANIFEST.in containing:
#
#     include ../../LICENSE.txt
#     include ../../NOTICE.txt
#
#     include archery/reports/*
#
# NOTE(review): the Jinja templates added by this patch are stored under (and
# loaded from) archery/templates/, so the last pattern presumably should be
# `archery/templates/*` -- confirm before packaging.
#
# dev/archery/archery/cli.py: `import pathlib` is added to the module imports
# and the commands below are appended after `docker_compose_images`.

import pathlib


@archery.group('release')
@click.option("--src", metavar="<arrow_src>", default=None,
              callback=validate_arrow_sources,
              help="Specify Arrow source directory.")
@click.option("--jira-cache", type=click.Path(), default=None,
              help="File path to cache queried JIRA issues per version.")
@click.pass_obj
def release(obj, src, jira_cache):
    """Release related commands."""
    from .release import Jira, CachedJira

    jira = Jira()
    if jira_cache is not None:
        # serve repeated queries from the on-disk cache at `jira_cache`
        jira = CachedJira(jira_cache, jira=jira)

    # state shared with the subcommands through the click context object
    obj['jira'] = jira
    obj['repo'] = src.path


@release.command('curate')
@click.argument('version')
@click.pass_obj
def release_curate(obj, version):
    """Release curation."""
    from .release import Release

    release = Release.from_jira(version, jira=obj['jira'], repo=obj['repo'])
    curation = release.curate()

    click.echo(curation.render('console'))


@release.group('changelog')
def release_changelog():
    """Release changelog."""
    pass


@release_changelog.command('add')
@click.argument('version')
@click.pass_obj
def release_changelog_add(obj, version):
    """Prepend the changelog with the current release"""
    from .release import Release

    jira, repo = obj['jira'], obj['repo']

    # just handle the current version
    release = Release.from_jira(version, jira=jira, repo=repo)
    if release.is_released:
        raise ValueError('This version has been already released!')

    changelog = release.changelog()
    changelog_path = pathlib.Path(repo) / 'CHANGELOG.md'

    # read and write with an explicit encoding so the result does not depend
    # on the locale of the machine running the release
    current_content = changelog_path.read_text(encoding='utf-8')
    new_content = changelog.render('markdown') + current_content

    changelog_path.write_text(new_content, encoding='utf-8')
    click.echo("CHANGELOG.md is updated!")


@release_changelog.command('regenerate')
@click.pass_obj
def release_changelog_regenerate(obj):
    """Regenerate the whole CHANGELOG.md file"""
    from .release import Release

    jira, repo = obj['jira'], obj['repo']
    changelogs = []

    # only already released versions are rendered; pending ones are skipped
    for version in jira.arrow_versions():
        if not version.released:
            continue
        release = Release.from_jira(version, jira=jira, repo=repo)
        click.echo('Querying changelog for version: {}'.format(version))
        changelogs.append(release.changelog())

    click.echo('Rendering new CHANGELOG.md file...')
    changelog_path = pathlib.Path(repo) / 'CHANGELOG.md'
    # the file is overwritten, entries follow the order of arrow_versions()
    with changelog_path.open('w', encoding='utf-8') as fp:
        for cl in changelogs:
            fp.write(cl.render('markdown'))


@release.command('cherry-pick')
@click.argument('version')
@click.pass_obj
def release_cherry_pick(obj, version):
    """Cherry pick commits.

    Prints the git commands which update the maintenance branch of the
    patch release VERSION; nothing is executed by this command itself.
    """
    from .release import Release, PatchRelease

    # the `release` group only stores 'jira' and 'repo' in the context
    # object, so the release has to be constructed here like in the sibling
    # commands
    release = Release.from_jira(version, jira=obj['jira'], repo=obj['repo'])
    if not isinstance(release, PatchRelease):
        raise click.UsageError('Cherry-pick command only supported for patch '
                               'releases')

    for cmd in release.generate_update_branch_commands():
        click.echo(cmd)


if __name__ == "__main__":
    archery(obj={})

# --- dev/archery/archery/release.py (new file in this patch) ---
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.
# The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from collections import defaultdict
import functools
import os
import re
import shelve
import warnings

from git import Repo
from jira import JIRA
from semver import VersionInfo as SemVer

from .utils.source import ArrowSources
from .utils.report import JinjaReport


def cached_property(fn):
    """Turn a zero-argument method into a per-instance cached property.

    The value is computed on first access and stored in the instance's
    ``__dict__``, so every instance keeps its own value and no module level
    cache holds a reference to the instance.
    """
    key = '_cached_{}'.format(fn.__name__)

    @functools.wraps(fn)
    def getter(self):
        cache = self.__dict__
        if key not in cache:
            cache[key] = fn(self)
        return cache[key]

    return property(getter)


class JiraVersion(SemVer):
    """Semantic version carrying the release metadata of a JIRA version."""

    __slots__ = SemVer.__slots__ + ('released', 'release_date')

    def __init__(self, original_jira_version):
        # version names which are not valid semantic versions raise
        # ValueError here (Jira.arrow_versions() relies on that)
        super().__init__(**SemVer.parse(original_jira_version.name).to_dict())
        self.released = original_jira_version.released
        # `releaseDate` may be missing from the JIRA object
        self.release_date = getattr(original_jira_version, 'releaseDate', None)


class JiraIssue:
    """Lightweight copy of the JIRA issue fields used by the reports."""

    def __init__(self, original_jira_issue):
        self.key = original_jira_issue.key
        self.type = original_jira_issue.fields.issuetype.name
        self.summary = original_jira_issue.fields.summary

    @property
    def project(self):
        """Project part of the key, e.g. ``ARROW`` for ``ARROW-1234``."""
        return self.key.split('-')[0]

    @property
    def number(self):
        """Numeric part of the key, e.g. ``1234`` for ``ARROW-1234``."""
        return int(self.key.split('-')[1])


class Jira(JIRA):
    """JIRA client bound to the Apache JIRA server with Arrow helpers."""

    def __init__(self, user=None, password=None):
        # fall back to the environment when no credentials are passed
        user = user or os.environ.get('APACHE_JIRA_USER')
        password = password or os.environ.get('APACHE_JIRA_PASSWORD')
        super().__init__(
            {'server': 'https://issues.apache.org/jira'},
            basic_auth=(user, password)
        )

    def arrow_version(self, version_string):
        """Return the JiraVersion matching `version_string`.

        Raises ValueError if none of the Arrow versions matches.
        """
        # query version from jira to populated with additional metadata
        versions = self.arrow_versions()
        # JiraVersion instances are comparable with strings
        return versions[versions.index(version_string)]

    def arrow_versions(self):
        """Return the Arrow versions as JiraVersions, newest first."""
        versions = []
        for v in self.project_versions('ARROW'):
            try:
                versions.append(JiraVersion(v))
            except ValueError:
                # ignore invalid semantic versions like JS-0.4.0
                continue
        return sorted(versions, reverse=True)

    def issue(self, key):
        """Query a single issue by key and wrap it into a JiraIssue."""
        return JiraIssue(super().issue(key))

    def arrow_issues(self, version):
        """Return the ARROW issues whose fix version is `version`."""
        query = "project=ARROW AND fixVersion={}".format(version)
        issues = self.search_issues(query, maxResults=False)
        return list(map(JiraIssue, issues))


class CachedJira:
    """Proxy around a Jira client which caches method results in a shelve.

    Callable attributes of the wrapped client are replaced by caching
    wrappers, any other attribute is forwarded unchanged.
    """

    def __init__(self, cache_path, jira=None):
        self.jira = jira or Jira()
        self.cache_path = cache_path

    def __getattr__(self, name):
        attr = getattr(self.jira, name)
        return self._cached(name, attr) if callable(attr) else attr

    def _cached(self, name, method):
        def wrapper(*args, **kwargs):
            # the cache key is the textual form of the call
            key = str((name, args, kwargs))
            # the shelve is opened (and closed) for every call
            with shelve.open(self.cache_path) as cache:
                try:
                    result = cache[key]
                except KeyError:
                    cache[key] = result = method(*args, **kwargs)
            return result
        return wrapper


# Commit headline layout: "<ISSUE>: [<Component>][<Component>] <summary>"
# where every part except the summary is optional. The group names are the
# keys read by CommitTitle.parse().
_TITLE_REGEX = re.compile(
    r"(?P<issue>(?P<project>(ARROW|PARQUET))\-\d+)?\s*:?\s*"
    r"(?P<components>\[.*\])?\s*(?P<summary>.*)"
)
_COMPONENT_REGEX = re.compile(r"\[([^\[\]]+)\]")


class CommitTitle:
    """Structured form of a commit headline."""

    def __init__(self, summary, project=None, issue=None, components=None):
        self.project = project
        self.issue = issue
        self.components = components or []
        self.summary = summary

    def __str__(self):
        parts = []
        if self.issue:
            parts.append("{}: ".format(self.issue))
        if self.components:
            parts.extend("[{}]".format(c) for c in self.components)
            parts.append(" ")
        parts.append(self.summary)
        return "".join(parts)

    @classmethod
    def parse(cls, headline):
        """Split `headline` into issue key, project, components and summary.

        A headline which doesn't match the expected layout is kept as the
        summary and a warning is emitted.
        """
        matches = _TITLE_REGEX.match(headline)
        if matches is None:
            warnings.warn(
                "Unable to parse commit message `{}`".format(headline)
            )
            return cls(headline)

        values = matches.groupdict()
        # "[A][B]" -> ['A', 'B']
        components = values.get('components') or ''
        components = _COMPONENT_REGEX.findall(components)

        return cls(
            values['summary'],
            project=values.get('project'),
            issue=values.get('issue'),
            components=components
        )


class Commit:
    """Commit object extended with the attributes of its parsed title.

    Attribute lookup prefers the parsed CommitTitle (so `summary` is the
    parsed summary) and falls back to the wrapped commit object.
    """

    def __init__(self, wrapped):
        self._title = CommitTitle.parse(wrapped.summary)
        self._wrapped = wrapped

    def __getattr__(self, attr):
        if attr in ('_title', '_wrapped'):
            # not initialized yet, don't recurse into __getattr__ again
            raise AttributeError(attr)
        if hasattr(self._title, attr):
            return getattr(self._title, attr)
        else:
            return getattr(self._wrapped, attr)

    def __repr__(self):
        template = '<Commit sha={!r} issue={!r} components={!r} summary={!r}>'
        return template.format(self.hexsha, self.issue, self.components,
                               self.summary)

    @property
    def url(self):
        return 'https://github.com/apache/arrow/commit/{}'.format(self.hexsha)

    @property
    def title(self):
        return self._title


class ReleaseCuration(JinjaReport):
    """Report grouping the commits and issues of a release (see curate)."""

    templates = {
        'console': 'release_curation.txt.j2'
    }
    fields = [
        'release',
        'within',
        'outside',
        'nojira',
        'parquet',
        'nopatch'
    ]


class JiraChangelog(JinjaReport):
    """Changelog report of a release, issues organized into categories."""

    # NOTE(review): only the markdown template is visible next to this code,
    # confirm that release_changelog.html.j2 exists before rendering 'html'
    templates = {
        'markdown': 'release_changelog.md.j2',
        'html': 'release_changelog.html.j2'
    }
    fields = [
        'release',
        'categories'
    ]


class Release:
    """An Arrow release described by a JIRA version and a git repository.

    Instances are created with Release.from_jira() which chooses between
    Release and PatchRelease based on the version number.
    """

    def __init__(self):
        raise TypeError("Do not initialize Release class directly, use "
                        "Release.from_jira(version) instead.")

    def __repr__(self):
        if self.version.released:
            status = "released_at={!r}".format(self.version.release_date)
        else:
            status = "pending"
        return "<{} {!r} {}>".format(self.__class__.__name__,
                                     str(self.version), status)

    @classmethod
    def from_jira(cls, version, jira=None, repo=None):
        """Create the release object of `version`.

        Parameters
        ----------
        version : str or JiraVersion
            Strings are resolved to a JiraVersion by querying jira.
        jira : Jira or CachedJira, default None
            Client to query with, a new Jira() is created if omitted.
        repo : path or git.Repo, default None
            Arrow repository; located with ArrowSources.find() if omitted.

        Raises TypeError if `version` is neither a str nor a JiraVersion.
        """
        jira = jira or Jira()

        if repo is None:
            arrow = ArrowSources.find()
            repo = Repo(arrow.path)
        elif not isinstance(repo, Repo):
            # anything else is treated as the path of the repository
            repo = Repo(repo)

        if isinstance(version, str):
            version = jira.arrow_version(version)
        elif not isinstance(version, JiraVersion):
            raise TypeError(version)

        # decide the type of the release based on the version number
        klass = Release if version.patch == 0 else PatchRelease

        # prevent instantiating release object directly
        obj = klass.__new__(klass)
        obj.version = version
        obj.jira = jira
        obj.repo = repo

        return obj

    @property
    def is_released(self):
        return self.version.released

    @property
    def tag(self):
        return "apache-arrow-{}".format(str(self.version))

    @property
    def branch(self):
        # TODO(kszucs): add apache remote
        return "master"

    @cached_property
    def previous(self):
        """The preceding non-patch release or None for the first release."""
        # select all non-patch releases
        versions = [v for v in self.jira.arrow_versions() if v.patch == 0]
        position = versions.index(self.version) + 1
        if position == len(versions):
            # first release doesn't have a previous one
            return None
        previous = versions[position]
        # reuse the (possibly cached) jira client and the opened repository
        # instead of letting from_jira() create new ones
        return Release.from_jira(previous, jira=self.jira, repo=self.repo)

    @cached_property
    def issues(self):
        """Mapping of issue key => JiraIssue assigned to this version."""
        return {i.key: i for i in self.jira.arrow_issues(self.version)}

    @cached_property
    def commits(self):
        """
        All commits applied between two versions on the master branch.
        """
        if self.version.released:
            upper = self.repo.tags[self.tag]
        else:
            try:
                upper = self.repo.branches[self.branch]
            except IndexError:
                warnings.warn("Release branch `{}` doesn't exist."
                              .format(self.branch))
                return []

        previous = self.previous
        if previous is None:
            # first release: there is no lower bound, take every commit
            # reachable from `upper` ("..upper" would mean "HEAD..upper")
            commit_range = str(upper)
        else:
            lower = self.repo.tags[previous.tag]
            commit_range = "{}..{}".format(lower, upper)

        return list(map(Commit, self.repo.iter_commits(commit_range)))

    def curate(self):
        """Sort the commits and issues of the release into a ReleaseCuration.

        within:  (issue, commit) pairs with the issue assigned to the version
        outside: (issue, commit) pairs with the issue assigned elsewhere
        parquet: (issue, commit) pairs of PARQUET issues
        nojira:  commits without an issue key in the title
        nopatch: issues of the version without a commit
        """
        # handle commits with parquet issue key specially and query them from
        # jira and add it to the issues
        release_issues = self.issues

        within, outside, nojira, parquet = [], [], [], []
        for c in self.commits:
            if c.issue is None:
                nojira.append(c)
            elif c.issue in release_issues:
                within.append((release_issues[c.issue], c))
            elif c.project == 'PARQUET':
                parquet.append((self.jira.issue(c.issue), c))
            else:
                outside.append((self.jira.issue(c.issue), c))

        # remaining jira tickets
        within_keys = {i.key for i, c in within}
        nopatch = [issue for key, issue in release_issues.items()
                   if key not in within_keys]

        return ReleaseCuration(release=self, within=within, outside=outside,
                               nojira=nojira, parquet=parquet, nopatch=nopatch)

    def changelog(self):
        """Create the JiraChangelog report of the release.

        Raises KeyError for issue types missing from the category mapping.
        """
        # get organized report for the release
        curation = self.curate()

        # jira tickets having patches in the release, then the tickets
        # without patches and finally the parquet patches in the release
        release_issues = [issue for issue, _ in curation.within]
        release_issues.extend(curation.nopatch)
        release_issues.extend(issue for issue, _ in curation.parquet)

        # organize issues into categories
        issue_types = {
            'Bug': 'Bug Fixes',
            'Improvement': 'New Features and Improvements',
            'New Feature': 'New Features and Improvements',
            'Sub-task': 'New Features and Improvements',
            'Task': 'New Features and Improvements',
            'Test': 'Bug Fixes',
            'Wish': 'New Features and Improvements',
        }
        categories = defaultdict(list)
        for issue in release_issues:
            categories[issue_types[issue.type]].append(issue)

        # sort issues by the issue key in ascending order
        for issues in categories.values():
            issues.sort(key=lambda issue: (issue.project, issue.number))

        return JiraChangelog(release=self, categories=categories)


class PatchRelease(Release):
    """Release with a non-zero patch number, cut from a maintenance branch."""

    @property
    def branch(self):
        # TODO(kszucs): add apache remote
        return "maint-{}.{}.x".format(self.version.major, self.version.minor)

    @cached_property
    def previous(self):
        """The release preceding this one within the same major.minor."""
        # select all releases under this minor (of the same major, minor
        # numbers repeat across major versions)
        versions = [v for v in self.jira.arrow_versions()
                    if v.major == self.version.major and
                    v.minor == self.version.minor]
        previous = versions[versions.index(self.version) + 1]
        # reuse the (possibly cached) jira client and the opened repository
        return Release.from_jira(previous, jira=self.jira, repo=self.repo)

    def generate_update_branch_commands(self):
        """Return the git commands updating the maintenance branch.

        The list starts with a checkout command followed by one cherry-pick
        per master commit whose issue is assigned to this version.
        """
        # cherry pick not yet cherry picked commits on top of the maintenance
        # branch
        # NOTE(review): the missing branch is created in the repository here
        # although the first generated command creates it as well with
        # `git checkout -b` -- confirm the intended workflow
        try:
            target = self.repo.branches[self.branch]
        except IndexError:
            # maintenance branch doesn't exist yet, so create one based off of
            # the previous git tag
            target = self.repo.create_head(self.branch, self.previous.tag)

        # collect commits applied on master since the root of the maintenance
        # branch (the minor release of this patch release)
        commit_range = "apache-arrow-{}.{}.0..master".format(
            self.version.major, self.version.minor
        )
        commits = list(map(Commit, self.repo.iter_commits(commit_range)))

        # iterate over commits applied on master and keep the original order of
        # the commits to minimize the merge conflicts during cherry-picks
        patch_commits = [c for c in commits if c.issue in self.issues]

        commands = [
            'git checkout -b {} {}'.format(target, self.previous.tag)
        ]
        # iter_commits yields the newest commit first, apply the oldest first
        for c in reversed(patch_commits):
            commands.append(
                'git cherry-pick {}  # {}'.format(c.hexsha, c.title)
            )

        return commands

    # TODO(kszucs): update_branch method which tries to cherry pick to a
    # temporary branch and if the patches apply cleanly then update the maint
    # reference

# --- patch continues: dev/release/update-changelog.sh is renamed to
# dev/archery/archery/templates/release_changelog.md.j2
# (old mode 100755, new mode 100644, similarity index 71%) ---
index 4fda016d1d59..c0406ddf4e22 --- a/dev/release/update-changelog.sh +++ b/dev/archery/archery/templates/release_changelog.md.j2 @@ -1,5 +1,4 @@ -#!/bin/bash -# +{# # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -16,16 +15,15 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -# -set -e - -SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +#} +# Apache Arrow {{ release.version }} ({{ release.version.release_date or today() }}) -version=$1 +{% for category, issues in categories.items() -%} -CHANGELOG=$SOURCE_DIR/../../CHANGELOG.md +## {{ category }} -${PYTHON:-python} $SOURCE_DIR/changelog.py $version 0 $CHANGELOG +{% for issue in issues -%} +* [{{ issue.key }}](https://issues.apache.org/jira/browse/{{ issue.key }}) - {{ issue.summary | md }} +{% endfor %} -git add $CHANGELOG -git commit -m "[Release] Update CHANGELOG.md for $version" +{% endfor %} diff --git a/dev/archery/archery/templates/release_curation.txt.j2 b/dev/archery/archery/templates/release_curation.txt.j2 new file mode 100644 index 000000000000..a5d11e9d4af5 --- /dev/null +++ b/dev/archery/archery/templates/release_curation.txt.j2 @@ -0,0 +1,41 @@ +{# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +#} +Total number of JIRA tickets assigned to version {{ release.version }}: {{ release.issues|length }} + +Total number of applied patches since version {{ release.previous.version }}: {{ release.commits|length }} + +Patches with assigned issue in version {{ release.version }}: +{% for issue, commit in within -%} + - {{ commit.url }} {{ commit.title }} +{% endfor %} + +Patches with assigned issue outside of version {{ release.version }}: +{% for issue, commit in outside -%} + - {{ commit.url }} {{ commit.title }} +{% endfor %} + +Patches in version {{ release.version }} without a linked issue: +{% for commit in nojira -%} + - {{ commit.url }} {{ commit.title }} +{% endfor %} + +JIRA issues in version {{ release.version }} without a linked patch: +{% for issue in nopatch -%} + - https://issues.apache.org/jira/browse/{{ issue.key }} +{% endfor %} diff --git a/dev/archery/archery/utils/report.py b/dev/archery/archery/utils/report.py new file mode 100644 index 000000000000..6c7587ddd872 --- /dev/null +++ b/dev/archery/archery/utils/report.py @@ -0,0 +1,64 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from abc import ABCMeta, abstractmethod
import datetime


# characters escaped by markdown_escape(), each is prefixed with a backslash
_MARKDOWN_ESCAPES = str.maketrans(
    {char: '\\' + char for char in ('*', '#', '_', '~', '`', '>')}
)


def markdown_escape(s):
    """Return `s` with the characters ``* # _ ~ ` >`` backslash-escaped."""
    return s.translate(_MARKDOWN_ESCAPES)


class Report(metaclass=ABCMeta):
    """Base class of reports: a named set of required keyword fields.

    Subclasses list the required names in `fields`; the passed values are
    available as attributes of the report.
    """

    def __init__(self, **kwargs):
        for field in self.fields:
            if field not in kwargs:
                raise ValueError('Missing keyword argument {}'.format(field))
        self._data = kwargs

    def __getattr__(self, key):
        # Only called when the regular lookup fails. Look `_data` up in the
        # instance dict to avoid recursing before __init__ has set it, and
        # raise AttributeError so hasattr() and getattr() defaults work.
        try:
            return self.__dict__['_data'][key]
        except KeyError:
            raise AttributeError(key) from None

    @property
    @abstractmethod
    def fields(self):
        pass

    @property
    @abstractmethod
    def templates(self):
        pass


class JinjaReport(Report):
    """Report rendered with the jinja2 templates of the archery package.

    `templates` maps a short template name to a file under the package's
    `templates` directory.
    """

    def __init__(self, **kwargs):
        # jinja2 is listed only in the 'release' extra of setup.py, so it is
        # imported when a report is created rather than with the module
        import jinja2

        self.env = jinja2.Environment(
            loader=jinja2.PackageLoader('archery', 'templates')
        )
        # exposed to the templates as the `md` filter and `today()` function
        self.env.filters['md'] = markdown_escape
        self.env.globals['today'] = datetime.date.today
        super().__init__(**kwargs)

    def render(self, template_name):
        """Render the template registered under `template_name`.

        Raises KeyError if the name is missing from `templates`.
        """
        template_path = self.templates[template_name]
        template = self.env.get_template(template_path)
        return template.render(**self._data)

# --- patch continues with dev/archery/setup.py:
#   extras gains  'release': ['jinja2', 'jira', 'semver', 'gitpython']
#   setup(...) gains  include_package_data=True ---
install_requires=['click>=7'], tests_require=['pytest', 'responses'], extras_require=extras, diff --git a/dev/release/00-prepare.sh b/dev/release/00-prepare.sh index 2e357f09d68a..7b054e9ef9f2 100755 --- a/dev/release/00-prepare.sh +++ b/dev/release/00-prepare.sh @@ -195,7 +195,9 @@ tag=apache-arrow-${version} if [ ${PREPARE_CHANGELOG} -gt 0 ]; then echo "Updating changelog for $version" # Update changelog - $SOURCE_DIR/update-changelog.sh $version + archery release changelog add $version + git add ${SOURCE_DIR}/../../CHANGELOG.md + git commit -m "[Release] Update CHANGELOG.md for $version" fi if [ ${PREPARE_LINUX_PACKAGES} -gt 0 ]; then diff --git a/dev/release/changelog.py b/dev/release/changelog.py deleted file mode 100755 index 4e85e46a0765..000000000000 --- a/dev/release/changelog.py +++ /dev/null @@ -1,247 +0,0 @@ -#!/usr/bin/env python - -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -# Utility for generating changelogs for fix versions -# requirements: pip install jira -# Set $JIRA_USERNAME, $JIRA_PASSWORD environment variables - -from __future__ import print_function - -from collections import defaultdict -from datetime import datetime -from io import StringIO -import locale -import os -import re -import sys - -import jira.client - -# ASF JIRA username -JIRA_USERNAME = os.environ["APACHE_JIRA_USERNAME"] -# ASF JIRA password -JIRA_PASSWORD = os.environ["APACHE_JIRA_PASSWORD"] - -JIRA_API_BASE = "https://issues.apache.org/jira" - -asf_jira = jira.client.JIRA(options={'server': JIRA_API_BASE}, - basic_auth=(JIRA_USERNAME, JIRA_PASSWORD)) - - -locale.setlocale(locale.LC_ALL, 'en_US.utf-8') - - -release_dir = os.path.realpath(os.path.dirname(__file__)) -ARROW_ROOT_DEFAULT = os.path.join(release_dir, '..', '..') -ARROW_ROOT = os.environ.get("ARROW_ROOT", ARROW_ROOT_DEFAULT) - - -def get_issues_for_version(version): - jql = ("project=ARROW " - "AND fixVersion='{0}' " - "AND status = Resolved " - "AND resolution in (Fixed, Done) " - "ORDER BY issuetype DESC").format(version) - - return asf_jira.search_issues(jql, maxResults=9999) - - -def get_last_major_version(current_version): - # TODO: This doesn't work for generating a changelog for the _first_ major - # release, but we probably don't care - major_versions = [ - v for v in asf_jira.project('ARROW').versions - if v.name[0].isdigit() and v.name.split('.')[-1] == '0' - ] - - # Sort the versions - def sort_version(x): - major, minor, patch = x.name.split('.') - return int(major), int(minor) - - major_versions.sort(key=sort_version) - - # Find index of version being released - current_version_index = ([x.name for x in major_versions] - .index(current_version)) - - return major_versions[current_version_index - 1] - - -def get_jiras_from_git_changelog(current_version): - # We use this to get the resolved PARQUET JIRAs - from subprocess import check_output - - last_major_version = 
get_last_major_version(current_version) - - # Path to .git directory - git_dir = os.path.join(ARROW_ROOT, '.git') - - cmd = ['git', '--git-dir', git_dir, 'log', '--pretty=format:%s', - 'apache-arrow-{}..apache-arrow-{}'.format(last_major_version, - current_version)] - output = check_output(cmd).decode('utf-8') - - resolved_jiras = [] - regex = re.compile(r'[a-zA-Z]+-[0-9]+') - for desc in output.splitlines(): - maybe_jira = desc.split(':')[0] - - # Sometimes people forget the colon - maybe_jira = maybe_jira.split(' ')[0] - if regex.match(maybe_jira): - resolved_jiras.append(maybe_jira) - - return resolved_jiras - - -LINK_TEMPLATE = '[{0}](https://issues.apache.org/jira/browse/{0})' - - -def format_changelog_markdown(issues, out): - issues_by_type = defaultdict(list) - for issue in issues: - issues_by_type[issue.fields.issuetype.name].append(issue) - - for issue_type, issue_group in sorted(issues_by_type.items()): - issue_group.sort(key=lambda x: x.key) - - out.write('## {0}\n\n'.format(_escape_for_markdown(issue_type))) - for issue in issue_group: - markdown_summary = _escape_for_markdown(issue.fields.summary) - out.write('* {0} - {1}\n'.format(issue.key, - markdown_summary)) - out.write('\n') - - -def _escape_for_markdown(x): - return ( - x.replace('_', r'\_') # underscores - .replace('`', r'\`') # backticks - .replace('*', r'\*') # asterisks - ) - - -def format_changelog_website(issues, out): - NEW_FEATURE = 'New Features and Improvements' - BUGFIX = 'Bug Fixes' - - CATEGORIES = { - 'New Feature': NEW_FEATURE, - 'Improvement': NEW_FEATURE, - 'Wish': NEW_FEATURE, - 'Task': NEW_FEATURE, - 'Test': BUGFIX, - 'Bug': BUGFIX, - 'Sub-task': NEW_FEATURE - } - - issues_by_category = defaultdict(list) - for issue in issues: - issue_type = issue.fields.issuetype.name - website_category = CATEGORIES[issue_type] - issues_by_category[website_category].append(issue) - - WEBSITE_ORDER = [NEW_FEATURE, BUGFIX] - - for issue_category in WEBSITE_ORDER: - issue_group = 
issues_by_category[issue_category] - issue_group.sort(key=lambda x: x.key) - - out.write('## {0}\n\n'.format(issue_category)) - for issue in issue_group: - name = LINK_TEMPLATE.format(issue.key) - markdown_summary = _escape_for_markdown(issue.fields.summary) - out.write('* {0} - {1}\n' - .format(name, markdown_summary)) - out.write('\n') - - -def get_resolved_parquet_issues(version): - git_resolved_jiras = set(get_jiras_from_git_changelog(version)) - - # We don't assume that resolved Parquet issues are found in a single Fix - # Version, so for now we query them all and then select only the ones that - # are found in the git log - jql = ("project=PARQUET " - "AND component='parquet-cpp' " - "AND status = Resolved " - "AND resolution in (Fixed, Done) " - "ORDER BY issuetype DESC") - - all_issues = asf_jira.search_issues(jql, maxResults=9999) - return [issue for issue in all_issues if issue.key in git_resolved_jiras] - - -def get_changelog(version, for_website=False): - issues_for_version = get_issues_for_version(version) - - # Infer resolved Parquet issues, since these can only really be known by - # looking at the git log - parquet_issues = get_resolved_parquet_issues(version) - issues_for_version.extend(parquet_issues) - - buf = StringIO() - - if for_website: - format_changelog_website(issues_for_version, buf) - else: - format_changelog_markdown(issues_for_version, buf) - - return buf.getvalue() - - -def append_changelog(version, changelog_path): - new_changelog = get_changelog(version) - - with open(changelog_path, 'r') as f: - old_changelog = f.readlines() - - result = StringIO() - # Header - print(''.join(old_changelog[:18]), file=result) - - # New version - today = datetime.today().strftime('%d %B %Y') - print('# Apache Arrow {0} ({1})'.format(version, today), - end='', file=result) - print('\n', file=result) - print(new_changelog, end='', file=result) - - # Prior versions - print(''.join(old_changelog[19:]), file=result) - - with open(changelog_path, 'w') as 
f: - f.write(result.getvalue().rstrip() + '\n') - - -if __name__ == '__main__': - if len(sys.argv) < 2: - print('Usage: changelog.py $FIX_VERSION [$IS_WEBSITE] ' - '[$CHANGELOG_TO_UPDATE]') - - for_website = len(sys.argv) > 2 and sys.argv[2] == '1' - - version = sys.argv[1] - - if len(sys.argv) > 3: - changelog_path = sys.argv[3] - append_changelog(version, changelog_path) - else: - print(get_changelog(version, for_website=for_website)) diff --git a/dev/release/post-03-website.sh b/dev/release/post-03-website.sh index 58121f584b8b..08e5f3a3262a 100755 --- a/dev/release/post-03-website.sh +++ b/dev/release/post-03-website.sh @@ -140,6 +140,7 @@ cat <> "${announce_file}" ANNOUNCE +# TODO(kszucs): needs to update for the new changelog generation with archery ${PYTHON:-python} "${SOURCE_DIR}/changelog.py" ${version} 1 | \ sed -e 's/^#/##/g' >> "${announce_file}" diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index 158790d33203..25fff1996321 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -10,6 +10,7 @@ .github/ISSUE_TEMPLATE/question.md ci/etc/rprofile ci/etc/*.patch +CHANGELOG.md cpp/CHANGELOG_PARQUET.md cpp/src/arrow/io/mman.h cpp/src/arrow/util/random.h @@ -41,6 +42,7 @@ cpp/src/plasma/thirdparty/dlmalloc.c cpp/thirdparty/flatbuffers/include/flatbuffers/base.h cpp/thirdparty/flatbuffers/include/flatbuffers/flatbuffers.h cpp/thirdparty/flatbuffers/include/flatbuffers/stl_emulation.h +dev/archery/MANIFEST.in dev/archery/requirements*.txt dev/archery/archery/tests/fixtures/* dev/release/rat_exclude_files.txt diff --git a/dev/tasks/crossbow.py b/dev/tasks/crossbow.py index adabe66daa4f..cb14de6415b0 100755 --- a/dev/tasks/crossbow.py +++ b/dev/tasks/crossbow.py @@ -29,7 +29,7 @@ from io import StringIO from pathlib import Path from textwrap import dedent -from datetime import datetime, date +from datetime import date from functools import partial import click @@ -66,19 +66,6 @@ CWD = 
Path(__file__).parent.absolute() -NEW_FEATURE = 'New Features and Improvements' -BUGFIX = 'Bug Fixes' - - -def md(template, *args, **kwargs): - """Wraps string.format with naive markdown escaping""" - def escape(s): - for char in ('*', '#', '_', '~', '`', '>'): - s = s.replace(char, '\\' + char) - return s - return template.format(*map(escape, args), **toolz.valmap(escape, kwargs)) - - def unflatten(mapping): """Converts a flat tuple => object mapping to hierarchical one""" result = {} @@ -162,95 +149,6 @@ def unflatten_tree(files): } -class JiraChangelog: - - def __init__(self, version, username, password, - server='https://issues.apache.org/jira'): - import jira.client - self.server = server - # clean version to the first numbers - self.version = '.'.join(version.split('.')[:3]) - query = ("project=ARROW " - "AND fixVersion='{0}' " - "AND status = Resolved " - "AND resolution in (Fixed, Done) " - "ORDER BY issuetype DESC").format(self.version) - self.client = jira.client.JIRA({'server': server}, - basic_auth=(username, password)) - self.issues = self.client.search_issues(query, maxResults=9999) - - def format_markdown(self): - out = StringIO() - - issues_by_type = toolz.groupby(lambda i: i.fields.issuetype.name, - self.issues) - for typename, issues in sorted(issues_by_type.items()): - issues.sort(key=lambda x: x.key) - - out.write(md('## {}\n\n', typename)) - for issue in issues: - out.write(md('* {} - {}\n', issue.key, issue.fields.summary)) - out.write('\n') - - return out.getvalue() - - def format_website(self): - # jira category => website category mapping - categories = { - 'New Feature': 'feature', - 'Improvement': 'feature', - 'Wish': 'feature', - 'Task': 'feature', - 'Test': 'bug', - 'Bug': 'bug', - 'Sub-task': 'feature' - } - titles = { - 'feature': 'New Features and Improvements', - 'bugfix': 'Bug Fixes' - } - - issues_by_category = toolz.groupby( - lambda issue: categories[issue.fields.issuetype.name], - self.issues - ) - - out = StringIO() - - for 
category in ('feature', 'bug'): - title = titles[category] - issues = issues_by_category[category] - issues.sort(key=lambda x: x.key) - - out.write(md('## {}\n\n', title)) - for issue in issues: - link = md('[{0}]({1}/browse/{0})', issue.key, self.server) - out.write(md('* {} - {}\n', link, issue.fields.summary)) - out.write('\n') - - return out.getvalue() - - def render(self, old_changelog, website=False): - old_changelog = old_changelog.splitlines() - if website: - new_changelog = self.format_website() - else: - new_changelog = self.format_markdown() - - out = StringIO() - - # Apache license header - out.write('\n'.join(old_changelog[:18])) - - # Newly generated changelog - today = datetime.today().strftime('%d %B %Y') - out.write(md('\n\n# Apache Arrow {} ({})\n\n', self.version, today)) - out.write(new_changelog) - out.write('\n'.join(old_changelog[19:])) - - return out.getvalue().strip() - - class GitRemoteCallbacks(PygitRemoteCallbacks): def __init__(self, token): @@ -1488,38 +1386,6 @@ def crossbow(ctx, github_token, arrow_path, queue_path, queue_remote, github_token=github_token, require_https=True) -@crossbow.command() -@click.option('--changelog-path', '-c', type=click.Path(exists=True), - default=str(DEFAULT_ARROW_PATH / 'CHANGELOG.md'), - help='Path of changelog to update') -@click.option('--arrow-version', '-v', default=None, - help='Set target version explicitly') -@click.option('--is-website', '-w', default=False, is_flag=True, - help='Whether to use website format for changelog. 
') -@click.option('--jira-username', '-u', default=None, help='JIRA username') -@click.option('--jira-password', '-P', default=None, help='JIRA password') -@click.option('--dry-run/--write', default=False, - help='Just display the new changelog, don\'t write it') -@click.pass_obj -def changelog(obj, changelog_path, arrow_version, is_website, jira_username, - jira_password, dry_run): - changelog_path = Path(changelog_path) - target = Target.from_repo(obj['arrow']) - version = arrow_version or target.version - - changelog = JiraChangelog(version, username=jira_username, - password=jira_password) - new_content = changelog.render(changelog_path.read_text(), - website=is_website) - - if dry_run: - click.echo(new_content) - else: - changelog_path.write_text(new_content) - click.echo('New changelog successfully generated, see git diff for the' - 'changes') - - @crossbow.command() @click.option('--config-path', '-c', type=click.Path(exists=True), default=DEFAULT_CONFIG_PATH,