diff --git a/app/lib/shared/count_topics.dart b/app/lib/shared/count_topics.dart index 1dfd51cdf..60585138c 100644 --- a/app/lib/shared/count_topics.dart +++ b/app/lib/shared/count_topics.dart @@ -2,16 +2,28 @@ // for details. All rights reserved. Use of this source code is governed by a // BSD-style license that can be found in the LICENSE file. +import 'dart:collection'; +import 'dart:io'; + +import 'package:collection/collection.dart'; import 'package:gcloud/storage.dart'; +import 'package:logging/logging.dart'; +import 'package:path/path.dart' as p; import 'package:pub_dev/package/backend.dart'; import 'package:pub_dev/package/models.dart'; import 'package:pub_dev/shared/configuration.dart'; import 'package:pub_dev/shared/datastore.dart'; import 'package:pub_dev/shared/storage.dart'; import 'package:pub_dev/shared/utils.dart'; +import 'package:source_span/source_span.dart'; +import 'package:yaml/yaml.dart'; + +import '../frontend/static_files.dart'; final topicsJsonFileName = 'topics.json'; +final _log = Logger('count_topics'); + Future countTopics() async { final topics = {}; @@ -32,3 +44,102 @@ Future countTopics() async { await uploadBytesWithRetry( reportsBucket, topicsJsonFileName, jsonUtf8Encoder.convert(topics)); } + +typedef CanonicalTopic = ({ + String topic, + String description, + Set aliases, +}); + +final canonicalTopics = () { + try { + final f = File(p.join(resolveAppDir(), '../doc/topics.yaml')); + final root = loadYamlNode(f.readAsStringSync(), sourceUrl: f.uri); + if (root is! YamlMap) { + throw SourceSpanFormatException('expected a map', root.span); + } + if (root.keys.length > 1) { + throw SourceSpanFormatException( + 'only the "topic" key is allowed', + root.span, + ); + } + + final topics = root.expectListOfObjects('topics'); + return UnmodifiableListView(topics.map((entry) { + final topic = entry.expectString('topic', maxLength: 32); + if (!isValidTopicFormat(topic)) { + throw SourceSpanFormatException('must be valid topic', entry.span); + } + if (entry.keys.length > 3) { + throw SourceSpanFormatException( + 'only keys "topic", "description" and "aliases" are allowed', + entry.span, + ); + } + return ( + topic: topic, + description: entry.expectString('description', maxLength: 160), + aliases: Set.unmodifiable(entry.expectList('aliases').nodes.map((node) { + final value = node.value; + if (value is! String) { + throw SourceSpanFormatException('must be a string', node.span); + } + if (!isValidTopicFormat(value)) { + throw SourceSpanFormatException('must be valid topic', node.span); + } + return value; + })), + ); + }).toList(growable: false)); + } on Exception catch (e, st) { + _log.shout('failed to load doc/topics.yaml', e, st); + + // This is sad, but we can just ignore it! + return UnmodifiableListView([]); + } +}(); + +/// True, if [topic] is formatted like a valid topic. +bool isValidTopicFormat(String topic) => + RegExp(r'^[a-z0-9-]{2,32}$').hasMatch(topic) && + !topic.contains('--') && + topic.startsWith(RegExp(r'^[a-z]')) && + !topic.endsWith('-'); + +extension on YamlMap { + YamlNode expectProperty(String key) { + if (nodes[key] case final YamlNode v) return v; + throw SourceSpanFormatException('expected a "$key" property', span); + } + + YamlList expectList(String key) { + final value = expectProperty(key); + if (value case final YamlList v) return v; + throw SourceSpanFormatException('"$key" must be a list', value.span); + } + + Iterable expectListOfObjects(String key) sync* { + for (final entry in expectList(key).nodes) { + if (entry is! YamlMap) { + throw SourceSpanFormatException('expected an object', entry.span); + } + yield entry; + } + } + + String expectString(String key, {int? maxLength}) { + final node = expectProperty(key); + final value = node.value; + if (value is! String) { + throw SourceSpanFormatException('"$key" must be a string', node.span); + } + if (maxLength != null && value.length > maxLength) { + throw SourceSpanFormatException( + '"$key" must be shorter than $maxLength', + node.span, + ); + } + return value; + } +} diff --git a/app/test/shared/topics_test.dart b/app/test/shared/topics_test.dart index 437d6337e..301dab648 100644 --- a/app/test/shared/topics_test.dart +++ b/app/test/shared/topics_test.dart @@ -33,4 +33,70 @@ void main() { ], ); }); + + test('isValidTopicFormat', () { + expect(isValidTopicFormat('widget'), isTrue); + expect(isValidTopicFormat('abc'), isTrue); + expect(isValidTopicFormat('foo-bar'), isTrue); + expect(isValidTopicFormat('foo42'), isTrue); + + expect(isValidTopicFormat('-widget'), isFalse); + expect(isValidTopicFormat('a'), isFalse); + expect(isValidTopicFormat('foo-'), isFalse); + expect(isValidTopicFormat('42bar'), isFalse); + }); + + test('validate doc/topics.yaml', () { + // First we ensure that topics are loaded, this validates the file format! + final topics = canonicalTopics; + + // Check if there are any duplicate topics! + final duplicates = topics.duplicates(); + if (duplicates.isNotEmpty) { + fail( + '"doc/topics.yaml" must not have duplicate entries, ' + 'found: ${duplicates.join(', ')}', + ); + } + + // Check if any canonical topics are aliases for other topics + for (final topic in topics.map((e) => e.topic)) { + if (topics.any((e) => e.aliases.contains(topic))) { + fail('The canonical topic "$topic" is also listed in "aliases"!'); + } + } + + // Check that each alias is only used once! + for (final alias in topics.expand((e) => e.aliases)) { + if (topics.where((e) => e.aliases.contains(alias)).length > 1) { + fail('The alias "$alias" is listed in "aliases" for two topics!'); + } + } + }); + + test('duplicates', () { + expect([1, 2, 3, 4, 5, 1].duplicates(), contains(1)); + expect([1, 2, 3, 4, 5].duplicates(), isEmpty); + expect([1, 2, 3, 4, 5, 5, 5].duplicates(), contains(5)); + expect([1, 2, 1, 3, 4, 5, 5, 5].duplicates(), contains(5)); + expect([5, 2, 1, 3, 4, 5, 5, 5].duplicates(), contains(5)); + }); +} + +extension on List { + /// Return elements that appear more than once in this [List]. + Set duplicates() { + final duplicates = {}; + final N = length; + for (var i = 0; i < N; i++) { + final candidate = this[i]; + for (var j = i + 1; j < N; j++) { + if (candidate == this[j]) { + duplicates.add(candidate); + break; + } + } + } + return duplicates; + } } diff --git a/doc/topics.yaml b/doc/topics.yaml new file mode 100644 index 000000000..a0f94e066 --- /dev/null +++ b/doc/topics.yaml @@ -0,0 +1,64 @@ +# Canonicalizing topics +# ===================== +# +# Packages published to pub.dev can define `topics` in `pubspec.yaml`, see; +# https://dart.dev/tools/pub/pubspec#topics +# +# The list of topics is free-form, and package authors are expected to make up +# new topics as needed. Naturally, this will lead to duplicates and topics that +# only differ in spelling. For example, one package might use the topic "widget" +# while another package uses the topic "widgets". +# +# This file aims to mitigate duplicate topics by establishing _canonical topics_ +# with descriptions and a list of aliases. +# +# Aliases for a topic will be resolved when the `pubspec.yaml` is parsed. +# Ensuring that a package tagged with the alias "widgets" will appear on pub.dev +# as if it had been tagged with the canonical topic "widget". +# Similarly, search queries will be normalized to canonical topics. +# +# Topic descriptions serve as documentation for next time an aliases is +# proposed. Descriptions can also be used in tooltips or topic listings: +# https://pub.dev/topics +# +# +# Canonical topic format +# ---------------------- +# +# Entries in the `topics` list in this document, must have the form: +# +# ```yaml +# topics: +# - topic: +# description: +# aliases: +# - +# - ... +# ``` +# +# +# Contributing +# ------------ +# +# You are welcome to submit pull-requests with additional aliases, canonical +# topics and/or improved topic descriptions. +# +# Please limit pull-requests to propose one topic per PR! +# +# +# Editorial guidelines +# -------------------- +# +# The decision on whether or not to merge two similar topics can be difficult. +# When in doubt we should error on the side of causion and avoid merging topics. +# However, if mistakes are made these changes are reversible. +# And we should not be afraid of accepting that sometimes a single topic can +# have multiple meaning, even if this makes the topic hard to use. +# +# The editorial guidelines are intended to evolve as we gain more experience +# merging/managing topics. +topics: +- topic: widget + description: Packages that contain Flutter widgets. + aliases: + - widgets