diff --git a/pip/requirements.in b/pip/requirements.in
index 9a214e66f..ebf4183de 100644
--- a/pip/requirements.in
+++ b/pip/requirements.in
@@ -19,6 +19,7 @@ requests[socks]
 send2trash
 stringcase
 waitress
+fluent-syntax
 
 # windows only
 psutil; sys.platform == "win32"
diff --git a/pip/requirements.txt b/pip/requirements.txt
index 6e4e379fc..e97824beb 100755
--- a/pip/requirements.txt
+++ b/pip/requirements.txt
@@ -14,6 +14,7 @@ decorator==4.4.2 # via -r requirements.in
 distro==1.5.0 # via -r requirements.in
 flask-cors==3.0.9 # via -r requirements.in
 flask==1.1.2 # via -r requirements.in, flask-cors
+fluent-syntax==0.18.1 # via -r requirements.in
 idna==2.10 # via requests
 iniconfig==1.1.1 # via pytest
 isort==5.6.4 # via -r requirements.in, pylint
diff --git a/scripts/BUILD.bazel b/scripts/BUILD.bazel
index 464420f10..442618eb8 100644
--- a/scripts/BUILD.bazel
+++ b/scripts/BUILD.bazel
@@ -1,3 +1,5 @@
+load("@py_deps//:requirements.bzl", "requirement")
+
 py_binary(
     name = "buildinfo",
     srcs = ["buildinfo.py"],
@@ -5,3 +7,9 @@ py_binary(
     stamp = 1,
     visibility = ["//visibility:public"],
 )
+
+py_binary(
+    name = "extract-strings",
+    srcs = ["extract-strings.py"],
+    deps = [requirement("fluent-syntax")],
+)
diff --git a/scripts/extract-strings.py b/scripts/extract-strings.py
new file mode 100644
index 000000000..0163e6547
--- /dev/null
+++ b/scripts/extract-strings.py
@@ -0,0 +1,45 @@
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+
+"""
+Tool to extract core strings and keys from .ftl files.
+
+Parses every ftl/core/*.ftl file in the workspace, groups message keys by
+their serialized text, reports texts shared by more than one key, and writes
+the value -> keys mapping to keys_by_value.json in the workspace root.
+"""
+
+import glob
+import json
+import os
+
+from fluent.syntax import parse
+from fluent.syntax.ast import Junk
+from fluent.syntax.serializer import serialize_element
+
+# Set by `bazel run`; points at the root of the source checkout.
+root = os.environ["BUILD_WORKSPACE_DIRECTORY"]
+# Sorted so that duplicate reports and the JSON output are reproducible.
+ftl_files = sorted(glob.glob(os.path.join(root, "ftl", "core", "*.ftl")))
+keys_by_value = {}
+
+for path in ftl_files:
+    # Fluent files are UTF-8; don't depend on the platform default encoding.
+    with open(path, encoding="utf-8") as ftl_file:
+        resource = parse(ftl_file.read(), with_spans=False)
+    for ent in resource.body:
+        if isinstance(ent, Junk):
+            raise Exception(f"file had junk! {path} {ent}")
+        # Comments have no id; messages with only attributes have no value.
+        if not getattr(ent, "id", None) or ent.value is None:
+            continue
+        key = ent.id.name
+        val = "".join(serialize_element(elem) for elem in ent.value.elements)
+        if val in keys_by_value:
+            print("duplicate found:", keys_by_value[val], key)
+        keys_by_value.setdefault(val, []).append(key)
+
+out_path = os.path.join(root, "keys_by_value.json")
+with open(out_path, "w", encoding="utf-8") as out_file:
+    json.dump(keys_by_value, out_file)
+print("keys:", len(keys_by_value))