From b2ebe5819a6e0d58eb34a653277671bb622134ff Mon Sep 17 00:00:00 2001 From: Matthias Klumpp Date: Sun, 27 Jan 2019 23:14:49 +0100 Subject: [PATCH] Refactor locale statistics loader Since locale can be in pretty much any installed package, more advanced logic is needed to find them. The current implementation is only for Gettext, but adding Qt translations will be much easier now. The new implementation also *requires* the presence of a <translation/> tag in order for this feature to work. This will likely not be changed, explicitness just beats the complexity required for heuristics here. CC: #37 --- data/asgen-hints.json | 12 ++ src/asgen/contentsstore.d | 2 +- src/asgen/engine.d | 5 +- src/asgen/extractor.d | 6 +- src/asgen/handlers/iconhandler.d | 6 +- src/asgen/handlers/localehandler.d | 205 ++++++++++++++++++++--------- src/asgen/handlers/package.d | 2 +- 7 files changed, 170 insertions(+), 68 deletions(-) diff --git a/data/asgen-hints.json b/data/asgen-hints.json index f32c280..96dac1e 100644 --- a/data/asgen-hints.json +++ b/data/asgen-hints.json @@ -262,6 +262,18 @@ "mo-file-error": { "text": "Unable to process Gettext *.mo file for locale '{{locale}}'. The file is probably invalid, and no language information could be extracted.", "severity": "warning" +}, + +"gettext-data-not-found": { + "text": "Unable to find any translation data for specified Gettext domain '{{domain}}'.", + "severity": "warning" +}, + +"no-translation-statistics": { + "text": ["We could not extract any translation statistics, although metadata hints were given that the data exists.", + "Please check the <translation/> metainfo tag for spelling mistakes.", + "It also makes sense to ensure all locale files are placed in the right directories (e.g. 
gettext .mo files in /usr/share/locale/*/LC_MESSAGES/)."], + "severity": "warning" } } diff --git a/src/asgen/contentsstore.d b/src/asgen/contentsstore.d index 167753c..b4281dd 100644 --- a/src/asgen/contentsstore.d +++ b/src/asgen/contentsstore.d @@ -253,7 +253,7 @@ public: scope (exit) cur.mdb_cursor_close (); checkError (res, "mdb_cursor_open"); - auto pkgCMap = HashMap!(string, string) (32); + auto pkgCMap = HashMap!(string, string) (64); foreach (ref pkid; pkids) { MDB_val pkey = makeDbValue (pkid); MDB_val cval; diff --git a/src/asgen/engine.d b/src/asgen/engine.d index df7688c..86de8e8 100644 --- a/src/asgen/engine.d +++ b/src/asgen/engine.d @@ -48,7 +48,7 @@ import asgen.backends.ubuntu; import asgen.backends.archlinux; import asgen.backends.rpmmd; -import asgen.handlers.iconhandler; +import asgen.handlers : IconHandler, LocaleHandler; /** @@ -132,7 +132,8 @@ public: */ private void processPackages (ref Package[] pkgs, IconHandler iconh) { - auto mde = scoped!DataExtractor (dstore, iconh); + auto localeh = scoped!LocaleHandler (pkgs); + auto mde = scoped!DataExtractor (dstore, iconh, localeh); foreach (ref pkg; parallel (pkgs)) { immutable pkid = pkg.id; if (dstore.packageExists (pkid)) diff --git a/src/asgen/extractor.d b/src/asgen/extractor.d index 0c65de8..090810d 100644 --- a/src/asgen/extractor.d +++ b/src/asgen/extractor.d @@ -47,15 +47,17 @@ private: DataStore dstore; IconHandler iconh; + LocaleHandler localeh; Config conf; DataType dtype; public: - this (DataStore db, IconHandler iconHandler) + this (DataStore db, IconHandler iconHandler, LocaleHandler localeHandler) { dstore = db; iconh = iconHandler; + localeh = localeHandler; conf = Config.get (); dtype = conf.metadataType; } @@ -273,7 +275,7 @@ public: // process locale information. if (conf.feature.processLocale) - processLocaleInfoForComponent (gres, cpt); + localeh.processLocaleInfoForComponent (gres, cpt); // we don't want to run expensive font processing if we don't have a font component. 
// since the font handler needs to load all font data prior to processing the component, diff --git a/src/asgen/handlers/iconhandler.d b/src/asgen/handlers/iconhandler.d index 6a0a990..a827253 100644 --- a/src/asgen/handlers/iconhandler.d +++ b/src/asgen/handlers/iconhandler.d @@ -282,9 +282,11 @@ public: // so we have to cache the data. auto tmpThemes = HashMap!(string, Theme) (16); auto filesPkids = ccache.getIconFilesMap (pkgMap.keys); - foreach (fname; parallel (filesPkids.byKey, 100)) { + foreach (info; parallel (filesPkids.byKeyValue, 100)) { + immutable fname = info.key; + immutable pkgid = info.value; if (fname.startsWith ("/usr/share/pixmaps/")) { - auto pkg = getPackage (filesPkids[fname]); + auto pkg = getPackage (pkgid); if (pkg is null) continue; synchronized (this) iconFiles[fname] = pkg; diff --git a/src/asgen/handlers/localehandler.d b/src/asgen/handlers/localehandler.d index f1c0d55..4f16a35 100644 --- a/src/asgen/handlers/localehandler.d +++ b/src/asgen/handlers/localehandler.d @@ -21,15 +21,17 @@ module asgen.handlers.localehandler; private: import std.path : baseName, buildPath; -import std.uni : toLower; -import std.string : format, strip; +import std.string : format, strip, startsWith; import std.array : empty; import std.conv : to; +import std.parallelism : parallel; import appstream.Component : Component, ComponentKind; import appstream.Translation : Translation, TranslationKind; import containers: HashMap; +import asgen.logging; import asgen.result : GeneratorResult; +import asgen.backends.interfaces : Package; /** @@ -51,14 +53,16 @@ extern(C) struct GettextHeader { uint trans_sysdep_tab_offset; } -auto getDataForFile (GeneratorResult gres, const string fname) +auto getDataForFile (GeneratorResult gres, Package pkg, Component cpt, const string fname) { + if (pkg is null) + pkg = gres.pkg; const(ubyte)[] fdata; try { - fdata = gres.pkg.getFileData (fname); + fdata = pkg.getFileData (fname); } catch (Exception e) { - gres.addHint (null, 
"pkg-extract-error", ["fname": fname.baseName, - "pkg_fname": gres.pkg.getFilename.baseName, + gres.addHint (cpt, "pkg-extract-error", ["fname": fname.baseName, + "pkg_fname": pkg.getFilename.baseName, "error": e.msg]); return null; } @@ -72,23 +76,23 @@ long nstringsForGettextData (GeneratorResult gres, const string locale, const(ub import std.bitmanip : swapEndian; GettextHeader header; - memcpy (&header, cast(void*) moData, GettextHeader.sizeof); + memcpy (&header, cast(void*) moData, GettextHeader.sizeof); bool swapped; - if (header.magic == 0x950412de) - swapped = false; - else if (header.magic == 0xde120495) - swapped = true; - else { - gres.addHint (null, "mo-file-error", ["locale": locale]); + if (header.magic == 0x950412de) + swapped = false; + else if (header.magic == 0xde120495) + swapped = true; + else { + gres.addHint (null, "mo-file-error", ["locale": locale]); return -1; - } + } long nstrings; - if (swapped) - nstrings = header.nstrings.swapEndian; - else - nstrings = header.nstrings; + if (swapped) + nstrings = header.nstrings.swapEndian; + else + nstrings = header.nstrings; if (nstrings > 0) return nstrings -1; @@ -96,51 +100,124 @@ long nstringsForGettextData (GeneratorResult gres, const string locale, const(ub } /** - * Load localization information for the given component. + * Finds localization in a set of packages and allows extracting + * translation statistics from locale. 
*/ -public void processLocaleInfoForComponent (GeneratorResult gres, Component cpt) +public final class LocaleHandler { - import std.path : globMatch; - import std.array : split; - immutable ckind = cpt.getKind; +private: + HashMap!(string, Package) localeIdPkgMap; - // we only can extract locale for a set of component types - // (others either don't store files or have to manually set which locale they support) - if (ckind != ComponentKind.DESKTOP_APP && - ckind != ComponentKind.CONSOLE_APP && - ckind != ComponentKind.SERVICE) - return; + public this (Package[] pkgList) + { + import std.typecons : scoped; + import asgen.contentsstore : ContentsStore; + import asgen.config : Config; + logDebug ("Creating new LocaleHandler."); - // read translation domain hints from metainfo data - auto gettextMoName = "*"; - auto translationsArr = cpt.getTranslations; - if (translationsArr.len > 0) { - import appstream.c.types : AsTranslation; - - gettextMoName = null; - for (uint i = 0; i < translationsArr.len; i++) { - // cast array data to D Screenshot and keep a reference to the C struct - auto tr = new Translation (cast (AsTranslation*) translationsArr.index (i)); - if (tr.getKind == TranslationKind.GETTEXT) - gettextMoName = tr.getId.strip; + // convert the list into a HashMap for faster lookups + auto pkgMap = HashMap!(string, Package) (64); + foreach (ref pkg; pkgList) { + immutable pkid = pkg.id; + pkgMap[pkid] = pkg; } - translationsArr.removeRange (0, translationsArr.len); + localeIdPkgMap = HashMap!(string, Package) (64); + + auto conf = Config.get; + if (!conf.feature.processLocale) + return; // don't load the expensive locale<->package mapping if we don't need it + + // open package contents cache + auto ccache = scoped!ContentsStore (); + ccache.open (conf); + + // we make the assumption here that all locale for a given domain are in one package. + // otherwise this global search will get even more insane. 
+ foreach (info; parallel (ccache.getContentsMap (pkgMap.keys).byKeyValue, 100)) { + immutable fname = info.key; + immutable pkgid = info.value; + + if (!fname.startsWith ("/usr/share/locale/")) + continue; + immutable id = fname.baseName; + + // check if we already have a package - lookups in this HashMap are faster + // due to its smaller size and (most of the time) outweight the following additional + // lookup for the right package entity. + if (localeIdPkgMap.get (id, null) !is null) + continue; + + Package pkg; + if (pkgid !is null) + pkg = pkgMap.get (pkgid, null); + + if (pkg !is null) { + synchronized (this) localeIdPkgMap[id] = pkg; + } + } + + logDebug ("Created new LocaleHandler."); } - ulong maxNStrings = 0; - auto localeMap = HashMap!(string, ulong) (32); - foreach (ref fname; gres.pkg.contents) { + /** + * Load localization information for the given component. + */ + public void processLocaleInfoForComponent (GeneratorResult gres, Component cpt) + { + import std.path : globMatch; + import std.array : split; - if (!gettextMoName.empty) { - // Process Gettext .mo files for information - if (fname.globMatch ("/usr/share/locale/*/LC_MESSAGES/%s.mo".format (gettextMoName))) { - auto data = getDataForFile (gres, fname); + immutable ckind = cpt.getKind; + + // we only can extract locale for a set of component types + // (others either don't store files or have to manually set which locale they support) + if (ckind != ComponentKind.DESKTOP_APP && + ckind != ComponentKind.CONSOLE_APP && + ckind != ComponentKind.SERVICE) + return; + + + // read translation domain hints from metainfo data + string[] gettextDomains; + auto translationsArr = cpt.getTranslations; + if (translationsArr.len > 0) { + import appstream.c.types : AsTranslation; + + for (uint i = 0; i < translationsArr.len; i++) { + // cast array data to D Screenshot and keep a reference to the C struct + auto tr = new Translation (cast (AsTranslation*) translationsArr.index (i)); + if (tr.getKind == 
TranslationKind.GETTEXT) + gettextDomains ~= tr.getId.strip; + } + + translationsArr.removeRange (0, translationsArr.len); + } + + // exit if we have no Gettext domains specified + if (gettextDomains.empty) + return; + + ulong maxNStrings = 0; + auto localeMap = HashMap!(string, ulong) (32); + + // Process Gettext .mo files for information + foreach (ref domain; gettextDomains) { + auto pkg = localeIdPkgMap.get ("%s.mo".format (domain), null); + if (pkg is null) { + gres.addHint (cpt, "gettext-data-not-found", ["domain": domain]); + continue; + } + + foreach (ref fname; pkg.contents) { + if (!fname.globMatch ("/usr/share/locale/*/LC_MESSAGES/%s.mo".format (domain))) + continue; + auto data = getDataForFile (gres, pkg, cpt, fname); if (data.empty) continue; - immutable locale = fname.split ("/")[3]; + immutable locale = fname.split ("/")[4]; auto nstrings = nstringsForGettextData (gres, locale, data); // check if there was an error if (nstrings < 0) @@ -155,18 +232,26 @@ public void processLocaleInfoForComponent (GeneratorResult gres, Component cpt) maxNStrings = nstrings; } } + + // by this point we should have at least some locale information. + // if that is not the case, warn about it. 
+ if (localeMap.empty) { + gres.addHint (cpt, "no-translation-statistics"); + return; + } + + foreach (ref info; localeMap.byKeyValue) { + immutable locale = info.key; + immutable nstrings = info.value; + + immutable int percentage = (nstrings * 100 / maxNStrings).to!int; + + // we only add languages if the translation is more than 25% complete + if (percentage > 25) + cpt.addLanguage (locale, percentage); + } } - foreach (ref info; localeMap.byKeyValue) { - immutable locale = info.key; - immutable nstrings = info.value; - - immutable int percentage = (nstrings * 100 / maxNStrings).to!int; - - // we only add languages if the translation is more than 25% complete - if (percentage > 25) - cpt.addLanguage (locale, percentage); - } } unittest { diff --git a/src/asgen/handlers/package.d b/src/asgen/handlers/package.d index 101080f..0ce739a 100644 --- a/src/asgen/handlers/package.d +++ b/src/asgen/handlers/package.d @@ -25,4 +25,4 @@ public import asgen.handlers.iconhandler; public import asgen.handlers.metainfoparser; public import asgen.handlers.metainfovalidator; public import asgen.handlers.screenshothandler; -public import asgen.handlers.localehandler; +public import asgen.handlers.localehandler : LocaleHandler;