Refactor locale statistics loader

Since locale data can be in pretty much any installed package, more
advanced logic is needed to find it.
The current implementation is only for Gettext, but adding Qt
translations will be much easier now.
The new implementation also *requires* the presence of a <translation/>
tag in order for this feature to work. This will likely not be changed,
explicitness just beats the complexity required for heuristics here.

CC: #37
This commit is contained in:
Matthias Klumpp 2019-01-27 23:14:49 +01:00
parent 97081ad8c3
commit b2ebe5819a
7 changed files with 170 additions and 68 deletions

View File

@ -262,6 +262,18 @@
"mo-file-error": {
"text": "Unable to process Gettext *.mo file for locale '{{locale}}'. The file is probably invalid, and no language information could be extracted.",
"severity": "warning"
},
"gettext-data-not-found": {
"text": "Unable to find any translation data for specified Gettext domain '{{domain}}'.",
"severity": "warning"
},
"no-translation-statistics": {
"text": ["We could not extract any translation statistics, although metadata hints were given that the data exists.",
"Please check the &lt;translation/&gt; metainfo tag for spelling mistakes.",
"It also makes sense to ensure all locale files are placed in the right directories (e.g. gettext .mo files in <code>/usr/share/locale/*/LC_MESSAGES/</code>)."],
"severity": "warning"
}
}

View File

@ -253,7 +253,7 @@ public:
scope (exit) cur.mdb_cursor_close ();
checkError (res, "mdb_cursor_open");
auto pkgCMap = HashMap!(string, string) (32);
auto pkgCMap = HashMap!(string, string) (64);
foreach (ref pkid; pkids) {
MDB_val pkey = makeDbValue (pkid);
MDB_val cval;

View File

@ -48,7 +48,7 @@ import asgen.backends.ubuntu;
import asgen.backends.archlinux;
import asgen.backends.rpmmd;
import asgen.handlers.iconhandler;
import asgen.handlers : IconHandler, LocaleHandler;
/**
@ -132,7 +132,8 @@ public:
*/
private void processPackages (ref Package[] pkgs, IconHandler iconh)
{
auto mde = scoped!DataExtractor (dstore, iconh);
auto localeh = scoped!LocaleHandler (pkgs);
auto mde = scoped!DataExtractor (dstore, iconh, localeh);
foreach (ref pkg; parallel (pkgs)) {
immutable pkid = pkg.id;
if (dstore.packageExists (pkid))

View File

@ -47,15 +47,17 @@ private:
DataStore dstore;
IconHandler iconh;
LocaleHandler localeh;
Config conf;
DataType dtype;
public:
this (DataStore db, IconHandler iconHandler)
this (DataStore db, IconHandler iconHandler, LocaleHandler localeHandler)
{
dstore = db;
iconh = iconHandler;
localeh = localeHandler;
conf = Config.get ();
dtype = conf.metadataType;
}
@ -273,7 +275,7 @@ public:
// process locale information.
if (conf.feature.processLocale)
processLocaleInfoForComponent (gres, cpt);
localeh.processLocaleInfoForComponent (gres, cpt);
// we don't want to run expensive font processing if we don't have a font component.
// since the font handler needs to load all font data prior to processing the component,

View File

@ -282,9 +282,11 @@ public:
// so we have to cache the data.
auto tmpThemes = HashMap!(string, Theme) (16);
auto filesPkids = ccache.getIconFilesMap (pkgMap.keys);
foreach (fname; parallel (filesPkids.byKey, 100)) {
foreach (info; parallel (filesPkids.byKeyValue, 100)) {
immutable fname = info.key;
immutable pkgid = info.value;
if (fname.startsWith ("/usr/share/pixmaps/")) {
auto pkg = getPackage (filesPkids[fname]);
auto pkg = getPackage (pkgid);
if (pkg is null)
continue;
synchronized (this) iconFiles[fname] = pkg;

View File

@ -21,15 +21,17 @@ module asgen.handlers.localehandler;
private:
import std.path : baseName, buildPath;
import std.uni : toLower;
import std.string : format, strip;
import std.string : format, strip, startsWith;
import std.array : empty;
import std.conv : to;
import std.parallelism : parallel;
import appstream.Component : Component, ComponentKind;
import appstream.Translation : Translation, TranslationKind;
import containers: HashMap;
import asgen.logging;
import asgen.result : GeneratorResult;
import asgen.backends.interfaces : Package;
/**
@ -51,14 +53,16 @@ extern(C) struct GettextHeader {
uint trans_sysdep_tab_offset;
}
auto getDataForFile (GeneratorResult gres, const string fname)
auto getDataForFile (GeneratorResult gres, Package pkg, Component cpt, const string fname)
{
if (pkg is null)
pkg = gres.pkg;
const(ubyte)[] fdata;
try {
fdata = gres.pkg.getFileData (fname);
fdata = pkg.getFileData (fname);
} catch (Exception e) {
gres.addHint (null, "pkg-extract-error", ["fname": fname.baseName,
"pkg_fname": gres.pkg.getFilename.baseName,
gres.addHint (cpt, "pkg-extract-error", ["fname": fname.baseName,
"pkg_fname": pkg.getFilename.baseName,
"error": e.msg]);
return null;
}
@ -72,23 +76,23 @@ long nstringsForGettextData (GeneratorResult gres, const string locale, const(ub
import std.bitmanip : swapEndian;
GettextHeader header;
memcpy (&header, cast(void*) moData, GettextHeader.sizeof);
memcpy (&header, cast(void*) moData, GettextHeader.sizeof);
bool swapped;
if (header.magic == 0x950412de)
swapped = false;
else if (header.magic == 0xde120495)
swapped = true;
else {
gres.addHint (null, "mo-file-error", ["locale": locale]);
if (header.magic == 0x950412de)
swapped = false;
else if (header.magic == 0xde120495)
swapped = true;
else {
gres.addHint (null, "mo-file-error", ["locale": locale]);
return -1;
}
}
long nstrings;
if (swapped)
nstrings = header.nstrings.swapEndian;
else
nstrings = header.nstrings;
if (swapped)
nstrings = header.nstrings.swapEndian;
else
nstrings = header.nstrings;
if (nstrings > 0)
return nstrings -1;
@ -96,51 +100,124 @@ long nstringsForGettextData (GeneratorResult gres, const string locale, const(ub
}
/**
* Load localization information for the given component.
* Finds localization in a set of packages and allows extracting
* translation statistics from locale.
*/
public void processLocaleInfoForComponent (GeneratorResult gres, Component cpt)
public final class LocaleHandler
{
import std.path : globMatch;
import std.array : split;
immutable ckind = cpt.getKind;
private:
HashMap!(string, Package) localeIdPkgMap;
// we only can extract locale for a set of component types
// (others either don't store files or have to manually set which locale they support)
if (ckind != ComponentKind.DESKTOP_APP &&
ckind != ComponentKind.CONSOLE_APP &&
ckind != ComponentKind.SERVICE)
return;
public this (Package[] pkgList)
{
import std.typecons : scoped;
import asgen.contentsstore : ContentsStore;
import asgen.config : Config;
logDebug ("Creating new LocaleHandler.");
// read translation domain hints from metainfo data
auto gettextMoName = "*";
auto translationsArr = cpt.getTranslations;
if (translationsArr.len > 0) {
import appstream.c.types : AsTranslation;
gettextMoName = null;
for (uint i = 0; i < translationsArr.len; i++) {
// cast array data to D Screenshot and keep a reference to the C struct
auto tr = new Translation (cast (AsTranslation*) translationsArr.index (i));
if (tr.getKind == TranslationKind.GETTEXT)
gettextMoName = tr.getId.strip;
// convert the list into a HashMap for faster lookups
auto pkgMap = HashMap!(string, Package) (64);
foreach (ref pkg; pkgList) {
immutable pkid = pkg.id;
pkgMap[pkid] = pkg;
}
translationsArr.removeRange (0, translationsArr.len);
localeIdPkgMap = HashMap!(string, Package) (64);
auto conf = Config.get;
if (!conf.feature.processLocale)
return; // don't load the expensive locale<->package mapping if we don't need it
// open package contents cache
auto ccache = scoped!ContentsStore ();
ccache.open (conf);
// we make the assumption here that all locale for a given domain are in one package.
// otherwise this global search will get even more insane.
foreach (info; parallel (ccache.getContentsMap (pkgMap.keys).byKeyValue, 100)) {
immutable fname = info.key;
immutable pkgid = info.value;
if (!fname.startsWith ("/usr/share/locale/"))
continue;
immutable id = fname.baseName;
// check if we already have a package - lookups in this HashMap are faster
// due to its smaller size, and (most of the time) that gain outweighs the cost
// of the additional lookup for the right package entity that follows.
if (localeIdPkgMap.get (id, null) !is null)
continue;
Package pkg;
if (pkgid !is null)
pkg = pkgMap.get (pkgid, null);
if (pkg !is null) {
synchronized (this) localeIdPkgMap[id] = pkg;
}
}
logDebug ("Created new LocaleHandler.");
}
ulong maxNStrings = 0;
auto localeMap = HashMap!(string, ulong) (32);
foreach (ref fname; gres.pkg.contents) {
/**
* Load localization information for the given component.
*/
public void processLocaleInfoForComponent (GeneratorResult gres, Component cpt)
{
import std.path : globMatch;
import std.array : split;
if (!gettextMoName.empty) {
// Process Gettext .mo files for information
if (fname.globMatch ("/usr/share/locale/*/LC_MESSAGES/%s.mo".format (gettextMoName))) {
auto data = getDataForFile (gres, fname);
immutable ckind = cpt.getKind;
// we only can extract locale for a set of component types
// (others either don't store files or have to manually set which locale they support)
if (ckind != ComponentKind.DESKTOP_APP &&
ckind != ComponentKind.CONSOLE_APP &&
ckind != ComponentKind.SERVICE)
return;
// read translation domain hints from metainfo data
string[] gettextDomains;
auto translationsArr = cpt.getTranslations;
if (translationsArr.len > 0) {
import appstream.c.types : AsTranslation;
for (uint i = 0; i < translationsArr.len; i++) {
// cast array data to D Translation and keep a reference to the C struct
auto tr = new Translation (cast (AsTranslation*) translationsArr.index (i));
if (tr.getKind == TranslationKind.GETTEXT)
gettextDomains ~= tr.getId.strip;
}
translationsArr.removeRange (0, translationsArr.len);
}
// exit if we have no Gettext domains specified
if (gettextDomains.empty)
return;
ulong maxNStrings = 0;
auto localeMap = HashMap!(string, ulong) (32);
// Process Gettext .mo files for information
foreach (ref domain; gettextDomains) {
auto pkg = localeIdPkgMap.get ("%s.mo".format (domain), null);
if (pkg is null) {
gres.addHint (cpt, "gettext-data-not-found", ["domain": domain]);
continue;
}
foreach (ref fname; pkg.contents) {
if (!fname.globMatch ("/usr/share/locale/*/LC_MESSAGES/%s.mo".format (domain)))
continue;
auto data = getDataForFile (gres, pkg, cpt, fname);
if (data.empty)
continue;
immutable locale = fname.split ("/")[3];
immutable locale = fname.split ("/")[4];
auto nstrings = nstringsForGettextData (gres, locale, data);
// check if there was an error
if (nstrings < 0)
@ -155,18 +232,26 @@ public void processLocaleInfoForComponent (GeneratorResult gres, Component cpt)
maxNStrings = nstrings;
}
}
// by this point we should have at least some locale information.
// if that is not the case, warn about it.
if (localeMap.empty) {
gres.addHint (cpt, "no-translation-statistics");
return;
}
foreach (ref info; localeMap.byKeyValue) {
immutable locale = info.key;
immutable nstrings = info.value;
immutable int percentage = (nstrings * 100 / maxNStrings).to!int;
// we only add languages if the translation is more than 25% complete
if (percentage > 25)
cpt.addLanguage (locale, percentage);
}
}
foreach (ref info; localeMap.byKeyValue) {
immutable locale = info.key;
immutable nstrings = info.value;
immutable int percentage = (nstrings * 100 / maxNStrings).to!int;
// we only add languages if the translation is more than 25% complete
if (percentage > 25)
cpt.addLanguage (locale, percentage);
}
}
unittest {

View File

@ -25,4 +25,4 @@ public import asgen.handlers.iconhandler;
public import asgen.handlers.metainfoparser;
public import asgen.handlers.metainfovalidator;
public import asgen.handlers.screenshothandler;
public import asgen.handlers.localehandler;
public import asgen.handlers.localehandler : LocaleHandler;