[banshee/gio-hardware] [Fixup] Better i18n support for article stripping
- From: Alex Launi <alexlauni src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [banshee/gio-hardware] [Fixup] Better i18n support for article stripping
- Date: Fri, 13 Aug 2010 15:25:08 +0000 (UTC)
commit c5f3834df222f840deb5dcc20ff0f1ebcb313d20
Author: Gabriel Burt <gabriel burt gmail com>
Date: Tue Aug 10 12:29:54 2010 -0700
[Fixup] Better i18n support for article stripping
Translators can now supply as many articles as their language has. Add
unit tests to test the artist normalization, too. Fixes bgo#625331
src/Extensions/Banshee.Fixup/Banshee.Fixup.csproj | 3 +-
.../Banshee.Fixup/AlbumDuplicateSolver.cs | 14 +--
.../Banshee.Fixup/ArtistDuplicateSolver.cs | 30 ++++---
.../Banshee.Fixup/Banshee.Fixup/FixSource.cs | 2 +-
.../Banshee.Fixup/Banshee.Fixup/Solver.cs | 59 +++++++++++++-
.../Banshee.Fixup/Banshee.Fixup/Tests.cs | 87 ++++++++++++++++++++
src/Extensions/Banshee.Fixup/Makefile.am | 1 +
tests/Makefile.am | 1 +
8 files changed, 174 insertions(+), 23 deletions(-)
---
diff --git a/src/Extensions/Banshee.Fixup/Banshee.Fixup.csproj b/src/Extensions/Banshee.Fixup/Banshee.Fixup.csproj
index b41e187..608109f 100644
--- a/src/Extensions/Banshee.Fixup/Banshee.Fixup.csproj
+++ b/src/Extensions/Banshee.Fixup/Banshee.Fixup.csproj
@@ -88,8 +88,9 @@
<Compile Include="Banshee.Fixup\FixSource.cs" />
<Compile Include="Banshee.Fixup\Problem.cs" />
<Compile Include="Banshee.Fixup\ProblemModel.cs" />
- <Compile Include="Banshee.Fixup\View.cs" />
<Compile Include="Banshee.Fixup\Solver.cs" />
+ <Compile Include="Banshee.Fixup\Tests.cs" />
+ <Compile Include="Banshee.Fixup\View.cs" />
</ItemGroup>
<Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
<ProjectExtensions>
diff --git a/src/Extensions/Banshee.Fixup/Banshee.Fixup/AlbumDuplicateSolver.cs b/src/Extensions/Banshee.Fixup/Banshee.Fixup/AlbumDuplicateSolver.cs
index f88d2c7..b221021 100644
--- a/src/Extensions/Banshee.Fixup/Banshee.Fixup/AlbumDuplicateSolver.cs
+++ b/src/Extensions/Banshee.Fixup/Banshee.Fixup/AlbumDuplicateSolver.cs
@@ -69,16 +69,12 @@ namespace Banshee.Fixup
return null;
ret = ret.ToLower ()
- .Replace (" and ", " & ")
- .Replace (Catalog.GetString (" and "), " & ")
- .Replace (", the", "")
- .Replace (Catalog.GetString (", the"), "")
- .Replace ("the ", "")
- .Replace (Catalog.GetString ("the "), "")
- .Trim ();
+ .RemovePrefixedArticles ()
+ .RemoveSuffixedArticles ()
+ .NormalizeConjunctions ();
- // Stips whitespace, punctuation, accents, and lower-cases
- ret = Hyena.StringUtil.SearchKey (ret);
+ // Strip extra whitespace, punctuation, and accents, lower-case, etc
+ ret = Hyena.StringUtil.SearchKey (ret).Trim ();
return ret + artist;
}
diff --git a/src/Extensions/Banshee.Fixup/Banshee.Fixup/ArtistDuplicateSolver.cs b/src/Extensions/Banshee.Fixup/Banshee.Fixup/ArtistDuplicateSolver.cs
index 4194a71..02a7116 100644
--- a/src/Extensions/Banshee.Fixup/Banshee.Fixup/ArtistDuplicateSolver.cs
+++ b/src/Extensions/Banshee.Fixup/Banshee.Fixup/ArtistDuplicateSolver.cs
@@ -51,7 +51,7 @@ namespace Banshee.Fixup
String.Format (
@"(Name IS NOT NULL AND ArtistID IN (SELECT DISTINCT(ArtistID) FROM CoreTracks WHERE PrimarySourceID = {0})
OR ArtistID IN (SELECT DISTINCT(a.ArtistID) FROM CoreTracks t, CoreAlbums a WHERE t.AlbumID = a.AlbumID AND t.PrimarySourceID = {0}))",
- ServiceManager.SourceManager.MusicLibrary.DbId
+ EnableUnitTests ? 0 : ServiceManager.SourceManager.MusicLibrary.DbId
),
"HYENA_BINARY_FUNCTION ('dupe-artist', Name, NULL)"
);
@@ -65,23 +65,31 @@ namespace Banshee.Fixup
BinaryFunction.Remove (Id);
}
- private object NormalizeArtistName (object name, object null_arg)
+ private string comma = ", ";
+ private string [] comma_ary = new string [] { ", " };
+
+ internal object NormalizeArtistName (object name, object null_arg)
{
var ret = name as string;
if (ret == null)
return null;
+ // If has only one comma, split on it and reverse the order
+ // eg Matthews, Dave => Dave Matthews
+ int i = ret.IndexOf (comma);
+ if (i != -1 && i == ret.LastIndexOf (comma)) {
+ ret = ret.Split (comma_ary, StringSplitOptions.None)
+ .Reverse ()
+ .Join (" ");
+ }
+
ret = ret.ToLower ()
- .Replace (" and ", " & ")
- .Replace (Catalog.GetString (" and "), " & ")
- .Replace (", the", "")
- .Replace (Catalog.GetString (", the"), "")
- .Replace ("the ", "")
- .Replace (Catalog.GetString ("the "), "")
- .Trim ();
+ .RemovePrefixedArticles ()
+ .RemoveSuffixedArticles ()
+ .NormalizeConjunctions ();
- // Stips whitespace, punctuation, accents, and lower-cases
- ret = Hyena.StringUtil.SearchKey (ret);
+ // Strip extra whitespace, punctuation, and accents, lower-case, etc
+ ret = Hyena.StringUtil.SearchKey (ret).Trim ();
return ret;
}
diff --git a/src/Extensions/Banshee.Fixup/Banshee.Fixup/FixSource.cs b/src/Extensions/Banshee.Fixup/Banshee.Fixup/FixSource.cs
index 8b0e8f6..4331465 100644
--- a/src/Extensions/Banshee.Fixup/Banshee.Fixup/FixSource.cs
+++ b/src/Extensions/Banshee.Fixup/Banshee.Fixup/FixSource.cs
@@ -63,7 +63,7 @@ namespace Banshee.Fixup
};
combo.Active = 0;
- var apply_button = new Hyena.Widgets.ImageButton ("Apply Selected Fixes", "gtk-apply");
+ var apply_button = new Hyena.Widgets.ImageButton (Catalog.GetString ("Apply Selected Fixes"), "gtk-apply");
apply_button.Clicked += (o, a) => problem_model.Fix ();
problem_model.Reloaded += (o, a) => apply_button.Sensitive = problem_model.SelectedCount > 0;
diff --git a/src/Extensions/Banshee.Fixup/Banshee.Fixup/Solver.cs b/src/Extensions/Banshee.Fixup/Banshee.Fixup/Solver.cs
index 81b5682..8353079 100644
--- a/src/Extensions/Banshee.Fixup/Banshee.Fixup/Solver.cs
+++ b/src/Extensions/Banshee.Fixup/Banshee.Fixup/Solver.cs
@@ -49,6 +49,9 @@ namespace Banshee.Fixup
{
}
+ // Total hack to make unit tests work
+ internal static bool EnableUnitTests;
+
public string Id {
get { return id; }
set {
@@ -57,7 +60,9 @@ namespace Banshee.Fixup
}
id = value;
- Generation = DatabaseConfigurationClient.Client.Get<int> ("MetadataFixupGeneration", id, 0);
+ if (!EnableUnitTests) {
+ Generation = DatabaseConfigurationClient.Client.Get<int> ("MetadataFixupGeneration", id, 0);
+ }
}
}
@@ -141,6 +146,7 @@ namespace Banshee.Fixup
protected override void IdentifyCore ()
{
+ // Prune artists and albums that are no longer used
ServiceManager.DbConnection.Execute (@"
DELETE FROM CoreAlbums WHERE AlbumID NOT IN (SELECT DISTINCT(AlbumID) FROM CoreTracks);
DELETE FROM CoreArtists WHERE
@@ -152,6 +158,57 @@ namespace Banshee.Fixup
ServiceManager.DbConnection.Execute (cmd, Generation);
}
}
+
+ }
+
+ public static class FixupExtensions
+ {
+ public static string NormalizeConjunctions (this string input)
+ {
+ return input.Replace (" & ", " and ");
+ }
+
+ public static string RemovePrefixedArticles (this string input)
+ {
+ foreach (var prefix in article_prefixes) {
+ if (input.StartsWith (prefix)) {
+ input = input.Substring (prefix.Length, input.Length - prefix.Length);
+ }
+ }
+ return input;
+ }
+
+ public static string RemoveSuffixedArticles (this string input)
+ {
+ foreach (var suffix in article_suffixes) {
+ if (input.EndsWith (suffix)) {
+ input = input.Substring (0, input.Length - suffix.Length);
+ }
+ }
+ return input;
+ }
+
+ static string [] article_prefixes;
+ static string [] article_suffixes;
+ static FixupExtensions ()
+ {
+ // Translators: These are articles that might be prefixed or suffixed
+ // on artist names or album titles. You can add as many as you need,
+ // separated by a pipe (|)
+ var articles = (Catalog.GetString ("a|an|the") + "|a|an|the").Split ('|').Distinct ();
+
+ // Translators: This is the format commonly used in your language for
+ // suffixing an article, eg in English: ", The"
+ var suffix_format = Catalog.GetString (", {0}");
+
+ article_prefixes = articles.Select (a => a + " ")
+ .ToArray ();
+
+ article_suffixes = articles.SelectMany (a =>
+ new string [] { String.Format (suffix_format, a), ", " + a }
+ ).Distinct ().ToArray ();
+ }
+
}
/*public class CompilationSolver : Solver
diff --git a/src/Extensions/Banshee.Fixup/Banshee.Fixup/Tests.cs b/src/Extensions/Banshee.Fixup/Banshee.Fixup/Tests.cs
new file mode 100644
index 0000000..36be2bd
--- /dev/null
+++ b/src/Extensions/Banshee.Fixup/Banshee.Fixup/Tests.cs
@@ -0,0 +1,87 @@
+//
+// Tests.cs
+//
+// Author:
+// Gabriel Burt <gburt novell com>
+//
+// Copyright (C) 2010 Novell, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the
+// "Software"), to deal in the Software without restriction, including
+// without limitation the rights to use, copy, modify, merge, publish,
+// distribute, sublicense, and/or sell copies of the Software, and to
+// permit persons to whom the Software is furnished to do so, subject to
+// the following conditions:
+//
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+
+#if ENABLE_TESTS
+
+using System;
+using System.Linq;
+
+using NUnit.Framework;
+using GLib;
+
+using Hyena;
+
+namespace Banshee.Fixup
+{
+ [TestFixture]
+ public class FixupTests
+ {
+ ArtistDuplicateSolver artist_solver;
+
+ [SetUp]
+ public void Setup ()
+ {
+ Solver.EnableUnitTests = true;
+ artist_solver = new ArtistDuplicateSolver ();
+ }
+
+ [TearDown]
+ public void Teardown ()
+ {
+ }
+
+ [Test]
+ public void ArtistNormalization ()
+ {
+ AssertArtistNormalized (null, null);
+ AssertArtistNormalized (null, 12);
+ AssertArtistNormalized ("", "");
+ AssertArtistNormalized ("foo", "foo");
+ AssertArtistNormalized ("dave matthews", "Dave Matthews");
+ AssertArtistNormalized ("dave matthews", "Matthews, Dave");
+ AssertArtistNormalized ("black keys", "The Black Keys");
+ AssertArtistNormalized ("black keys", "black Keys, the");
+ AssertArtistNormalized ("beatles", "Beatles");
+ AssertArtistNormalized ("beatles", "The Beatles");
+ AssertArtistNormalized ("beatles", " Béatles , The ");
+ AssertArtistNormalized ("beatles", "Beatles, A");
+ AssertArtistNormalized ("beatles", "Beatles, An");
+ AssertArtistNormalized ("beatles", "A Beatles ");
+ AssertArtistNormalized ("rem", " R.É.M");
+ AssertArtistNormalized ("belle and sebastian", "Belle & Sebastian");
+ AssertArtistNormalized ("belle and sebastian", "Bellé and Sebastían\t ");
+ }
+
+ private void AssertArtistNormalized (string correct, object input)
+ {
+ Assert.AreEqual (correct, artist_solver.NormalizeArtistName (input, null));
+ }
+ }
+}
+
+#endif
diff --git a/src/Extensions/Banshee.Fixup/Makefile.am b/src/Extensions/Banshee.Fixup/Makefile.am
index 62cf3e2..7a5c893 100644
--- a/src/Extensions/Banshee.Fixup/Makefile.am
+++ b/src/Extensions/Banshee.Fixup/Makefile.am
@@ -13,6 +13,7 @@ SOURCES = \
Banshee.Fixup/Problem.cs \
Banshee.Fixup/ProblemModel.cs \
Banshee.Fixup/Solver.cs \
+ Banshee.Fixup/Tests.cs \
Banshee.Fixup/View.cs
RESOURCES = \
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 93a1da9..53f8dd6 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -14,6 +14,7 @@ TEST_ASSEMBLIES = \
Banshee.Core.dll \
Banshee.Gnome.dll \
Banshee.Services.dll \
+ Banshee.Fixup.dll \
Banshee.Dap.Mtp.dll
if ENABLE_GIO
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]