[smuxi: 172/179] Common, Engine(-Tests): use precise regex for parsing emojis
- From: Mirco M. M. Bauer <mmmbauer src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [smuxi: 172/179] Common, Engine(-Tests): use precise regex for parsing emojis
- Date: Sat, 4 Nov 2017 05:51:39 +0000 (UTC)
commit d99f363711726703b39baf0d3b0591a8222aa190
Author: Mirco Bauer <meebey meebey net>
Date: Thu Oct 5 20:49:36 2017 +0800
Common, Engine(-Tests): use precise regex for parsing emojis
The emoji regex was simply :\w+: which did not match all emojis, such as :+1: or
:e-mail:. Instead of guessing what characters are valid we are now building a
regex that is based on the emoji list we have from EmojiOne anyhow.
As this is a very large regex with 27k of characters and 2373 emojis, this could
have been a serious performance regression, but a benchmark disagrees:
Performance with the old but simple regex:
/benchmark_message_builder -c 10000 --append-message
MessageBuilder().AppendMessage(). count: 10000 took: 3370 ms avg: 0.34 ms
MessageBuilder().AppendMessage(). count: 10000 took: 3331 ms avg: 0.33 ms
MessageBuilder().AppendMessage(). count: 10000 took: 3300 ms avg: 0.33 ms
Performance with the complex but precisely generated regex:
/benchmark_message_builder -c 10000 --append-message
MessageBuilder().AppendMessage(). count: 10000 took: 3353 ms avg: 0.34 ms
MessageBuilder().AppendMessage(). count: 10000 took: 3294 ms avg: 0.33 ms
MessageBuilder().AppendMessage(). count: 10000 took: 3316 ms avg: 0.33 ms
For the /benchmark_message_builder command to actually use emojis, I
temporarily made the following code change in the CommandManager class:
MessageBuilder CreateMessageBuilder()
{
var builder = new MessageBuilder();
builder.Settings.Emojis = true;
return builder;
}
src/Common/Emojione.cs | 6 ++++
src/Engine-Tests/MessageBuilderTests.cs | 39 +++++++++++++++++++++++++++
src/Engine/Config/MessageBuilderSettings.cs | 13 ++++++++-
3 files changed, 57 insertions(+), 1 deletions(-)
---
diff --git a/src/Common/Emojione.cs b/src/Common/Emojione.cs
index fa5f273..3699204 100644
--- a/src/Common/Emojione.cs
+++ b/src/Common/Emojione.cs
@@ -29,6 +29,12 @@ namespace Smuxi.Common
{
readonly static string BaseUri = "http://cdnjs.cloudflare.com/ajax/libs/emojione/2.2.7/assets/png/";
+ public static Dictionary<string, string> ShortnameToUnicodeMap {
+ get {
+ return map;
+ }
+ }
+
public static string ShortnameToUnicode(string shortName)
{
string val;
diff --git a/src/Engine-Tests/MessageBuilderTests.cs b/src/Engine-Tests/MessageBuilderTests.cs
index 8a877f4..6a951a5 100644
--- a/src/Engine-Tests/MessageBuilderTests.cs
+++ b/src/Engine-Tests/MessageBuilderTests.cs
@@ -19,6 +19,7 @@
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
using System;
using NUnit.Framework;
+using Smuxi.Common;
namespace Smuxi.Engine
{
@@ -785,6 +786,7 @@ namespace Smuxi.Engine
[Test]
public void AppendMessageWithEmojis()
{
+ // simple emoji
var msg = "foo :smiley: bar";
var builder = new MessageBuilder();
builder.Settings.Emojis = true;
@@ -795,6 +797,43 @@ namespace Smuxi.Engine
);
builder.Append(new TextMessagePartModel(" bar"));
TestMessage(msg, builder.ToMessage(), builder.Settings);
+
+ // emoji with underscore
+ msg = ":slightly_smiling_face:";
+ builder = new MessageBuilder();
+ builder.Settings.Emojis = true;
+ builder.TimeStamp = DateTime.MinValue;
+ builder.Append(
+ new ImageMessagePartModel("smuxi-emoji://slightly_smiling_face", ":slightly_smiling_face:")
+ );
+ TestMessage(msg, builder.ToMessage(), builder.Settings);
+
+ // emoji with plus
+ msg = ":+1:";
+ builder = new MessageBuilder();
+ builder.Settings.Emojis = true;
+ builder.TimeStamp = DateTime.MinValue;
+ builder.Append(
+ new ImageMessagePartModel("smuxi-emoji://+1", ":+1:")
+ );
+ TestMessage(msg, builder.ToMessage(), builder.Settings);
+
+ // test all supported emojis of the Emojione provider
+ foreach (var emojiShortname in Emojione.ShortnameToUnicodeMap.Keys) {
+ var msgWithEmoji = ":" + emojiShortname + ":";
+ builder = new MessageBuilder();
+ builder.Settings.Emojis = true;
+ builder.TimeStamp = DateTime.MinValue;
+ builder.Append(
+ new ImageMessagePartModel(
+ String.Format("smuxi-emoji://{0}", emojiShortname),
+ msgWithEmoji
+ )
+ );
+ TestMessage(msgWithEmoji, builder.ToMessage(), builder.Settings,
+ String.Format("failed testing emoji '{0}'",
+ emojiShortname));
+ }
}
}
}
diff --git a/src/Engine/Config/MessageBuilderSettings.cs b/src/Engine/Config/MessageBuilderSettings.cs
index b5955f8..9a6b7aa 100644
--- a/src/Engine/Config/MessageBuilderSettings.cs
+++ b/src/Engine/Config/MessageBuilderSettings.cs
@@ -18,6 +18,7 @@
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
using System;
+using System.Text;
using System.Text.RegularExpressions;
using System.Collections.Generic;
using Smuxi.Common;
@@ -52,7 +53,17 @@ namespace Smuxi.Engine
static MessageBuilderSettings()
{
- var emojiRegex = new Regex(@":(\w+):", RegexOptions.Compiled);
+ // OPT: this emoji regex is really long, around 27k characters
+ var emojiRegexBuilder = new StringBuilder(32 * 1024);
+ emojiRegexBuilder.Append(":(");
+ foreach (var emojiShortname in Emojione.ShortnameToUnicodeMap.Keys) {
+ emojiRegexBuilder.AppendFormat("{0}|", Regex.Escape(emojiShortname));
+ }
+ // remove trailing |
+ emojiRegexBuilder.Length--;
+ emojiRegexBuilder.Append("):");
+
+ var emojiRegex = new Regex(emojiRegexBuilder.ToString(), RegexOptions.Compiled);
EmojiMessagePattern = new MessagePatternModel(emojiRegex) {
MessagePartType = typeof(ImageMessagePartModel),
LinkFormat = "smuxi-emoji://{1}"
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]