Browse Source

Update Humanizer (#3532)

pull/3404/merge
Christoph Wille 4 days ago committed by GitHub
parent
commit
99165b7ce2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 23
      ICSharpCode.Decompiler/Humanizer/StringHumanizeExtensions.cs
  2. 51
      ICSharpCode.Decompiler/Humanizer/Vocabularies.cs
  3. 154
      ICSharpCode.Decompiler/Humanizer/Vocabulary.cs
  4. 1
      ICSharpCode.Decompiler/ICSharpCode.Decompiler.csproj

23
ICSharpCode.Decompiler/Humanizer/StringHumanizeExtensions.cs

@ -0,0 +1,23 @@
namespace Humanizer.Inflections;
using CharSpan = System.ReadOnlySpan<System.Char>;
/// <summary>
/// Low-allocation string building helpers used by the inflection code.
/// </summary>
internal static class StringHumanizeExtensions
{
    /// <summary>
    /// Creates a new string made of <paramref name="left"/> immediately followed by <paramref name="right"/>.
    /// </summary>
    /// <param name="left">Characters that form the start of the result.</param>
    /// <param name="right">Characters that form the end of the result.</param>
    /// <returns>A string whose length is the sum of both input lengths.</returns>
    internal static unsafe string Concat(CharSpan left, CharSpan right)
    {
        var headLength = left.Length;
        var tailLength = right.Length;

        // Allocate the final string up front (filled with NULs), then overwrite its
        // characters in place while it is pinned.
        var combined = new string('\0', headLength + tailLength);
        fixed (char* buffer = combined)
        {
            var head = new System.Span<char>(buffer, headLength);
            var tail = new System.Span<char>(buffer + headLength, tailLength);
            left.CopyTo(head);
            right.CopyTo(tail);
        }

        return combined;
    }

    /// <summary>
    /// Creates a new string made of the single character <paramref name="left"/> followed by <paramref name="right"/>.
    /// </summary>
    /// <param name="left">Character that becomes the first character of the result.</param>
    /// <param name="right">Characters appended after <paramref name="left"/>.</param>
    /// <returns>A string of length <c>right.Length + 1</c>.</returns>
    internal static unsafe string Concat(char left, CharSpan right)
    {
        // View the by-value parameter as a one-element span so the span overload can be reused.
        var single = new CharSpan(&left, 1);
        return Concat(single, right);
    }
}

51
ICSharpCode.Decompiler/Humanizer/Vocabularies.cs

@ -1,19 +1,14 @@
using System; using System;
using System.Threading; using System.Threading;
namespace Humanizer.Inflections namespace Humanizer.Inflections;
{
/// <summary>
/// Container for registered Vocabularies. At present, only a single vocabulary is supported: Default.
/// </summary>
internal static class Vocabularies
{
private static readonly Lazy<Vocabulary> Instance;
static Vocabularies() /// <summary>
{ /// Container for registered Vocabularies. At present, only a single vocabulary is supported: Default.
Instance = new Lazy<Vocabulary>(BuildDefault, LazyThreadSafetyMode.PublicationOnly); /// </summary>
} internal static class Vocabularies
{
static readonly Lazy<Vocabulary> Instance = new(BuildDefault, LazyThreadSafetyMode.PublicationOnly);
/// <summary> /// <summary>
/// The default vocabulary used for singular/plural irregularities. /// The default vocabulary used for singular/plural irregularities.
@ -22,7 +17,7 @@ namespace Humanizer.Inflections
/// </summary> /// </summary>
public static Vocabulary Default => Instance.Value; public static Vocabulary Default => Instance.Value;
private static Vocabulary BuildDefault() static Vocabulary BuildDefault()
{ {
var _default = new Vocabulary(); var _default = new Vocabulary();
@ -38,7 +33,7 @@ namespace Humanizer.Inflections
_default.AddPlural("(hive)$", "$1s"); _default.AddPlural("(hive)$", "$1s");
_default.AddPlural("([^aeiouy]|qu)y$", "$1ies"); _default.AddPlural("([^aeiouy]|qu)y$", "$1ies");
_default.AddPlural("(x|ch|ss|sh)$", "$1es"); _default.AddPlural("(x|ch|ss|sh)$", "$1es");
_default.AddPlural("(matr|vert|ind|d)ix|ex$", "$1ices"); _default.AddPlural("(matr|vert|ind|d)(ix|ex)$", "$1ices");
_default.AddPlural("(^[m|l])ouse$", "$1ice"); _default.AddPlural("(^[m|l])ouse$", "$1ice");
_default.AddPlural("^(ox)$", "$1en"); _default.AddPlural("^(ox)$", "$1en");
_default.AddPlural("(quiz)$", "$1zes"); _default.AddPlural("(quiz)$", "$1zes");
@ -60,7 +55,7 @@ namespace Humanizer.Inflections
_default.AddSingular("(m)ovies$", "$1ovie"); _default.AddSingular("(m)ovies$", "$1ovie");
_default.AddSingular("(x|ch|ss|sh)es$", "$1"); _default.AddSingular("(x|ch|ss|sh)es$", "$1");
_default.AddSingular("(^[m|l])ice$", "$1ouse"); _default.AddSingular("(^[m|l])ice$", "$1ouse");
_default.AddSingular("(o)es$", "$1"); _default.AddSingular("(?<!^[a-z])(o)es$", "$1");
_default.AddSingular("(shoe)s$", "$1"); _default.AddSingular("(shoe)s$", "$1");
_default.AddSingular("(cris|ax|test)es$", "$1is"); _default.AddSingular("(cris|ax|test)es$", "$1is");
_default.AddSingular("(octop|vir|alumn|fung|cact|foc|hippopotam|radi|stimul|syllab|nucle)i$", "$1us"); _default.AddSingular("(octop|vir|alumn|fung|cact|foc|hippopotam|radi|stimul|syllab|nucle)i$", "$1us");
@ -83,26 +78,28 @@ namespace Humanizer.Inflections
_default.AddIrregular("move", "moves"); _default.AddIrregular("move", "moves");
_default.AddIrregular("goose", "geese"); _default.AddIrregular("goose", "geese");
_default.AddIrregular("wave", "waves"); _default.AddIrregular("wave", "waves");
_default.AddIrregular("die", "dice");
_default.AddIrregular("foot", "feet"); _default.AddIrregular("foot", "feet");
_default.AddIrregular("tooth", "teeth"); _default.AddIrregular("tooth", "teeth");
_default.AddIrregular("curriculum", "curricula"); _default.AddIrregular("curriculum", "curricula");
_default.AddIrregular("database", "databases"); _default.AddIrregular("database", "databases");
_default.AddIrregular("zombie", "zombies"); _default.AddIrregular("zombie", "zombies");
_default.AddIrregular("personnel", "personnel"); _default.AddIrregular("personnel", "personnel");
//Fix #789
_default.AddIrregular("cache", "caches"); _default.AddIrregular("cache", "caches");
//Fix 975
_default.AddIrregular("ex", "exes", matchEnding: false); _default.AddIrregular("ex", "exes", matchEnding: false);
_default.AddIrregular("is", "are", matchEnding: false); _default.AddIrregular("is", "are", matchEnding: false);
_default.AddIrregular("was", "were", matchEnding: false);
_default.AddIrregular("that", "those", matchEnding: false); _default.AddIrregular("that", "those", matchEnding: false);
_default.AddIrregular("this", "these", matchEnding: false); _default.AddIrregular("this", "these", matchEnding: false);
_default.AddIrregular("bus", "buses", matchEnding: false); _default.AddIrregular("bus", "buses", matchEnding: false);
_default.AddIrregular("staff", "staff", matchEnding: false); _default.AddIrregular("die", "dice", matchEnding: false);
_default.AddIrregular("training", "training", matchEnding: false); _default.AddIrregular("tie", "ties", matchEnding: false);
_default.AddIrregular("lens", "lenses");
_default.AddIrregular("clove", "cloves");
_default.AddIrregular("valve", "valves");
_default.AddIrregular("explosive", "explosives");
_default.AddUncountable("staff");
_default.AddUncountable("training");
_default.AddUncountable("equipment"); _default.AddUncountable("equipment");
_default.AddUncountable("information"); _default.AddUncountable("information");
_default.AddUncountable("corn"); _default.AddUncountable("corn");
@ -142,8 +139,16 @@ namespace Humanizer.Inflections
_default.AddUncountable("scissors"); _default.AddUncountable("scissors");
_default.AddUncountable("means"); _default.AddUncountable("means");
_default.AddUncountable("mail"); _default.AddUncountable("mail");
_default.AddUncountable("pliers");
_default.AddUncountable("sheers");
_default.AddUncountable("clothes");
_default.AddUncountable("apparatus");
_default.AddUncountable("chassis");
_default.AddUncountable("debris");
//Fix 1132
_default.AddUncountable("metadata");
return _default; return _default;
} }
}
} }

154
ICSharpCode.Decompiler/Humanizer/Vocabulary.cs

@ -1,22 +1,27 @@
using System.Collections.Generic; using System;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Text.RegularExpressions; using System.Text.RegularExpressions;
namespace Humanizer.Inflections namespace Humanizer.Inflections;
#nullable enable
/// <summary>
/// A container for exceptions to simple pluralization/singularization rules.
/// Vocabularies.Default contains an extensive list of rules for US English.
/// At this time, multiple vocabularies and removing existing rules are not supported.
/// </summary>
internal class Vocabulary
{ {
/// <summary>
/// A container for exceptions to simple pluralization/singularization rules.
/// Vocabularies.Default contains an extensive list of rules for US English.
/// At this time, multiple vocabularies and removing existing rules are not supported.
/// </summary>
internal class Vocabulary
{
internal Vocabulary() internal Vocabulary()
{ {
} }
private readonly List<Rule> _plurals = new List<Rule>(); readonly List<Rule> plurals = [];
private readonly List<Rule> _singulars = new List<Rule>(); readonly List<Rule> singulars = [];
private readonly List<string> _uncountables = new List<string>(); readonly HashSet<string> uncountables = new(StringComparer.CurrentCultureIgnoreCase);
readonly Regex letterS = new("^([sS])[sS]*$");
/// <summary> /// <summary>
/// Adds a word to the vocabulary which cannot easily be pluralized/singularized by RegEx, e.g. "person" and "people". /// Adds a word to the vocabulary which cannot easily be pluralized/singularized by RegEx, e.g. "person" and "people".
@ -28,8 +33,10 @@ namespace Humanizer.Inflections
{ {
if (matchEnding) if (matchEnding)
{ {
AddPlural("(" + singular[0] + ")" + singular.Substring(1) + "$", "$1" + plural.Substring(1)); var singularSubstring = singular.Substring(1);
AddSingular("(" + plural[0] + ")" + plural.Substring(1) + "$", "$1" + singular.Substring(1)); var pluralSubString = plural.Substring(1);
AddPlural($"({singular[0]}){singularSubstring}$", $"$1{pluralSubString}");
AddSingular($"({plural[0]}){pluralSubString}$", $"$1{singularSubstring}");
} }
else else
{ {
@ -42,54 +49,63 @@ namespace Humanizer.Inflections
/// Adds an uncountable word to the vocabulary, e.g. "fish". Will be ignored when plurality is changed. /// Adds an uncountable word to the vocabulary, e.g. "fish". Will be ignored when plurality is changed.
/// </summary> /// </summary>
/// <param name="word">Word to be added to the list of uncountables.</param> /// <param name="word">Word to be added to the list of uncountables.</param>
public void AddUncountable(string word) public void AddUncountable(string word) =>
{ uncountables.Add(word);
_uncountables.Add(word.ToLower());
}
/// <summary> /// <summary>
/// Adds a rule to the vocabulary that does not follow trivial rules for pluralization, e.g. "bus" -> "buses" /// Adds a rule to the vocabulary that does not follow trivial rules for pluralization, e.g. "bus" -> "buses"
/// </summary> /// </summary>
/// <param name="rule">RegEx to be matched, case insensitive, e.g. "(bus)es$"</param> /// <param name="rule">RegEx to be matched, case insensitive, e.g. "(bus)es$"</param>
/// <param name="replacement">RegEx replacement e.g. "$1"</param> /// <param name="replacement">RegEx replacement e.g. "$1"</param>
public void AddPlural(string rule, string replacement) public void AddPlural(string rule, string replacement) =>
{ plurals.Add(new(rule, replacement));
_plurals.Add(new Rule(rule, replacement));
}
/// <summary> /// <summary>
/// Adds a rule to the vocabulary that does not follow trivial rules for singularization, e.g. "vertices/indices -> "vertex/index" /// Adds a rule to the vocabulary that does not follow trivial rules for singularization, e.g. "vertices/indices -> "vertex/index"
/// </summary> /// </summary>
/// <param name="rule">RegEx to be matched, case insensitive, e.g. ""(vert|ind)ices$""</param> /// <param name="rule">RegEx to be matched, case insensitive, e.g. ""(vert|ind)ices$""</param>
/// <param name="replacement">RegEx replacement e.g. "$1ex"</param> /// <param name="replacement">RegEx replacement e.g. "$1ex"</param>
public void AddSingular(string rule, string replacement) public void AddSingular(string rule, string replacement) =>
{ singulars.Add(new(rule, replacement));
_singulars.Add(new Rule(rule, replacement));
}
/// <summary> /// <summary>
/// Pluralizes the provided input considering irregular words /// Pluralizes the provided input considering irregular words
/// </summary> /// </summary>
/// <param name="word">Word to be pluralized</param> /// <param name="word">Word to be pluralized</param>
/// <param name="inputIsKnownToBeSingular">Normally you call Pluralize on singular words; but if you're unsure call it with false</param> /// <param name="inputIsKnownToBeSingular">Normally you call Pluralize on singular words; but if you're unsure call it with false</param>
/// <returns></returns> [return: NotNullIfNotNull(nameof(word))]
public string Pluralize(string word, bool inputIsKnownToBeSingular = true) public string? Pluralize(string? word, bool inputIsKnownToBeSingular = true)
{ {
var result = ApplyRules(_plurals, word, false); if (word == null)
{
return null;
}
var s = LetterS(word);
if (s != null)
{
return s + "s";
}
var result = ApplyRules(plurals, word, false);
if (inputIsKnownToBeSingular) if (inputIsKnownToBeSingular)
{ {
return result ?? word; return result ?? word;
} }
var asSingular = ApplyRules(_singulars, word, false); var asSingular = ApplyRules(singulars, word, false);
var asSingularAsPlural = ApplyRules(_plurals, asSingular, false); var asSingularAsPlural = ApplyRules(plurals, asSingular, false);
if (asSingular != null && asSingular != word && asSingular + "s" != word && asSingularAsPlural == word && result != word) if (asSingular != null &&
asSingular != word &&
asSingular + "s" != word &&
asSingularAsPlural == word &&
result != word)
{ {
return word; return word;
} }
return result; return result!;
} }
/// <summary> /// <summary>
@ -98,10 +114,20 @@ namespace Humanizer.Inflections
/// <param name="word">Word to be singularized</param> /// <param name="word">Word to be singularized</param>
/// <param name="inputIsKnownToBePlural">Normally you call Singularize on plural words; but if you're unsure call it with false</param> /// <param name="inputIsKnownToBePlural">Normally you call Singularize on plural words; but if you're unsure call it with false</param>
/// <param name="skipSimpleWords">Skip singularizing single words that have an 's' on the end</param> /// <param name="skipSimpleWords">Skip singularizing single words that have an 's' on the end</param>
/// <returns></returns> [return: NotNullIfNotNull(nameof(word))]
public string Singularize(string word, bool inputIsKnownToBePlural = true, bool skipSimpleWords = false) public string? Singularize(string? word, bool inputIsKnownToBePlural = true, bool skipSimpleWords = false)
{ {
var result = ApplyRules(_singulars, word, skipSimpleWords); if (word == null)
{
return null;
}
var s = LetterS(word);
if (s != null)
{
return s;
}
var result = ApplyRules(singulars, word, skipSimpleWords);
if (inputIsKnownToBePlural) if (inputIsKnownToBePlural)
{ {
@ -109,17 +135,24 @@ namespace Humanizer.Inflections
} }
// the Plurality is unknown so we should check all possibilities // the Plurality is unknown so we should check all possibilities
var asPlural = ApplyRules(_plurals, word, false); var asPlural = ApplyRules(plurals, word, false);
var asPluralAsSingular = ApplyRules(_singulars, asPlural, false); if (asPlural == word ||
if (asPlural != word && word + "s" != asPlural && asPluralAsSingular == word && result != word) word + "s" == asPlural)
{ {
return word; return result ?? word;
} }
var asPluralAsSingular = ApplyRules(singulars, asPlural, false);
if (asPluralAsSingular != word ||
result == word)
{
return result ?? word; return result ?? word;
} }
private string ApplyRules(IList<Rule> rules, string word, bool skipFirstRule) return word;
}
string? ApplyRules(IList<Rule> rules, string? word, bool skipFirstRule)
{ {
if (word == null) if (word == null)
{ {
@ -145,34 +178,43 @@ namespace Humanizer.Inflections
break; break;
} }
} }
return result;
}
private bool IsUncountable(string word) if (result == null)
{ {
return _uncountables.Contains(word.ToLower()); return null;
} }
private class Rule return MatchUpperCase(word, result);
{ }
private readonly Regex _regex;
private readonly string _replacement; bool IsUncountable(string word) =>
uncountables.Contains(word);
static string MatchUpperCase(string word, string replacement) =>
char.IsUpper(word[0]) &&
char.IsLower(replacement[0]) ? StringHumanizeExtensions.Concat(char.ToUpper(replacement[0]), replacement.AsSpan(1)) : replacement;
public Rule(string pattern, string replacement) /// <summary>
/// If the word is the letter s, singular or plural, return the letter s singular
/// </summary>
string? LetterS(string word)
{ {
_regex = new Regex(pattern, RegexOptions.IgnoreCase | RegexOptions.Compiled); var s = letterS.Match(word);
_replacement = replacement; return s.Groups.Count > 1 ? s.Groups[1].Value : null;
} }
public string Apply(string word) class Rule(string pattern, string replacement)
{ {
if (!_regex.IsMatch(word)) readonly Regex regex = new(pattern, RegexOptions.IgnoreCase | RegexOptions.Compiled);
public string? Apply(string word)
{
if (!regex.IsMatch(word))
{ {
return null; return null;
} }
return _regex.Replace(word, _replacement); return regex.Replace(word, replacement);
}
} }
} }
} }

1
ICSharpCode.Decompiler/ICSharpCode.Decompiler.csproj

@ -108,6 +108,7 @@
<Compile Include="DecompilationProgress.cs" /> <Compile Include="DecompilationProgress.cs" />
<Compile Include="Disassembler\IEntityProcessor.cs" /> <Compile Include="Disassembler\IEntityProcessor.cs" />
<Compile Include="Disassembler\SortByNameProcessor.cs" /> <Compile Include="Disassembler\SortByNameProcessor.cs" />
<Compile Include="Humanizer\StringHumanizeExtensions.cs" />
<Compile Include="IL\Transforms\InlineArrayTransform.cs" /> <Compile Include="IL\Transforms\InlineArrayTransform.cs" />
<Compile Include="IL\Transforms\RemoveUnconstrainedGenericReferenceTypeCheck.cs" /> <Compile Include="IL\Transforms\RemoveUnconstrainedGenericReferenceTypeCheck.cs" />
<Compile Include="Metadata\MetadataFile.cs" /> <Compile Include="Metadata\MetadataFile.cs" />

Loading…
Cancel
Save