From 4dc7c63c4be8e55c6ce20e3cd8824b34274cbfc8 Mon Sep 17 00:00:00 2001 From: Christoph Wille Date: Tue, 5 Aug 2025 13:24:50 +0200 Subject: [PATCH] Update Humanizer --- .../Humanizer/StringHumanizeExtensions.cs | 23 ++ .../Humanizer/Vocabularies.cs | 273 ++++++++-------- .../Humanizer/Vocabulary.cs | 298 ++++++++++-------- .../ICSharpCode.Decompiler.csproj | 1 + 4 files changed, 333 insertions(+), 262 deletions(-) create mode 100644 ICSharpCode.Decompiler/Humanizer/StringHumanizeExtensions.cs diff --git a/ICSharpCode.Decompiler/Humanizer/StringHumanizeExtensions.cs b/ICSharpCode.Decompiler/Humanizer/StringHumanizeExtensions.cs new file mode 100644 index 000000000..926714866 --- /dev/null +++ b/ICSharpCode.Decompiler/Humanizer/StringHumanizeExtensions.cs @@ -0,0 +1,23 @@ +namespace Humanizer.Inflections; + +using CharSpan = System.ReadOnlySpan; + +/// +/// Contains extension methods for humanizing string values. +/// +internal static class StringHumanizeExtensions +{ + internal static unsafe string Concat(CharSpan left, CharSpan right) + { + var result = new string('\0', left.Length + right.Length); + fixed (char* pResult = result) + { + left.CopyTo(new(pResult, left.Length)); + right.CopyTo(new(pResult + left.Length, right.Length)); + } + return result; + } + + internal static unsafe string Concat(char left, CharSpan right) => + Concat(new CharSpan(&left, 1), right); +} \ No newline at end of file diff --git a/ICSharpCode.Decompiler/Humanizer/Vocabularies.cs b/ICSharpCode.Decompiler/Humanizer/Vocabularies.cs index efb2a12cd..302df27d5 100644 --- a/ICSharpCode.Decompiler/Humanizer/Vocabularies.cs +++ b/ICSharpCode.Decompiler/Humanizer/Vocabularies.cs @@ -1,149 +1,154 @@ using System; using System.Threading; -namespace Humanizer.Inflections +namespace Humanizer.Inflections; + +/// +/// Container for registered Vocabularies. At present, only a single vocabulary is supported: Default. +/// +internal static class Vocabularies { + static readonly Lazy Instance = new(BuildDefault, LazyThreadSafetyMode.PublicationOnly); + /// - /// Container for registered Vocabularies. At present, only a single vocabulary is supported: Default. + /// The default vocabulary used for singular/plural irregularities. + /// Rules can be added to this vocabulary and will be picked up by called to Singularize() and Pluralize(). + /// At this time, multiple vocabularies and removing existing rules are not supported. /// - internal static class Vocabularies - { - private static readonly Lazy Instance; - - static Vocabularies() - { - Instance = new Lazy(BuildDefault, LazyThreadSafetyMode.PublicationOnly); - } + public static Vocabulary Default => Instance.Value; - /// - /// The default vocabulary used for singular/plural irregularities. - /// Rules can be added to this vocabulary and will be picked up by called to Singularize() and Pluralize(). - /// At this time, multiple vocabularies and removing existing rules are not supported. - /// - public static Vocabulary Default => Instance.Value; - - private static Vocabulary BuildDefault() - { - var _default = new Vocabulary(); - - _default.AddPlural("$", "s"); - _default.AddPlural("s$", "s"); - _default.AddPlural("(ax|test)is$", "$1es"); - _default.AddPlural("(octop|vir|alumn|fung|cact|foc|hippopotam|radi|stimul|syllab|nucle)us$", "$1i"); - _default.AddPlural("(alias|bias|iris|status|campus|apparatus|virus|walrus|trellis)$", "$1es"); - _default.AddPlural("(buffal|tomat|volcan|ech|embarg|her|mosquit|potat|torped|vet)o$", "$1oes"); - _default.AddPlural("([dti])um$", "$1a"); - _default.AddPlural("sis$", "ses"); - _default.AddPlural("(?:([^f])fe|([lr])f)$", "$1$2ves"); - _default.AddPlural("(hive)$", "$1s"); - _default.AddPlural("([^aeiouy]|qu)y$", "$1ies"); - _default.AddPlural("(x|ch|ss|sh)$", "$1es"); - _default.AddPlural("(matr|vert|ind|d)ix|ex$", "$1ices"); - _default.AddPlural("(^[m|l])ouse$", "$1ice"); - _default.AddPlural("^(ox)$", "$1en"); - _default.AddPlural("(quiz)$", "$1zes"); - _default.AddPlural("(buz|blit|walt)z$", "$1zes"); - _default.AddPlural("(hoo|lea|loa|thie)f$", "$1ves"); - _default.AddPlural("(alumn|alg|larv|vertebr)a$", "$1ae"); - _default.AddPlural("(criteri|phenomen)on$", "$1a"); + static Vocabulary BuildDefault() + { + var _default = new Vocabulary(); - _default.AddSingular("s$", ""); - _default.AddSingular("(n)ews$", "$1ews"); - _default.AddSingular("([dti])a$", "$1um"); - _default.AddSingular("(analy|ba|diagno|parenthe|progno|synop|the|ellip|empha|neuro|oa|paraly)ses$", "$1sis"); - _default.AddSingular("([^f])ves$", "$1fe"); - _default.AddSingular("(hive)s$", "$1"); - _default.AddSingular("(tive)s$", "$1"); - _default.AddSingular("([lr]|hoo|lea|loa|thie)ves$", "$1f"); - _default.AddSingular("(^zomb)?([^aeiouy]|qu)ies$", "$2y"); - _default.AddSingular("(s)eries$", "$1eries"); - _default.AddSingular("(m)ovies$", "$1ovie"); - _default.AddSingular("(x|ch|ss|sh)es$", "$1"); - _default.AddSingular("(^[m|l])ice$", "$1ouse"); - _default.AddSingular("(o)es$", "$1"); - _default.AddSingular("(shoe)s$", "$1"); - _default.AddSingular("(cris|ax|test)es$", "$1is"); - _default.AddSingular("(octop|vir|alumn|fung|cact|foc|hippopotam|radi|stimul|syllab|nucle)i$", "$1us"); - _default.AddSingular("(alias|bias|iris|status|campus|apparatus|virus|walrus|trellis)es$", "$1"); - _default.AddSingular("^(ox)en", "$1"); - _default.AddSingular("(matr|d)ices$", "$1ix"); - _default.AddSingular("(vert|ind)ices$", "$1ex"); - _default.AddSingular("(quiz)zes$", "$1"); - _default.AddSingular("(buz|blit|walt)zes$", "$1z"); - _default.AddSingular("(alumn|alg|larv|vertebr)ae$", "$1a"); - _default.AddSingular("(criteri|phenomen)a$", "$1on"); - _default.AddSingular("([b|r|c]ook|room|smooth)ies$", "$1ie"); + _default.AddPlural("$", "s"); + _default.AddPlural("s$", "s"); + _default.AddPlural("(ax|test)is$", "$1es"); + _default.AddPlural("(octop|vir|alumn|fung|cact|foc|hippopotam|radi|stimul|syllab|nucle)us$", "$1i"); + _default.AddPlural("(alias|bias|iris|status|campus|apparatus|virus|walrus|trellis)$", "$1es"); + _default.AddPlural("(buffal|tomat|volcan|ech|embarg|her|mosquit|potat|torped|vet)o$", "$1oes"); + _default.AddPlural("([dti])um$", "$1a"); + _default.AddPlural("sis$", "ses"); + _default.AddPlural("(?:([^f])fe|([lr])f)$", "$1$2ves"); + _default.AddPlural("(hive)$", "$1s"); + _default.AddPlural("([^aeiouy]|qu)y$", "$1ies"); + _default.AddPlural("(x|ch|ss|sh)$", "$1es"); + _default.AddPlural("(matr|vert|ind|d)(ix|ex)$", "$1ices"); + _default.AddPlural("(^[m|l])ouse$", "$1ice"); + _default.AddPlural("^(ox)$", "$1en"); + _default.AddPlural("(quiz)$", "$1zes"); + _default.AddPlural("(buz|blit|walt)z$", "$1zes"); + _default.AddPlural("(hoo|lea|loa|thie)f$", "$1ves"); + _default.AddPlural("(alumn|alg|larv|vertebr)a$", "$1ae"); + _default.AddPlural("(criteri|phenomen)on$", "$1a"); - _default.AddIrregular("person", "people"); - _default.AddIrregular("man", "men"); - _default.AddIrregular("human", "humans"); - _default.AddIrregular("child", "children"); - _default.AddIrregular("sex", "sexes"); - _default.AddIrregular("glove", "gloves"); - _default.AddIrregular("move", "moves"); - _default.AddIrregular("goose", "geese"); - _default.AddIrregular("wave", "waves"); - _default.AddIrregular("die", "dice"); - _default.AddIrregular("foot", "feet"); - _default.AddIrregular("tooth", "teeth"); - _default.AddIrregular("curriculum", "curricula"); - _default.AddIrregular("database", "databases"); - _default.AddIrregular("zombie", "zombies"); - _default.AddIrregular("personnel", "personnel"); - //Fix #789 - _default.AddIrregular("cache", "caches"); + _default.AddSingular("s$", ""); + _default.AddSingular("(n)ews$", "$1ews"); + _default.AddSingular("([dti])a$", "$1um"); + _default.AddSingular("(analy|ba|diagno|parenthe|progno|synop|the|ellip|empha|neuro|oa|paraly)ses$", "$1sis"); + _default.AddSingular("([^f])ves$", "$1fe"); + _default.AddSingular("(hive)s$", "$1"); + _default.AddSingular("(tive)s$", "$1"); + _default.AddSingular("([lr]|hoo|lea|loa|thie)ves$", "$1f"); + _default.AddSingular("(^zomb)?([^aeiouy]|qu)ies$", "$2y"); + _default.AddSingular("(s)eries$", "$1eries"); + _default.AddSingular("(m)ovies$", "$1ovie"); + _default.AddSingular("(x|ch|ss|sh)es$", "$1"); + _default.AddSingular("(^[m|l])ice$", "$1ouse"); + _default.AddSingular("(? +/// A container for exceptions to simple pluralization/singularization rules. +/// Vocabularies.Default contains an extensive list of rules for US English. +/// At this time, multiple vocabularies and removing existing rules are not supported. +/// +internal class Vocabulary { + internal Vocabulary() + { + } + + readonly List plurals = []; + readonly List singulars = []; + readonly HashSet uncountables = new(StringComparer.CurrentCultureIgnoreCase); + readonly Regex letterS = new("^([sS])[sS]*$"); + /// - /// A container for exceptions to simple pluralization/singularization rules. - /// Vocabularies.Default contains an extensive list of rules for US English. - /// At this time, multiple vocabularies and removing existing rules are not supported. + /// Adds a word to the vocabulary which cannot easily be pluralized/singularized by RegEx, e.g. "person" and "people". /// - internal class Vocabulary + /// The singular form of the irregular word, e.g. "person". + /// The plural form of the irregular word, e.g. "people". + /// True to match these words on their own as well as at the end of longer words. False, otherwise. + public void AddIrregular(string singular, string plural, bool matchEnding = true) { - internal Vocabulary() + if (matchEnding) { + var singularSubstring = singular.Substring(1); + var pluralSubString = plural.Substring(1); + AddPlural($"({singular[0]}){singularSubstring}$", $"$1{pluralSubString}"); + AddSingular($"({plural[0]}){pluralSubString}$", $"$1{singularSubstring}"); } + else + { + AddPlural($"^{singular}$", plural); + AddSingular($"^{plural}$", singular); + } + } - private readonly List _plurals = new List(); - private readonly List _singulars = new List(); - private readonly List _uncountables = new List(); + /// + /// Adds an uncountable word to the vocabulary, e.g. "fish". Will be ignored when plurality is changed. + /// + /// Word to be added to the list of uncountables. + public void AddUncountable(string word) => + uncountables.Add(word); - /// - /// Adds a word to the vocabulary which cannot easily be pluralized/singularized by RegEx, e.g. "person" and "people". - /// - /// The singular form of the irregular word, e.g. "person". - /// The plural form of the irregular word, e.g. "people". - /// True to match these words on their own as well as at the end of longer words. False, otherwise. - public void AddIrregular(string singular, string plural, bool matchEnding = true) + /// + /// Adds a rule to the vocabulary that does not follow trivial rules for pluralization, e.g. "bus" -> "buses" + /// + /// RegEx to be matched, case insensitive, e.g. "(bus)es$" + /// RegEx replacement e.g. "$1" + public void AddPlural(string rule, string replacement) => + plurals.Add(new(rule, replacement)); + + /// + /// Adds a rule to the vocabulary that does not follow trivial rules for singularization, e.g. "vertices/indices -> "vertex/index" + /// + /// RegEx to be matched, case insensitive, e.g. ""(vert|ind)ices$"" + /// RegEx replacement e.g. "$1ex" + public void AddSingular(string rule, string replacement) => + singulars.Add(new(rule, replacement)); + + /// + /// Pluralizes the provided input considering irregular words + /// + /// Word to be pluralized + /// Normally you call Pluralize on singular words; but if you're unsure call it with false + [return: NotNullIfNotNull(nameof(word))] + public string? Pluralize(string? word, bool inputIsKnownToBeSingular = true) + { + if (word == null) { - if (matchEnding) - { - AddPlural("(" + singular[0] + ")" + singular.Substring(1) + "$", "$1" + plural.Substring(1)); - AddSingular("(" + plural[0] + ")" + plural.Substring(1) + "$", "$1" + singular.Substring(1)); - } - else - { - AddPlural($"^{singular}$", plural); - AddSingular($"^{plural}$", singular); - } + return null; } - /// - /// Adds an uncountable word to the vocabulary, e.g. "fish". Will be ignored when plurality is changed. - /// - /// Word to be added to the list of uncountables. - public void AddUncountable(string word) + var s = LetterS(word); + if (s != null) { - _uncountables.Add(word.ToLower()); + return s + "s"; } - /// - /// Adds a rule to the vocabulary that does not follow trivial rules for pluralization, e.g. "bus" -> "buses" - /// - /// RegEx to be matched, case insensitive, e.g. "(bus)es$" - /// RegEx replacement e.g. "$1" - public void AddPlural(string rule, string replacement) + var result = ApplyRules(plurals, word, false); + + if (inputIsKnownToBeSingular) { - _plurals.Add(new Rule(rule, replacement)); + return result ?? word; } - /// - /// Adds a rule to the vocabulary that does not follow trivial rules for singularization, e.g. "vertices/indices -> "vertex/index" - /// - /// RegEx to be matched, case insensitive, e.g. ""(vert|ind)ices$"" - /// RegEx replacement e.g. "$1ex" - public void AddSingular(string rule, string replacement) + var asSingular = ApplyRules(singulars, word, false); + var asSingularAsPlural = ApplyRules(plurals, asSingular, false); + if (asSingular != null && + asSingular != word && + asSingular + "s" != word && + asSingularAsPlural == word && + result != word) { - _singulars.Add(new Rule(rule, replacement)); + return word; } - /// - /// Pluralizes the provided input considering irregular words - /// - /// Word to be pluralized - /// Normally you call Pluralize on singular words; but if you're unsure call it with false - /// - public string Pluralize(string word, bool inputIsKnownToBeSingular = true) - { - var result = ApplyRules(_plurals, word, false); + return result!; + } - if (inputIsKnownToBeSingular) - { - return result ?? word; - } + /// + /// Singularizes the provided input considering irregular words + /// + /// Word to be singularized + /// Normally you call Singularize on plural words; but if you're unsure call it with false + /// Skip singularizing single words that have an 's' on the end + [return: NotNullIfNotNull(nameof(word))] + public string? Singularize(string? word, bool inputIsKnownToBePlural = true, bool skipSimpleWords = false) + { + if (word == null) + { + return null; + } + var s = LetterS(word); + if (s != null) + { + return s; + } - var asSingular = ApplyRules(_singulars, word, false); - var asSingularAsPlural = ApplyRules(_plurals, asSingular, false); - if (asSingular != null && asSingular != word && asSingular + "s" != word && asSingularAsPlural == word && result != word) - { - return word; - } + var result = ApplyRules(singulars, word, skipSimpleWords); - return result; + if (inputIsKnownToBePlural) + { + return result ?? word; } - /// - /// Singularizes the provided input considering irregular words - /// - /// Word to be singularized - /// Normally you call Singularize on plural words; but if you're unsure call it with false - /// Skip singularizing single words that have an 's' on the end - /// - public string Singularize(string word, bool inputIsKnownToBePlural = true, bool skipSimpleWords = false) + // the Plurality is unknown so we should check all possibilities + var asPlural = ApplyRules(plurals, word, false); + if (asPlural == word || + word + "s" == asPlural) { - var result = ApplyRules(_singulars, word, skipSimpleWords); - - if (inputIsKnownToBePlural) - { - return result ?? word; - } - - // the Plurality is unknown so we should check all possibilities - var asPlural = ApplyRules(_plurals, word, false); - var asPluralAsSingular = ApplyRules(_singulars, asPlural, false); - if (asPlural != word && word + "s" != asPlural && asPluralAsSingular == word && result != word) - { - return word; - } + return result ?? word; + } + var asPluralAsSingular = ApplyRules(singulars, asPlural, false); + if (asPluralAsSingular != word || + result == word) + { return result ?? word; } - private string ApplyRules(IList rules, string word, bool skipFirstRule) + return word; + } + + string? ApplyRules(IList rules, string? word, bool skipFirstRule) + { + if (word == null) { - if (word == null) - { - return null; - } + return null; + } - if (word.Length < 1) - { - return word; - } + if (word.Length < 1) + { + return word; + } - if (IsUncountable(word)) - { - return word; - } + if (IsUncountable(word)) + { + return word; + } - var result = word; - var end = skipFirstRule ? 1 : 0; - for (var i = rules.Count - 1; i >= end; i--) + var result = word; + var end = skipFirstRule ? 1 : 0; + for (var i = rules.Count - 1; i >= end; i--) + { + if ((result = rules[i].Apply(word)) != null) { - if ((result = rules[i].Apply(word)) != null) - { - break; - } + break; } - return result; } - private bool IsUncountable(string word) + if (result == null) { - return _uncountables.Contains(word.ToLower()); + return null; } - private class Rule - { - private readonly Regex _regex; - private readonly string _replacement; + return MatchUpperCase(word, result); + } - public Rule(string pattern, string replacement) - { - _regex = new Regex(pattern, RegexOptions.IgnoreCase | RegexOptions.Compiled); - _replacement = replacement; - } + bool IsUncountable(string word) => + uncountables.Contains(word); - public string Apply(string word) - { - if (!_regex.IsMatch(word)) - { - return null; - } + static string MatchUpperCase(string word, string replacement) => + char.IsUpper(word[0]) && + char.IsLower(replacement[0]) ? StringHumanizeExtensions.Concat(char.ToUpper(replacement[0]), replacement.AsSpan(1)) : replacement; + + /// + /// If the word is the letter s, singular or plural, return the letter s singular + /// + string? LetterS(string word) + { + var s = letterS.Match(word); + return s.Groups.Count > 1 ? s.Groups[1].Value : null; + } + + class Rule(string pattern, string replacement) + { + readonly Regex regex = new(pattern, RegexOptions.IgnoreCase | RegexOptions.Compiled); - return _regex.Replace(word, _replacement); + public string? Apply(string word) + { + if (!regex.IsMatch(word)) + { + return null; } + + return regex.Replace(word, replacement); } } } \ No newline at end of file diff --git a/ICSharpCode.Decompiler/ICSharpCode.Decompiler.csproj b/ICSharpCode.Decompiler/ICSharpCode.Decompiler.csproj index 15d677535..cf44cc470 100644 --- a/ICSharpCode.Decompiler/ICSharpCode.Decompiler.csproj +++ b/ICSharpCode.Decompiler/ICSharpCode.Decompiler.csproj @@ -108,6 +108,7 @@ +