Login


Extending System.String

By Jonathan Wood on 7/11/2014
Language: C#
Technology: .NET, LINQ
Platform: Windows
License: CPOL
Views: 7,141
Frameworks & Libraries » LINQ » General » Extending System.String

Download the Demo Project

Introduction

I've written a number of articles about extension methods. To summarize, extension methods allow you to add methods to an existing class without modifying that existing class.

Extension methods were used to implement the LINQ functions, which are available for all IEnumerable types.

So I thought it might be interesting to use extension methods to extend a common class: System.String. Of course, the existing String class already has a lot of functionality, so it requires a bit of thought about what would be interesting to add. But I came up with 23 extension methods, which I'll present in this article.

Presenting the Code

Listing 1 shows my StringExtensions class. In order to implement an extension method, it must be a member of a static class, which StringExtensions is.

To implement an extension method in this class, you must declare the method as static, and specify the this keyword for the first parameter. This makes the method an extension method for the type used to declare the parameter with the this keyword. For all the extension methods in Listing 1, this type is System.String.

As long as the namespace that contains your extension method is visible, that method will appear as a method for all variables of that type. Intellisense will even list it as an available method.

Listing 1: The StringExtensions Class

/// <summary>
/// Extension methods for System.String: null/empty normalization,
/// character-set operations built on LINQ, simple value tests,
/// tokenizing, length limiting and title casing.
/// </summary>
static class StringExtensions
{
    /// <summary>
    /// Values accepted by IsTrue. They are compared without regard to case.
    /// </summary>
    private static readonly string[] TrueValues =
    {
        "true", "t", "yes", "y", "on", "1"
    };

    /// <summary>
    /// Lowercase words that SetTitleWordCase leaves uncapitalized unless
    /// they are the first word of a sentence. Built once instead of on
    /// every call.
    /// </summary>
    private static readonly HashSet<string> SeparatorWords = new HashSet<string>
    {
        "a", "about", "after", "an", "and", "are", "around", "as", "at",
        "be", "before", "but", "by", "else", "for", "from", "how", "if",
        "in", "is", "into", "nor", "of", "on", "or", "over", "that", "the",
        "then", "through", "to", "under", "when", "where", "why", "with"
    };

    /// <summary>
    /// Returns this string, or an empty string if this string is null.
    /// </summary>
    public static string EmptyIfNull(this string s)
    {
        return s ?? String.Empty;
    }

    /// <summary>
    /// Returns this string, or null if this string is empty. A null
    /// string is returned unchanged (as null) rather than throwing.
    /// </summary>
    public static string NullIfEmpty(this string s)
    {
        return String.IsNullOrEmpty(s) ? null : s;
    }

    /// <summary>
    /// Returns this string, or an empty string if this string is null
    /// or contains only whitespace. Any leading or trailing whitespace
    /// characters are trimmed.
    /// </summary>
    public static string EmptyIfNullOrWhiteSpace(this string s)
    {
        return String.IsNullOrWhiteSpace(s) ? String.Empty : s.Trim();
    }

    /// <summary>
    /// Returns this string, or null if this string is null or contains
    /// only whitespace. Any leading or trailing whitespace characters
    /// are trimmed.
    /// </summary>
    public static string NullIfEmptyOrWhiteSpace(this string s)
    {
        return String.IsNullOrWhiteSpace(s) ? null : s.Trim();
    }

    /// <summary>
    /// Returns true if this string contains the specified character.
    /// </summary>
    /// <param name="c">Character to find</param>
    public static bool Contains(this string s, char c)
    {
        return (s.IndexOf(c) >= 0);
    }

    /// <summary>
    /// Returns true if this string contains any of the characters
    /// in the specified string.
    /// </summary>
    /// <param name="s2">String of characters to find</param>
    public static bool ContainsAny(this string s, string s2)
    {
        return (s.IndexOfAny(s2.ToCharArray()) >= 0);
    }

    /// <summary>
    /// Returns a copy of this string with the order of the characters
    /// reversed.
    /// </summary>
    /// <remarks>
    /// The reversal works on individual char values, so surrogate pairs
    /// and combining sequences are not kept together.
    /// </remarks>
    public static string Reverse(this string s)
    {
        // ToCharArray() is required: calling s.Reverse() on the string itself
        // would bind to this extension method and recurse forever.
        return new string(s.ToCharArray().Reverse().ToArray());
    }

    /// <summary>
    /// Returns a copy of this string with the characters sorted in
    /// ascending order of their char values.
    /// </summary>
    public static string Sort(this string s)
    {
        return new string(s.ToCharArray().OrderBy(c => c).ToArray());
    }

    /// <summary>
    /// Returns a string with the distinct (no duplicates) characters
    /// from this string, in order of first appearance.
    /// </summary>
    public static string Distinct(this string s)
    {
        // ToCharArray() keeps the call bound to LINQ's Distinct rather than
        // to this extension method.
        return new String(s.ToCharArray().Distinct().ToArray());
    }

    /// <summary>
    /// Returns the average character value of this string. Any fractional
    /// part of the average is discarded.
    /// </summary>
    /// <exception cref="InvalidOperationException">This string is empty.</exception>
    public static char Average(this string s)
    {
        return (char)s.ToCharArray().Average(c => c);
    }

    /// <summary>
    /// Returns a string that contains the union of all characters in this
    /// string as well as the specified string. This is a set operation:
    /// each character appears only once in the result.
    /// </summary>
    /// <param name="s2">String of characters to be combined with the first
    /// string</param>
    public static string Union(this string s, string s2)
    {
        return new string(s.ToCharArray().Union(s2.ToCharArray()).ToArray());
    }

    /// <summary>
    /// Returns a string that contains all the characters in this string that
    /// are also in the specified string. This is a set operation: each
    /// character appears only once in the result.
    /// </summary>
    /// <param name="s2">String of characters whose characters will be in the
    /// result if they are also in the first string</param>
    public static string Intersect(this string s, string s2)
    {
        return new string(s.ToCharArray().Intersect(s2.ToCharArray()).ToArray());
    }

    /// <summary>
    /// Returns the characters of this string that do not appear in the
    /// specified string. This is a set operation: duplicate characters in
    /// this string are collapsed, so "aabc".Except("c") returns "ab". Use
    /// the predicate overload to keep duplicates.
    /// </summary>
    /// <param name="s2">A string of characters that should be removed</param>
    public static string Except(this string s, string s2)
    {
        return new string(s.ToCharArray().Except(s2.ToCharArray()).ToArray());
    }

    /// <summary>
    /// Returns a copy of this string with all characters where the predicate
    /// returns true removed. Remaining characters, including duplicates,
    /// keep their original order.
    /// </summary>
    /// <param name="predicate">Predicate to return true for characters that
    /// should be removed</param>
    public static string Except(this string s, Func<char, bool> predicate)
    {
        StringBuilder sb = new StringBuilder(s.Length);
        foreach (char c in s)
        {
            if (!predicate(c))
                sb.Append(c);
        }
        return sb.ToString();
    }

    /// <summary>
    /// Returns true if this string's value can be interpreted as "true".
    /// Accepted values are "true", "t", "yes", "y", "on" and "1", ignoring
    /// case and surrounding whitespace. A null string returns false.
    /// </summary>
    public static bool IsTrue(this string s)
    {
        if (s == null)
            return false;

        s = s.Trim();
        foreach (string candidate in TrueValues)
        {
            // Ordinal comparison keeps the result independent of the
            // current culture's casing rules.
            if (String.Equals(s, candidate, StringComparison.OrdinalIgnoreCase))
                return true;
        }
        return false;
    }

    /// <summary>
    /// Returns true if this string is non-empty and every character is a
    /// decimal digit. Signs, separators and whitespace are not accepted.
    /// A null or empty string returns false.
    /// </summary>
    public static bool IsInteger(this string s)
    {
        // Enumerable.All is vacuously true for an empty sequence, so the
        // empty string has to be rejected explicitly.
        return !String.IsNullOrEmpty(s) && s.All(c => Char.IsDigit(c));
    }

    /// <summary>
    /// Returns true if this string consists of at least one decimal digit
    /// and at most one '.', with no other characters. A null or empty
    /// string, or a lone ".", returns false.
    /// </summary>
    public static bool IsNumber(this string s)
    {
        if (String.IsNullOrEmpty(s))
            return false;

        int digits = 0;
        int points = 0;
        foreach (char c in s)
        {
            if (Char.IsDigit(c))
                digits++;
            else if (c == '.')
                points++;
            else
                return false;
        }
        return (digits > 0 && points <= 1);
    }

    /// <summary>
    /// Returns a copy of this string with all characters removed
    /// that are not part of a valid integer value (that is, everything
    /// except decimal digits).
    /// </summary>
    public static string StripNonIntegerChars(this string s)
    {
        return s.Except(c => !Char.IsDigit(c));
    }

    /// <summary>
    /// Returns a copy of this string with all characters removed
    /// that are not part of a valid numeric value (that is, everything
    /// except decimal digits and '.').
    /// </summary>
    public static string StripNonNumberChars(this string s)
    {
        return s.Except(c => !Char.IsDigit(c) && c != '.');
    }

    /// <summary>
    /// Splits this string into an array of string tokens. Consecutive
    /// delimiters are treated as one, so no empty tokens are returned.
    /// </summary>
    /// <param name="delimiterChars">Characters that delimit tokens</param>
    public static string[] Tokenize(this string s, string delimiterChars)
    {
        return s.Tokenize(c => delimiterChars.IndexOf(c) >= 0);
    }

    /// <summary>
    /// Splits this string into an array of string tokens. Consecutive
    /// delimiters are treated as one, so no empty tokens are returned.
    /// </summary>
    /// <param name="predicate">Delegate to return true for characters
    /// that delimit tokens</param>
    public static string[] Tokenize(this string s, Func<char, bool> predicate)
    {
        List<string> tokens = new List<string>();
        int pos = 0;

        while (pos < s.Length)
        {
            // Skip any run of delimiters
            while (pos < s.Length && predicate(s[pos]))
                pos++;
            // Consume the token that follows
            int start = pos;
            while (pos < s.Length && !predicate(s[pos]))
                pos++;
            if (pos > start)
                tokens.Add(s.Substring(start, pos - start));
        }
        return tokens.ToArray();
    }

    /// <summary>
    /// Returns a copy of this string, truncated to the specified length.
    /// Strings that already fit are returned unchanged.
    /// </summary>
    /// <remarks>
    /// When <paramref name="showEllipsis"/> is true, the ellipsis is added
    /// after truncation, so the result can be up to three characters longer
    /// than <paramref name="maxLength"/>.
    /// </remarks>
    /// <param name="maxLength">Maximum string length; must not be
    /// negative</param>
    /// <param name="trimEnd">If true, any trailing spaces or partial words
    /// are removed from the shortened string</param>
    /// <param name="showEllipsis">If true, "..." is appended to
    /// the shortened string</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="maxLength"/> is negative.</exception>
    public static string LimitLength(this string s, int maxLength,
        bool trimEnd = true, bool showEllipsis = true)
    {
        if (maxLength < 0)
            throw new ArgumentOutOfRangeException("maxLength",
                "Maximum length cannot be negative.");

        if (s.Length > maxLength)
        {
            int len = maxLength;
            if (trimEnd)
            {
                // s[len] is the first character that would be cut off (it
                // exists because s.Length > maxLength). While it is a word
                // character the cut falls inside a word, so back up.
                while (len > 0 && IsWordChar(s[len]))
                    len--;
                // Drop whitespace left at the end of the kept text
                while (len > 0 && Char.IsWhiteSpace(s[len - 1]))
                    len--;
                // No earlier word boundary: fall back to a hard cut
                if (len == 0)
                    len = maxLength;
            }
            s = s.Substring(0, len);
            if (showEllipsis)
                s += "...";
        }
        return s;
    }

    /// <summary>
    /// Returns a copy of this string with title case. Non-word characters
    /// are copied through unchanged.
    /// </summary>
    public static string TitleCase(this string s)
    {
        StringBuilder result = new StringBuilder(s.Length);
        StringBuilder currWord = new StringBuilder();
        bool inWord = false;
        int wordNumber = 0;     // 1-based position of the word in its sentence

        foreach (char c in s)
        {
            if (IsWordChar(c))
            {
                // In a word
                if (!inWord)
                {
                    wordNumber++;
                    inWord = true;
                    currWord.Clear();
                }
                currWord.Append(c);
            }
            else
            {
                // Not in a word
                if (inWord)
                {
                    result.Append(SetTitleWordCase(currWord.ToString(), wordNumber));
                    inWord = false;
                }
                result.Append(c);
                // The next word starts a new sentence
                if (IsEndOfSentencePunctuation(c))
                    wordNumber = 0;
            }
        }
        // Flush a word that runs to the end of the string. wordNumber was
        // already advanced when the word started, so it is passed as-is;
        // incrementing it again would stop a sentence-initial final word
        // (e.g. the whole string "the") from being capitalized.
        if (inWord)
            result.Append(SetTitleWordCase(currWord.ToString(), wordNumber));
        return result.ToString();
    }

    /// <summary>
    /// Returns true for characters treated as part of a word: letters,
    /// digits and the apostrophe.
    /// </summary>
    private static bool IsWordChar(char c)
    {
        return (Char.IsLetterOrDigit(c) || c == '\'');
    }

    /// <summary>
    /// Returns true for punctuation that ends a sentence for title-casing
    /// purposes: '.', '!', '?' and ':'.
    /// </summary>
    private static bool IsEndOfSentencePunctuation(char c)
    {
        return (c == '.' || c == '!' || c == '?' || c == ':');
    }

    /// <summary>
    /// Sets title case on a single word: the word is lowercased, then its
    /// first character is capitalized unless it is a separator word that
    /// is not the first word of its sentence.
    /// </summary>
    /// <remarks>
    /// It turns out, the rules for title case are not that well defined.
    /// Different writers have their own style. Here are some common rules.
    /// (Not all rules are implemented here.)
    /// 
    /// 1. Always capitalize the first and the last word.
    /// 2. Capitalize all nouns, pronouns, adjectives, verbs, adverbs, and
    ///    subordinate conjunctions ("as", "because", "although", "if",
    ///    etc.).
    /// 3. Lowercase all articles, coordinate conjunctions ("and", "or",
    ///    "nor"), and prepositions regardless of length, when they are
    ///    other than the first or last word. (Note: NIVA prefers to
    ///    capitalize prepositions of five characters or more ("after",
    ///    "among", "between").)
    /// 4. Lowercase the "to" in an infinitive.
    /// </remarks>
    /// <param name="word">Non-empty word to convert</param>
    /// <param name="wordNumber">1-based position of the word within its
    /// sentence; word 1 is always capitalized</param>
    private static string SetTitleWordCase(string word, int wordNumber)
    {
        Debug.Assert(word.Length > 0);
        word = word.ToLower();
        if (wordNumber == 1 || !SeparatorWords.Contains(word))
            word = word.Substring(0, 1).ToUpper() + word.Substring(1);
        return word;
    }
}

Reviewing the Methods

As you can see, my StringExtensions contains a number of extension methods. Many are trivial and don't require any explanation. But in this section, I'll provide an overview of them, and try to touch on any interesting considerations.

The first few methods (EmptyIfNull, NullIfEmpty, EmptyIfNullOrWhiteSpace, and NullIfEmptyOrWhiteSpace) are related to null and empty strings.

For example, if you are going to print a string that could be null, you might want to print s.EmptyIfNull() to guard against trying to print null. These extension methods also help for the reverse case where you want to have null if the string is empty.

The Contains and ContainsAny methods simply tell you if a string contains a character, or if it contains any of the characters from another string. These methods simply defer to System.String.IndexOf and System.String.IndexOfAny. Although these extension methods discard the information about the location of a character that exists in the string, they provide a shorthand for when you only want to know whether or not the string contains the character or characters.

The next several extension methods (Reverse, Sort, Distinct, Average, Union, Intersect, and Except) are interesting because they all defer to the corresponding LINQ methods. These extension methods convert the string to a char array. In C#, arrays implement IEnumerable and so all the LINQ methods are then available. After the action is performed, the resulting characters are converted back to a string (except for Average, which returns a single char).

I made an overload of Except that does not defer to LINQ. This version accepts a predicate, which provides a bit more flexibility about which characters should be eliminated from the results. So, for example, you could remove all non-numeric characters from a string using s.Except(c => !Char.IsDigit(c) && c != '.').

The IsTrue extension method determines if the string can be interpreted as true. This could be handy, for example, when accepting user input. The user might not type "true"--they might type "yes" instead. This method provides a shorthand way to evaluate such a string and even considers "t", "y", "on" and "1" as true values.

The next four extension methods (IsInteger, IsNumber, StripNonIntegerChars, and StripNonNumberChars) are designed for working with strings that contain numbers. The most useful case I can think of is where a user inputs a number like "1,000". In order to parse this string using int.Parse, you might want to use s.StripNonIntegerChars() to remove the comma.

The next extension method, Tokenize, works much like String.Split. I wanted to provide a little bit more flexibility. I added an overload of this method that takes a predicate to indicate which characters should be interpreted as delimiter characters.

The LimitLength extension method is simple enough: it truncates the string if it's longer than the specified length. However, it contains some additional options. If requested, it will trim off any whitespace or partial words, and it can also append ellipsis (...) to truncated strings.

Finally, my TitleCase extension method will convert the current string to title case. When researching this, I found there were some complex and not very consistent rules about when characters should be capitalized in a title. But my version works pretty well for most cases. You can review my remarks in the code for ideas to enhance this functionality.

Testing the Extension Methods

Of course, I needed to test all my extension methods to ensure they produced the desired results. So I wrote a small test program, which is shown in Listing 2.

Not only does this code test the extension methods, but it also serves as a kind of documentation of the exact behavior of those methods. You can download my full project if you want to play with this further.

Listing 2: Test Program for the StringExtensions Class

/// <summary>
/// Runs every StringExtensions check. A line is printed for each check
/// that fails; passing checks print nothing. Waits for a key press before
/// exiting so the console window stays open.
/// </summary>
static void Main(string[] args)
{
    // Null / empty normalization
    TestIfTrue("NullIfEmpty", "".NullIfEmpty() == null);
    TestIfTrue("NullIfEmpty", " ".NullIfEmpty() == " ");
    TestIfTrue("EmptyIfNull", ((string)null).EmptyIfNull() == "");
    TestIfTrue("EmptyIfNull", "".EmptyIfNull() == "");
    TestIfTrue("EmptyIfNullOrWhiteSpace", " abc ".EmptyIfNullOrWhiteSpace() == "abc");
    TestIfTrue("EmptyIfNullOrWhiteSpace", ((string)null).EmptyIfNullOrWhiteSpace() == "");
    TestIfTrue("NullIfEmptyOrWhiteSpace", " abc ".NullIfEmptyOrWhiteSpace() == "abc");
    TestIfTrue("NullIfEmptyOrWhiteSpace", "  ".NullIfEmptyOrWhiteSpace() == null);

    // Character searches
    TestIfTrue("Contains", "abc".Contains('b'));
    TestIfTrue("Contains", !"abc".Contains('z'));
    TestIfTrue("ContainsAny", "abc".ContainsAny("cde"));
    TestIfTrue("ContainsAny", !"abc".ContainsAny("def"));

    // LINQ-backed character operations
    TestIfTrue("Reverse", "abc".Reverse() == "cba");
    TestIfTrue("Sort", "cba".Sort() == "abc");
    TestIfTrue("Distinct", "aabbcc".Distinct() == "abc");
    TestIfTrue("Average", "abc".Average() == 'b');
    TestIfTrue("Union", "abc".Union("cde") == "abcde");
    TestIfTrue("Intersect", "abc".Intersect("bcd") == "bc");
    TestIfTrue("Except", "abc".Except("cde") == "ab");
    TestIfTrue("Except", "a1b2".Except(c => Char.IsDigit(c)) == "ab");

    // Value tests
    TestIfTrue("IsTrue", "".IsTrue() == false);
    TestIfTrue("IsTrue", "abc".IsTrue() == false);
    TestIfTrue("IsTrue", "no".IsTrue() == false);
    TestIfTrue("IsTrue", "true".IsTrue() == true);
    TestIfTrue("IsTrue", "yes".IsTrue() == true);
    TestIfTrue("IsTrue", " On ".IsTrue() == true);
    TestIfTrue("IsTrue", "1".IsTrue() == true);
    TestIfTrue("IsInteger", "123".IsInteger());
    TestIfTrue("IsInteger", !"12a".IsInteger());
    TestIfTrue("IsInteger", !"1.5".IsInteger());
    TestIfTrue("IsNumber", "1.5".IsNumber());
    TestIfTrue("IsNumber", "15".IsNumber());
    TestIfTrue("IsNumber", !"1.5.2".IsNumber());
    TestIfTrue("IsNumber", !"1,5".IsNumber());

    // Stripping and tokenizing
    TestIfTrue("StripNonIntegerChars", "1,0.0".StripNonIntegerChars() == "100");
    TestIfTrue("StripNonNumberChars", "1,0.0".StripNonNumberChars() == "10.0");
    TestIfTrue("Tokenize", ArraysMatch("abcdefg".Tokenize("cf"),
        new string[] { "ab", "de", "g" }));
    TestIfTrue("Tokenize", ArraysMatch("abc def\tghi\r\njkl"
        .Tokenize(c => Char.IsWhiteSpace(c)),
        new string[] { "abc", "def", "ghi", "jkl" }));

    // Length limiting and title casing
    TestIfTrue("LimitLength", "abc".LimitLength(5) == "abc");
    TestIfTrue("LimitLength", "abcdef ghi".LimitLength(5, false, false) == "abcde");
    TestIfTrue("LimitLength", "abcdef ghi".LimitLength(5, true, true) == "abcde...");
    TestIfTrue("LimitLength", "abcdef ghi".LimitLength(8, false, false) == "abcdef g");
    TestIfTrue("LimitLength", "abcdef ghi".LimitLength(8, true, true) == "abcdef...");
    TestIfTrue("TitleCase", "this is a test".TitleCase() == "This is a Test");
    TestIfTrue("TitleCase", "war and peace".TitleCase() == "War and Peace");
    TestIfTrue("TitleCase", "hello. the end".TitleCase() == "Hello. The End");

    Console.ReadKey();
}

/// <summary>
/// Returns true when both arrays have the same length and hold equal
/// strings at every index.
/// </summary>
static bool ArraysMatch(string[] arr1, string[] arr2)
{
    int count = arr1.Length;
    bool same = (count == arr2.Length);
    int index = 0;

    // Stop at the first mismatch, or once every element has been compared
    while (same && index < count)
    {
        same = (arr1[index] == arr2[index]);
        index++;
    }
    return same;
}

/// <summary>
/// Reports a failed check on the console. Nothing is written when
/// <paramref name="value"/> is true.
/// </summary>
/// <param name="operation">Name of the operation being tested</param>
/// <param name="value">Outcome of the check; false means failure</param>
static void TestIfTrue(string operation, bool value)
{
    // Passing checks are silent
    if (value)
        return;

    Console.Write("TEST FAILED: ");
    Console.WriteLine("Test Failed for {0}", operation);
}

Conclusion

For the most part, I wrote this project because I found it interesting. It remains to be seen how many of these methods will actually be useful.

Either way, there you have it. I hope someone finds it helpful.

End-User License

Use of this article and any related source code or other files is governed by the terms and conditions of The Code Project Open License.

Author Information

Jonathan Wood

I'm a software/website developer working out of the greater Salt Lake City area in Utah. I've developed many websites including Black Belt Coder, Insider Articles, and others.