Skip to main content

Split a string on whole words

In case you've ever needed an extension method that splits a string on whole words, here is my implementation.

public static class StringExtensions
{
    /// <summary>
    /// Cutting only at whole words can leave a very ragged right edge.
    /// A partition is therefore cut at a space only when that space lies
    /// within this many characters of the partition's right edge; when the
    /// nearest space is further to the left, the word is hyphenated instead.
    /// </summary>
    private const int MaxStringSplitOffset = 4;

    /// <summary>
    /// Splits a string into partitions of at most <paramref name="partitionSize"/>
    /// characters, cutting at spaces where possible.
    /// </summary>
    /// <remarks>
    /// For every partition except the last, one of three things happens:
    /// the partition is cut at a space (which is dropped), the last word is
    /// hyphenated with a trailing "-", or - when the partition contains no
    /// space at all - it is cut hard at <paramref name="partitionSize"/>.
    /// An empty input yields a single empty partition.
    /// </remarks>
    /// <param name="input">The input.</param>
    /// <param name="partitionSize">Size of the partition.</param>
    /// <returns>An array of strings not exceeding the partition size</returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="partitionSize"/> is zero or negative.</exception>
    public static string[] SplitOnWholeWords(this string input, int partitionSize)
    {
        if (input == null)
        {
            throw new ArgumentNullException("input");
        }
        if (partitionSize <= 0)
        {
            throw new ArgumentOutOfRangeException("partitionSize", "partitionSize must be larger than 0");
        }
        Contract.EndContractBlock();

        var partitioned = new List<string>();

        var rest = input;
        while (rest.Length > partitionSize)
        {
            var part = rest.Substring(0, partitionSize);
            var cutIndex = part.LastIndexOf(' ');

            /* The character right after the partition is a space, so the
               whole partition can be kept and the cut made on that space */
            if (rest[partitionSize] == ' ')
            {
                cutIndex = partitionSize;
            }

            if (cutIndex == -1)
            {
                /* No space found: cut hard at the partition size */
                rest = rest.Substring(partitionSize);
            }
            else if (cutIndex < partitionSize - MaxStringSplitOffset)
            {
                /* The nearest space is too far to the left: hyphenate */
                const int PushCharactersToNextRow = 2;

                /* Move the last characters to the next row and end this one with a dash */
                part = part.Substring(0, part.Length - PushCharactersToNextRow) + "-";

                /* Remove part from the rest (the dash is not part of the input) */
                rest = rest.Substring(part.Length - 1);
            }
            else
            {
                /* Refine cut */
                part = part.Substring(0, cutIndex);

                /* Remove part from the rest (including the space) */
                rest = rest.Substring(cutIndex + 1);
            }

            partitioned.Add(part);
        }

        /* When the final cut consumed a trailing space nothing is left over;
           do not emit an empty last partition. An input that was never cut
           (including the empty string) is still returned as one partition. */
        if (rest.Length > 0 || partitioned.Count == 0)
        {
            partitioned.Add(rest);
        }

        return partitioned.ToArray();
    }
}

And some tests to prove that it works.

/// <summary>
/// NUnit tests for the SplitOnWholeWords string extension method.
/// </summary>
[TestFixture]
public class SplitOnWholeWords
{
    // Split between "fox" and "jumps"
    [TestCase("A quick brown fox jumps over the dog", "A quick brown fox", "jumps over the dog")]

    // The nearest space is too far to the left, so the word is hyphenated instead
    [TestCase("My lover say gregarious, as I stand over her", "My lover say grega-", "rious, as I stand")]

    // Split right before the space
    [TestCase("All of your base are belong to us", "All of your base are", "belong to us")]

    // Split right after the space
    [TestCase("##All your base are belong to us", "##All your base are", "belong to us")]
    public void ShouldSplitStringInTwo(string original, string expectedLine1, string expectedLine2)
    {
        /* Test */
        var partitions = original.SplitOnWholeWords(20);

        /* Assert (only the first two partitions are checked) */
        Assert.That(partitions[0], Is.EqualTo(expectedLine1));
        Assert.That(partitions[1], Is.EqualTo(expectedLine2));
    }

    [Test]
    public void ShouldHandleWhereInputAndPartitionSizeAreTheSame()
    {
        /* Setup */
        const string Input = "Hello World!";

        /* Test */
        var partitions = Input.SplitOnWholeWords(Input.Length);

        /* Assert */
        Assert.That(partitions.Length, Is.EqualTo(1));
        Assert.That(partitions[0], Is.EqualTo(Input));
    }

    [Test]
    public void ShouldNotTryToSplitWhereThereAreNoSpaces()
    {
        /* Setup */
        const string Input = "TheQuick BrownFoxJumpsOverTheLazyDog";

        /* Test */
        var partitions = Input.SplitOnWholeWords(10);

        /* Assert */
        Assert.That(partitions.Length, Is.EqualTo(4));
        Assert.That(partitions[0], Is.EqualTo("TheQuick"));
        Assert.That(partitions[1], Is.EqualTo("BrownFoxJu"));
        Assert.That(partitions[2], Is.EqualTo("mpsOverThe"));
        Assert.That(partitions[3], Is.EqualTo("LazyDog"));
    }

    [Test]
    public void CannotSplitNullInputArgument()
    {
        /* Test */
        TestDelegate code = () => ((string) null).SplitOnWholeWords(10);

        /* Assert */
        Assert.Throws<ArgumentNullException>(code);
    }

    [Test]
    public void CannotSplitWhenPartitionSizeIsNotPositiveNumber()
    {
        /* Test */
        TestDelegate code = () => "Hello World!".SplitOnWholeWords(0);

        /* Assert */
        Assert.Throws<ArgumentOutOfRangeException>(code);
    }
}

Happy coding!

comments powered by Disqus