Query Excel File Using LINQ

Lately at work I’ve had to pull data from a variety of Excel worksheets. Excel interop is quite handy, but making sure to release the used COM objects and dealing with ranges can be quite a pain.

I found a nice little article that shows a basic example of loading all the data from a worksheet and returning it as an IEnumerable using the yield keyword.

I’ve enhanced the example to open the workbook as read-only and to properly release COM objects.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;
using Microsoft.Office.Interop.Excel;

public static class ExcelIterator
{

<pre><code>/// &lt;summary&gt;Loads rows from an Excel spreadsheet so they can be queried using LINQ.&lt;/summary&gt;
/// &lt;paramref name="sheetIndex"&gt;Sheet to load. (1-based)&lt;/paramref&gt;
public static IEnumerable&lt;List&lt;dynamic&gt;&gt; GetExcelRowEnumerator(string fileName, int sheetIndex)
{
    // declare an array to hold our values
    object[,] valueArray = null;

    // create the COM reference to Excel
    var excel = new Application();
    Workbooks workbooks = null;
    Workbook workBook = null;
    Sheets sheets = null;

    try
    {

        // open the workbook by filename
        workbooks = excel.Workbooks;
        workBook = workbooks.Open(fileName, ReadOnly:true);

        sheets = workBook.Sheets;

        if (workBook != null)
        {

            if (sheetIndex &lt;= 0 || sheetIndex &gt; sheets.Count) throw new ArgumentOutOfRangeException("sheetIndex");

            Worksheet sheet = null;
            Range usedRange = null;

            try
            {
                // get the worksheet, 1 for the first, etc.
                sheet = sheets[sheetIndex];

                // find the used range in the sheet
                usedRange = sheet.UsedRange;

                // read in the value array, this is the fastest
                // way to get all values in one hit.
                valueArray = usedRange.get_Value(XlRangeValueDataType.xlRangeValueDefault);
            }
            finally
            {

                if (sheet != null)
                {
                    Marshal.ReleaseComObject(sheet);
                    sheet = null;
                }
                if (usedRange != null)
                {
                    Marshal.ReleaseComObject(usedRange);
                    usedRange = null;
                }

            }

        }
        else
        {
            yield break;
        }

    }
    finally
    {

        if (workBook != null)
        {
            workBook.Close(false, fileName);
            Marshal.ReleaseComObject(workBook);
            workBook = null;
        }
        if (workbooks != null)
        {
            Marshal.ReleaseComObject(workbooks);
            workbooks = null;
        }
        if (sheets != null)
        {
            Marshal.ReleaseComObject(sheets);
            sheets = null;
        }

        // finished with Excel now, close.
        excel.Quit();
        Marshal.ReleaseComObject(excel);
        excel = null;

    }

    // build and yield each row at a time
    for (int rowIndex = 1; rowIndex &lt;= valueArray.GetLength(0); rowIndex++)
    {
        List&lt;dynamic&gt; row = new List&lt;dynamic&gt;(valueArray.GetLength(1));

        // build a list of column values for the row
        for (int colIndex = 1; colIndex &lt;= valueArray.GetLength(1); colIndex++)
        {

            row.Add(valueArray[rowIndex, colIndex]);
        }

        yield return row;
    }

}
</code></pre>

}

The iterator returns one row at a time. Here’s an example of getting cost information from a spreadsheet with integer item numbers, vendor names, and a decimal cost value.

public static Dictionary<int, Dictionary<string, decimal>> GetItemContractTypes()
{

<pre><code>const string vendorXls = @"C:\Dev\Data\Vendor\VendorInfo.xlsm";

var rows = ExcelIterator.GetExcelRowEnumerator(vendorXls, 1);

const int partCol = 0;
const int vendorCol = 1;
const int costCol = 2;

return rows
    .Skip(1)  // skip header row
    .Where(r => r[partCol] != null &amp;&amp; r[vendorCol] != null &amp;&amp; r[costCol] != null)
    .Select(r => new
        {
            Part = (int)Convert.ToInt32(r[partCol]),
            Vendor = (string)r[vendorCol],
            Cost = (decimal)Convert.ToDecimal(r[costCol])
        })
    .GroupBy(a => a.Part)
    .ToDictionary(g => g.First().Part,
                  g => g.ToDictionary(a => a.Vendor, a => a.Cost));
</code></pre>

}

One thought on “Query Excel File Using LINQ

Leave a Reply

Your email address will not be published. Required fields are marked *