Lately at work I’ve had to pull data from a variety of Excel worksheets. Excel interop is quite handy, but making sure to release the used COM objects and dealing with ranges can be quite a pain.
I found a nice little article that shows a basic example of loading all the data from a worksheet and returning it as an IEnumerable using the yield
keyword.
I’ve enhanced the example to open the workbook as read-only and to properly release COM objects.
[csharp]
using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;
using Microsoft.Office.Interop.Excel;
public static class ExcelIterator
{
/// <summary>Loads rows from an Excel spreadsheet so they can be queried using LINQ.</summary>
/// <paramref name="sheetIndex">Sheet to load. (1-based)</paramref>
public static IEnumerable<List<dynamic>> GetExcelRowEnumerator(string fileName, int sheetIndex)
{
// declare an array to hold our values
object[,] valueArray = null;
// create the COM reference to Excel
var excel = new Application();
Workbooks workbooks = null;
Workbook workBook = null;
Sheets sheets = null;
try
{
// open the workbook by filename
workbooks = excel.Workbooks;
workBook = workbooks.Open(fileName, ReadOnly:true);
sheets = workBook.Sheets;
if (workBook != null)
{
if (sheetIndex <= 0 || sheetIndex > sheets.Count) throw new ArgumentOutOfRangeException("sheetIndex");
Worksheet sheet = null;
Range usedRange = null;
try
{
// get the worksheet, 1 for the first, etc.
sheet = sheets[sheetIndex];
// find the used range in the sheet
usedRange = sheet.UsedRange;
// read in the value array, this is the fastest
// way to get all values in one hit.
valueArray = usedRange.get_Value(XlRangeValueDataType.xlRangeValueDefault);
}
finally
{
if (sheet != null)
{
Marshal.ReleaseComObject(sheet);
sheet = null;
}
if (usedRange != null)
{
Marshal.ReleaseComObject(usedRange);
usedRange = null;
}
}
}
else
{
yield break;
}
}
finally
{
if (workBook != null)
{
workBook.Close(false, fileName);
Marshal.ReleaseComObject(workBook);
workBook = null;
}
if (workbooks != null)
{
Marshal.ReleaseComObject(workbooks);
workbooks = null;
}
if (sheets != null)
{
Marshal.ReleaseComObject(sheets);
sheets = null;
}
// finished with Excel now, close.
excel.Quit();
Marshal.ReleaseComObject(excel);
excel = null;
}
// build and yield each row at a time
for (int rowIndex = 1; rowIndex <= valueArray.GetLength(0); rowIndex++)
{
List<dynamic> row = new List<dynamic>(valueArray.GetLength(1));
// build a list of column values for the row
for (int colIndex = 1; colIndex <= valueArray.GetLength(1); colIndex++)
{
row.Add(valueArray[rowIndex, colIndex]);
}
yield return row;
}
}
}
[/csharp]
The iterator returns one row at a time. Here’s an example of getting cost information from a spreadsheet with integer item numbers, vendor names, and a decimal cost value.
[csharp]
public static Dictionary<int, Dictionary<string, decimal>> GetItemContractTypes()
{
const string vendorXls = @"C:\Dev\Data\Vendor\VendorInfo.xlsm";
var rows = ExcelIterator.GetExcelRowEnumerator(vendorXls, 1);
const int partCol = 0;
const int vendorCol = 1;
const int costCol = 2;
return rows
.Skip(1) // skip header row
.Where(r => r[partCol] != null && r[vendorCol] != null && r[costCol] != null)
.Select(r => new
{
Part = (int)Convert.ToInt32(r[partCol]),
Vendor = (string)r[vendorCol],
Cost = (decimal)Convert.ToDecimal(r[costCol])
})
.GroupBy(a => a.Part)
.ToDictionary(g => g.First().Part,
g => g.ToDictionary(a => a.Vendor, a => a.Cost));
}
[/csharp]
Beautiful, thank you! Very nicely written solution.