受欢迎的博客标签

用Aspose.Words 从Word文档中提取表格数据

Published

用Aspose.Words来创建和操作Word文档。

首先用C#创建一个简单的表格模型,以便稍后读取文档时使用。如下所示,我们创建了一个名为WordDocumentTable的类,它带有三个属性:TableID、RowID和ColumnID。如之前所说,Word文档中并没有对TableID/RowID的原生支持,这些属性仅仅表示表格、行和列在Word文档中的位置,起始索引假定为0。

/// <summary>
/// Describes a location inside a Word document by table, row and column index.
/// Every index defaults to 0 when it is not supplied to a constructor.
/// </summary>
public class WordDocumentTable
{
    // Backing storage for the three public index properties.
    private int _tableId;
    private int _rowId;
    private int _columnId;

    /// <summary>Creates a location that only specifies a table index.</summary>
    /// <param name="PiTableID">Index of the table.</param>
    public WordDocumentTable(int PiTableID)
        : this(PiTableID, 0, 0)
    {
    }

    /// <summary>Creates a location that specifies a table index and a column index.</summary>
    /// <param name="PiTableID">Index of the table.</param>
    /// <param name="PiColumnID">Index of the column.</param>
    public WordDocumentTable(int PiTableID, int PiColumnID)
        : this(PiTableID, PiColumnID, 0)
    {
    }

    /// <summary>Creates a location that specifies table, column and row indexes.</summary>
    /// <param name="PiTableID">Index of the table.</param>
    /// <param name="PiColumnID">Index of the column.</param>
    /// <param name="PiRowID">Index of the row.</param>
    public WordDocumentTable(int PiTableID, int PiColumnID, int PiRowID)
    {
        _tableId = PiTableID;
        _columnId = PiColumnID;
        _rowId = PiRowID;
    }

    /// <summary>Gets or sets the table index.</summary>
    public int TableID
    {
        get { return _tableId; }
        set { _tableId = value; }
    }

    /// <summary>Gets or sets the row index.</summary>
    public int RowID
    {
        get { return _rowId; }
        set { _rowId = value; }
    }

    /// <summary>Gets or sets the column index.</summary>
    public int ColumnID
    {
        get { return _columnId; }
        set { _columnId = value; }
    }
}

现在来到提取环节。如下所示,这是我想要从文档中读取的表格单元格集合。

/// <summary>
/// Gets the list of table locations to read from the document.
/// A new list is built on every access.
/// </summary>
private List<WordDocumentTable> WordDocumentTables
{
    get
    {
        return new List<WordDocumentTable>
        {
            // First table (index 0), no column or row specified.
            new WordDocumentTable(0),
            // Second table (index 1), second column (index 1).
            // This table has only one row.
            new WordDocumentTable(1, 1),
            // Third table (index 2), second row (index 1), second cell (index 1).
            new WordDocumentTable(2, 1, 1)
        };
    }
}

下面的代码使用Aspose.Words,按表格、行和单元格从文档中提取数据。

/// <summary>
/// Loads a Word document from a byte array and writes to the console the text found at
/// each location listed by <c>WordDocumentTables</c>.
/// </summary>
/// <param name="PobjData">Bytes of the Word document; wrapped in a <see cref="MemoryStream"/> for loading.</param>
/// <remarks>
/// For each location:
/// when ColumnID is 0 the text of the whole table is printed;
/// when ColumnID is greater than 0 and RowID is 0, the cell at position ColumnID in the
/// table's flattened cell collection is printed;
/// otherwise the cell at ColumnID within the row at RowID is printed.
/// Because 0 selects the whole table, column 0 cannot be addressed on its own.
/// </remarks>
public void ExtractTableData(byte[] PobjData)
{
    using (MemoryStream LobjStream = new MemoryStream(PobjData))
    {
        Document LobjAsposeDocument = new Document(LobjStream);
        foreach (WordDocumentTable wordDocTable in WordDocumentTables)
        {
            // NOTE(review): the result of GetChild is dereferenced without a null check;
            // presumably it is null when the document has no table at this index - confirm.
            Aspose.Words.Tables.Table table = (Aspose.Words.Tables.Table)
                LobjAsposeDocument.GetChild(NodeType.Table, wordDocTable.TableID, true);

            // Default output: the text of the entire table.
            string cellData = table.Range.Text;

            if (wordDocTable.ColumnID > 0)
            {
                if (wordDocTable.RowID == 0)
                {
                    // No row given: index straight into all cells of the table.
                    NodeCollection LobjCells =
                        table.GetChildNodes(NodeType.Cell, true);
                    cellData = LobjCells[wordDocTable.ColumnID].ToTxt();
                }
                else
                {
                    // Row given: pick the row first, then the cell inside it.
                    NodeCollection LobjRows =
                        table.GetChildNodes(NodeType.Row, true);
                    cellData = ((Row)(LobjRows[wordDocTable.RowID])).
                        Cells[wordDocTable.ColumnID].ToTxt();
                }
            }

            // The format string must stay on a single line: a regular C# string literal
            // cannot contain a raw line break.
            Console.WriteLine(String.Format(
                "Data in Table {0}, Row {1}, Column {2} : {3}",
                wordDocTable.TableID,
                wordDocTable.RowID,
                wordDocTable.ColumnID,
                cellData));
        }
    }
}