首页 > 其他 > 详细

CsvReader

时间:2020-10-05 12:11:54      阅读:30      评论:0      收藏:0      [点我收藏+]
using System;
using System.Collections.Generic;
using System.Data;
using System.IO;
using System.Text;

namespace BrowsePearls.CSV
{
    /// <summary>
    /// Reads CSV-formatted content from a file, a stream or an in-memory string,
    /// one record (physical line) at a time.
    /// </summary>
    /// <remarks>
    /// Parsing rules implemented by this class:
    /// <list type="bullet">
    /// <item><description>Fields are separated by commas; a record is exactly one line, so a quoted
    /// field cannot span several lines.</description></item>
    /// <item><description>Lines that are empty or consist only of space characters are skipped.</description></item>
    /// <item><description>Space characters in front of a field are ignored. Unquoted fields are always
    /// trimmed; quoted fields are trimmed only when <see cref="TrimColumns"/> is <c>true</c>.</description></item>
    /// <item><description>Inside a quoted field a doubled quote (<c>""</c>) stands for one literal quote.</description></item>
    /// <item><description>Anything between the closing quote of a field and the next comma is discarded.</description></item>
    /// <item><description>A quote inside an unquoted field, or a quoted field that is never closed,
    /// raises a <see cref="FormatException"/>.</description></item>
    /// </list>
    /// </remarks>
    public sealed class CsvReader : IDisposable
    {
        #region Members

        private const char Delimiter = ',';
        private const char Quote = '"';
        private const char Space = ' ';

        // Size passed to the StreamReader overload that accepts leaveOpen.
        private const int ReaderBufferSize = 1024;

        private Stream _stream;
        private StreamReader _streamReader;
        private Encoding _encoding;
        private readonly StringBuilder _columnBuilder = new StringBuilder(100);
        private readonly Type _type = Type.File;

        private bool _dispose = false;

        #endregion Members

        #region Properties

        /// <summary>
        /// When <c>true</c> (the default), leading and trailing white space is removed from quoted
        /// field values as well, e.g. <c>"hello world "</c> is read as <c>hello world</c>.
        /// Unquoted field values are trimmed regardless of this setting.
        /// </summary>
        public bool TrimColumns { get; set; } = true;

        /// <summary>
        /// Indicates whether the CSV content has a header row. This reader never consults the
        /// property itself; per the original author's note it is meant for a CsvFile type
        /// (not part of this file) that decides whether to populate its headers.
        /// </summary>
        public bool HasHeaderRow { get; set; }

        /// <summary>
        /// The field values of the record read most recently, or <c>null</c> when no record has
        /// been read yet or the last call to <see cref="ReadNextRecord"/> returned <c>false</c>.
        /// </summary>
        public List<string> Fields { get; private set; }

        /// <summary>
        /// The number of fields in the current record, or <c>null</c> when <see cref="Fields"/> is <c>null</c>.
        /// </summary>
        public int? FieldCount
        {
            get { return Fields?.Count; }
        }

        #endregion Properties

        #region Enums

        /// <summary>
        /// Kind of data source the reader was created from. Note that inside this class the
        /// name hides <see cref="System.Type"/>, which therefore has to be fully qualified.
        /// </summary>
        private enum Type
        {
            File,
            Stream,
            String
        }

        #endregion Enums

        #region Constructors

        /// <summary>
        /// Creates a reader over a file, decoding it with <see cref="Encoding.Default"/>.
        /// </summary>
        /// <param name="filePath">Path of the CSV file.</param>
        /// <exception cref="FileNotFoundException">The file does not exist.</exception>
        public CsvReader(string filePath)
        {
            _type = Type.File;
            Initialise(filePath, Encoding.Default);
        }

        /// <summary>
        /// Creates a reader over a file using the given encoding.
        /// </summary>
        /// <param name="filePath">Path of the CSV file.</param>
        /// <param name="encoding">Text encoding; <c>null</c> falls back to <see cref="Encoding.Default"/>.</param>
        /// <exception cref="FileNotFoundException">The file does not exist.</exception>
        public CsvReader(string filePath, Encoding encoding)
        {
            _type = Type.File;
            Initialise(filePath, encoding);
        }

        /// <summary>
        /// Creates a reader over a caller-owned stream, decoding it with <see cref="Encoding.Default"/>.
        /// The stream is not closed when the reader is disposed.
        /// </summary>
        /// <param name="stream">Readable stream containing CSV text.</param>
        /// <exception cref="ArgumentNullException"><paramref name="stream"/> is <c>null</c>.</exception>
        public CsvReader(Stream stream)
        {
            _type = Type.Stream;
            Initialise(stream, Encoding.Default);
        }

        /// <summary>
        /// Creates a reader over a caller-owned stream using the given encoding.
        /// The stream is not closed when the reader is disposed.
        /// </summary>
        /// <param name="stream">Readable stream containing CSV text.</param>
        /// <param name="encoding">Text encoding; <c>null</c> falls back to <see cref="Encoding.Default"/>.</param>
        /// <exception cref="ArgumentNullException"><paramref name="stream"/> is <c>null</c>.</exception>
        public CsvReader(Stream stream, Encoding encoding)
        {
            _type = Type.Stream;
            Initialise(stream, encoding);
        }

        /// <summary>
        /// Creates a reader over CSV text held in a string.
        /// </summary>
        /// <param name="encoding">Encoding used for the internal buffer; <c>null</c> falls back to
        /// <see cref="Encoding.Default"/>.</param>
        /// <param name="csvContent">The CSV text.</param>
        /// <exception cref="ArgumentNullException"><paramref name="csvContent"/> is <c>null</c>.</exception>
        public CsvReader(Encoding encoding, string csvContent)
        {
            _type = Type.String;
            Initialise(encoding, csvContent);
        }

        #endregion Constructors

        #region Methods

        /// <summary>
        /// Opens the file for reading and attaches the reader to it.
        /// </summary>
        /// <param name="filePath">Path of the CSV file.</param>
        /// <param name="encoding">Text encoding; may be <c>null</c>.</param>
        /// <exception cref="FileNotFoundException">The file does not exist.</exception>
        private void Initialise(string filePath, Encoding encoding)
        {
            if (!File.Exists(filePath))
            {
                throw new FileNotFoundException($"The file '{filePath}' does not exist.", filePath);
            }

            FileStream fileStream = File.OpenRead(filePath);
            try
            {
                Initialise(fileStream, encoding);
            }
            catch
            {
                // Do not leak the file handle when the reader cannot be set up.
                fileStream.Dispose();
                throw;
            }
        }

        /// <summary>
        /// Attaches the reader to a stream, rewinding it first when it supports seeking.
        /// </summary>
        /// <param name="stream">Source stream.</param>
        /// <param name="encoding">Text encoding; <c>null</c> falls back to <see cref="Encoding.Default"/>.</param>
        /// <exception cref="ArgumentNullException"><paramref name="stream"/> is <c>null</c>.</exception>
        private void Initialise(Stream stream, Encoding encoding)
        {
            if (stream == null)
            {
                throw new ArgumentNullException(nameof(stream), "The supplied stream is null.");
            }

            _stream = stream;
            if (_stream.CanSeek)
            {
                _stream.Position = 0;
            }

            _encoding = encoding ?? Encoding.Default;

            // A stream handed in by the caller stays open after Dispose; streams created by
            // this class (file / string sources) are closed together with the reader.
            bool leaveOpen = _type == Type.Stream;
            _streamReader = new StreamReader(_stream, _encoding, true, ReaderBufferSize, leaveOpen);
        }

        /// <summary>
        /// Copies the CSV text into an in-memory stream and attaches the reader to it.
        /// </summary>
        /// <param name="encoding">Encoding used both to encode and to decode the buffer; may be <c>null</c>.</param>
        /// <param name="csvContent">The CSV text.</param>
        /// <exception cref="ArgumentNullException"><paramref name="csvContent"/> is <c>null</c>.</exception>
        private void Initialise(Encoding encoding, string csvContent)
        {
            if (csvContent == null)
            {
                throw new ArgumentNullException(nameof(csvContent), "The supplied csvContent is null.");
            }

            // Encode with the same encoding the reader decodes with, otherwise any
            // non-ASCII content would be corrupted on the round trip.
            Encoding effectiveEncoding = encoding ?? Encoding.Default;
            Initialise(new MemoryStream(effectiveEncoding.GetBytes(csvContent)), effectiveEncoding);
        }

        /// <summary>
        /// Reads the next non-blank line and parses it into <see cref="Fields"/>.
        /// </summary>
        /// <returns><c>true</c> when a record was read; <c>false</c> at the end of the input,
        /// in which case <see cref="Fields"/> is <c>null</c>.</returns>
        /// <exception cref="FormatException">The line contains misplaced or unbalanced double quotes.</exception>
        /// <exception cref="ObjectDisposedException">The reader has been disposed.</exception>
        public bool ReadNextRecord()
        {
            ThrowIfDisposed();

            Fields = null;

            string line;
            do
            {
                line = _streamReader.ReadLine();
            }
            while (line != null && line.Trim(Space).Length == 0); // skip lines made of spaces only

            if (line == null)
            {
                return false;
            }

            ParseLine(line);
            return true;
        }

        /// <summary>
        /// Reads the whole input into a <see cref="DataTable"/> whose columns are all of type string.
        /// The first record supplies the column names.
        /// </summary>
        /// <returns>The populated table; it has no columns when the input contains no record.</returns>
        public DataTable ReadIntoDataTable()
        {
            return ReadIntoDataTable(new System.Type[0]);
        }

        /// <summary>
        /// Reads the whole input into a <see cref="DataTable"/>. The first record supplies the
        /// column names, every following record becomes a row. When the underlying stream is
        /// seekable, reading restarts from the beginning of the stream.
        /// </summary>
        /// <param name="columnTypes">Column types by position. Columns beyond the end of the array
        /// (or all columns when the array is <c>null</c>) are created as string columns.</param>
        /// <returns>The populated table; it has no columns when the input contains no record.</returns>
        /// <exception cref="FormatException">A line contains misplaced or unbalanced double quotes.</exception>
        /// <exception cref="ObjectDisposedException">The reader has been disposed.</exception>
        public DataTable ReadIntoDataTable(System.Type[] columnTypes)
        {
            ThrowIfDisposed();

            System.Type[] types = columnTypes ?? new System.Type[0];
            DataTable dataTable = new DataTable();

            if (_stream.CanSeek)
            {
                _stream.Position = 0;
                // The reader keeps its own buffer; without discarding it the reader would go on
                // serving data from the previous stream position.
                _streamReader.DiscardBufferedData();
            }

            if (!ReadNextRecord())
            {
                return dataTable;
            }

            // First record: column headers.
            for (int i = 0; i < Fields.Count; i++)
            {
                dataTable.Columns.Add(Fields[i], i < types.Length ? types[i] : typeof(string));
            }

            while (ReadNextRecord())
            {
                DataRow row = dataTable.NewRow();

                for (int i = 0; i < Fields.Count; i++)
                {
                    row[i] = Fields[i];
                }

                dataTable.Rows.Add(row);
            }

            return dataTable;
        }

        /// <summary>
        /// Splits one CSV line into fields and stores them in <see cref="Fields"/>.
        /// </summary>
        /// <param name="line">The line to parse, without its line terminator.</param>
        /// <exception cref="FormatException">An unquoted field contains a double quote, or a quoted
        /// field is not closed before the end of the line.</exception>
        private void ParseLine(string line)
        {
            Fields = new List<string>();
            _columnBuilder.Length = 0;

            bool inColumn = false;
            bool inQuotes = false;

            for (int i = 0; i < line.Length; ++i)
            {
                char character = line[i];

                if (!inColumn)
                {
                    if (character == Space)
                    {
                        // Spaces in front of a field are not part of it.
                        continue;
                    }

                    if (character == Delimiter)
                    {
                        // A comma before any content: the field is empty.
                        Fields.Add(string.Empty);
                        continue;
                    }

                    inColumn = true;
                    inQuotes = character == Quote;
                    if (!inQuotes)
                    {
                        _columnBuilder.Append(character);
                    }

                    continue;
                }

                if (!inQuotes)
                {
                    if (character == Delimiter)
                    {
                        CommitField(false);
                        inColumn = false;
                    }
                    else if (character == Quote)
                    {
                        throw new FormatException("This field can't contain double quotes because there aren't double quotes at both ends of field");
                    }
                    else
                    {
                        _columnBuilder.Append(character);
                    }

                    continue;
                }

                if (character != Quote)
                {
                    _columnBuilder.Append(character);
                    continue;
                }

                if (i + 1 < line.Length && line[i + 1] == Quote)
                {
                    // "" inside a quoted field is one literal quote; skip the second one.
                    _columnBuilder.Append(Quote);
                    ++i;
                    continue;
                }

                // Closing quote: drop everything up to the next comma (or the end of the line).
                // The loop increment then steps over the comma itself.
                ++i;
                while (i < line.Length && line[i] != Delimiter)
                {
                    ++i;
                }

                CommitField(true);
                inColumn = false;
                inQuotes = false;
            }

            if (inColumn)
            {
                if (inQuotes)
                {
                    // The line ended while a quoted field was still open.
                    throw new FormatException("Invalid double quotes");
                }

                CommitField(false);
            }

            // A comma as the last non-space character means one more, empty, field follows it.
            string withoutTrailingSpaces = line.TrimEnd(Space);
            if (withoutTrailingSpaces.Length > 0
                && withoutTrailingSpaces[withoutTrailingSpaces.Length - 1] == Delimiter)
            {
                Fields.Add(string.Empty);
            }
        }

        /// <summary>
        /// Appends the text collected in the column buffer to <see cref="Fields"/> and clears the buffer.
        /// </summary>
        /// <param name="quoted"><c>true</c> when the field was enclosed in double quotes; such a
        /// field is trimmed only if <see cref="TrimColumns"/> is set.</param>
        private void CommitField(bool quoted)
        {
            string value = _columnBuilder.ToString();
            if (!quoted || TrimColumns)
            {
                value = value.Trim();
            }

            Fields.Add(value);
            _columnBuilder.Length = 0;
        }

        /// <summary>
        /// Throws when the reader has already been disposed.
        /// </summary>
        /// <exception cref="ObjectDisposedException">The reader has been disposed.</exception>
        private void ThrowIfDisposed()
        {
            if (_dispose)
            {
                throw new ObjectDisposedException(nameof(CsvReader));
            }
        }

        /// <summary>
        /// Releases the reader and, for file and string sources, the underlying stream.
        /// A stream supplied by the caller is left open. Calling this method more than once is harmless.
        /// </summary>
        public void Dispose()
        {
            if (_dispose)
            {
                return;
            }

            _dispose = true;
            CleanUp();
        }

        // No finalizer: this class holds only managed objects, and touching them from a
        // finalizer is unsafe because they may already have been finalized themselves.

        /// <summary>
        /// Disposes the stream reader and any stream created by this class.
        /// </summary>
        private void CleanUp()
        {
            _streamReader?.Dispose();

            if (_type != Type.Stream)
            {
                _stream?.Dispose();
            }
        }

        #endregion Methods
    }
}

CsvReader

原文:https://www.cnblogs.com/browsepearls/p/13769540.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!