首页 > 数据库技术 > 详细

MongoDB杂谈

时间:2019-07-29 13:21:58      阅读:65      评论:0      收藏:0      [点我收藏+]

1. mongodb索引

  • 索引通常能够极大的提高查询的效率, 如果没有索引, MongoDB在读取数据时必须扫描集合中的每个文件并选取那些符合查询条件的记录.
  • 这种扫描全集合的查询效率是非常低的, 特别在处理大量的数据时, 查询可以要花费几十秒甚至几分钟, 这对网站的性能是非常致命的.
  • 索引是特殊的数据结构, 索引存储在一个易于遍历读取的数据集合中, 索引是对数据库表中一列或多列的值进行排序的一种结构.

2. 索引的增删查改

2.1 增加索引

# 1是正排, -1 是倒排
db.$collection.createIndex({$field:1});

# 建立复合索引
db.$collection.createIndex({$field1:1, $field2:1});

2.2 查看索引

db.$collection.getIndexes();

2.3 删除索引

db.$collecion.dropIndex($index);

# 删除全部索引
db.$collection.dropIndexes();

3. 创建索引前后的对比

4. explain的参数详解以及重点查看参数

{
    "queryPlanner" : {   // 查询计划
        "plannerVersion" : 1,   
        "namespace" : "test.testindex",  // 查询的表名
        "indexFilterSet" : false,  //针对该query是否有indexfilter ???
        "parsedQuery" : {  // 查询条件
            "id" : {
                "$lt" : 97
            }
        },
        "winningPlan" : {// 查询优化器针对该query所返回的最优执行计划的详细内容。
            "stage" : "FETCH",// FETC: 可以理解为通过返回的index位置去检索具体的文档???
            "inputStage" : {
                "stage" : "IXSCAN",  // 表示进行的是index scanning
                "keyPattern" : { // 索引内容
                    "id" : 1,
                    "age" : 1
                },
                "indexName" : "id_1_age_1", // 使用的索引
                "isMultiKey" : false, // 非复合索引 如果索引建立在array上,此处将是true
                "multiKeyPaths" : {
                    "id" : [ ],
                    "age" : [ ]
                },
                "isUnique" : false,
                "isSparse" : false,
                "isPartial" : false,
                "indexVersion" : 2,
                "direction" : "forward", //此query的查询顺序,此处是forward,如果用了.sort({$filed:-1})将显示backward
                "indexBounds" : {
                    "id" : [
                        "[-inf.0, 97.0)" // winningplan所扫描的索引范围 $lt:97
                    ],
                    "age" : [
                        "[MinKey, MaxKey]"
                    ]
                }
            }
        },
        "rejectedPlans" : [ ]
    },
    "executionStats" : {
        "executionSuccess" : true,  // 是否执行成功
        "nReturned" : 97,    // 结果返回条数
        "executionTimeMillis" : 0, // 整体执行时间 单位毫秒
        "totalKeysExamined" : 97, // 索引扫描个数
        "totalDocsExamined" : 97, // 文档扫描个数
        "executionStages" : {
            "stage" : "FETCH",
            "nReturned" : 97, 
            "executionTimeMillisEstimate" : 0, //该查询根据index去检索document获取97条具体数据的时间
            "works" : 98,
            "advanced" : 97,
            "needTime" : 0,
            "needYield" : 0,
            "saveState" : 0,
            "restoreState" : 0,
            "isEOF" : 1,
            "invalidates" : 0,
            "docsExamined" : 97,
            "alreadyHasObj" : 0,
            "inputStage" : {
                "stage" : "IXSCAN",
                "nReturned" : 97, //最理想状态 nReturned=totalKeysExamined & totalDocsExamined=0 / nReturned=totalKeysExamined=totalDocsExamined
                "executionTimeMillisEstimate" : 0,//该查询扫描97行index所用时间
                "works" : 98,
                "advanced" : 97,
                "needTime" : 0,
                "needYield" : 0,
                "saveState" : 0,
                "restoreState" : 0,
                "isEOF" : 1,
                "invalidates" : 0,
                "keyPattern" : {
                    "id" : 1,
                    "age" : 1
                },
                "indexName" : "id_1_age_1",
                "isMultiKey" : false,
                "multiKeyPaths" : {
                    "id" : [ ],
                    "age" : [ ]
                },
                "isUnique" : false,
                "isSparse" : false,
                "isPartial" : false,
                "indexVersion" : 2,
                "direction" : "forward",
                "indexBounds" : {
                    "id" : [
                        "[-inf.0, 97.0)"
                    ],
                    "age" : [
                        "[MinKey, MaxKey]"
                    ]
                },
                "keysExamined" : 97,
                "seeks" : 1,
                "dupsTested" : 0,
                "dupsDropped" : 0,
                "seenInvalidated" : 0
            }
        }
    },
    "serverInfo" : {
        "host" : "centos01",
        "port" : 27017,
        "version" : "4.0.9",
        "gitVersion" : "fc525e2d9b0e4bceff5c2201457e564362909765"
    },
    "ok" : 1
}
  • stage的含义

    • COLLSCAN
      全表扫描
    • XSCAN
      索引扫描
    • ETCH
      根据索引去检索指定document
    • SHARD_MERGE
      将各个分片返回数据进行merge, 但是根据源码中的信息,个人还总结了文档中没有的如下几类(常用如下,由于是通过源码查找,可能有所遗漏)
    • SORT
      表明在内存中进行了排序(与老版本的scanAndOrder:true一致)
    • LIMIT
      使用limit限制返回数
    • SKIP
      使用skip进行跳过
    • IDHACK
      针对_id进行查询
    • SHARDING_FILTER
      通过mongos对分片数据进行查询
    • COUNT
      利用db.coll.explain().count()之类进行count运算
    • COUNTSCAN
      count不使用用Index进行count时的stage返回
    • COUNT_SCAN
      count使用了Index进行count时的stage返回
    • SUBPLA
      未使用到索引的$or查询的stage返回
    • TEXT
      使用全文索引进行查询时候的stage返回
    • PROJECTION
      限定返回字段时候stage的返回
  • 我们希望看到的stage的组合

    Fetch+IDHACK
    Fetch+ixscan
    Limit+(Fetch+ixscan)
    PROJECTION+ixscan
    SHARDING_FILTER+ixscan

  • 不希望看到包含如下的stage:

    COLLSCAN(全表扫),SORT(使用sort但是无index),不合理的SKIP,SUBPLA(未用到index的$or)
    对于count查询,希望看到的有:
    COUNT_SCAN

    不希望看到的有:
    COUNTSCAN

5. Explain 分析实例

5.1 准备数据

{ "_id" : ObjectId("5ce371f2d9a9a24e713bedd3"), "a" : 0, "b" : 38, "c" : 10 }
{ "_id" : ObjectId("5ce371f2d9a9a24e713bedd4"), "a" : 1, "b" : 64, "c" : 9 }
{ "_id" : ObjectId("5ce371f2d9a9a24e713bedd5"), "a" : 2, "b" : 28, "c" : 8 }
{ "_id" : ObjectId("5ce371f2d9a9a24e713bedd6"), "a" : 0, "b" : 73, "c" : 7 }
{ "_id" : ObjectId("5ce371f2d9a9a24e713bedd7"), "a" : 1, "b" : 95, "c" : 6 }
{ "_id" : ObjectId("5ce371f2d9a9a24e713bedd8"), "a" : 2, "b" : 11, "c" : 5 }
{ "_id" : ObjectId("5ce371f2d9a9a24e713bedd9"), "a" : 0, "b" : 73, "c" : 4 }
{ "_id" : ObjectId("5ce371f2d9a9a24e713bedda"), "a" : 1, "b" : 14, "c" : 3 }
{ "_id" : ObjectId("5ce371f2d9a9a24e713beddb"), "a" : 2, "b" : 73, "c" : 2 }
{ "_id" : ObjectId("5ce371f2d9a9a24e713beddc"), "a" : 0, "b" : 0, "c" : 1 }
  • 测试的查询语句

    db.testindex.find({a:2,b:{$lt:90}}).sort({c:-1});

    5.2 没有index的查询计划

db.testindex.find({a:2,b:{$lt:90}}).sort({c:-1}).explain("executionStats");
{
    "executionStats" : {
        "executionSuccess" : true,
        "nReturned" : 3,
        "executionTimeMillis" : 0,
        "totalKeysExamined" : 0,
        "totalDocsExamined" : 10,
        "executionStages" : {
            "stage" : "SORT",
            "nReturned" : 3,
            "executionTimeMillisEstimate" : 0,
            "works" : 17,
            "advanced" : 3,
            "needTime" : 13,
            "needYield" : 0,
            "saveState" : 0,
            "restoreState" : 0,
            "isEOF" : 1,
            "invalidates" : 0,
            "sortPattern" : {
                "c" : -1
            },
            "memUsage" : 153,
            "memLimit" : 33554432,
            "inputStage" : {
                "stage" : "SORT_KEY_GENERATOR",
                "nReturned" : 3,
                "executionTimeMillisEstimate" : 0,
                "works" : 13,
                "advanced" : 3,
                "needTime" : 9,
                "needYield" : 0,
                "saveState" : 0,
                "restoreState" : 0,
                "isEOF" : 1,
                "invalidates" : 0,
                "inputStage" : {
                    "stage" : "COLLSCAN",
                    "filter" : {
                        "$and" : [
                            {
                                "a" : {
                                    "$eq" : 2
                                }
                            },
                            {
                                "b" : {
                                    "$lt" : 90
                                }
                            }
                        ]
                    },
                    "nReturned" : 3,
                    "executionTimeMillisEstimate" : 0,
                    "works" : 12,
                    "advanced" : 3,
                    "needTime" : 8,
                    "needYield" : 0,
                    "saveState" : 0,
                    "restoreState" : 0,
                    "isEOF" : 1,
                    "invalidates" : 0,
                    "direction" : "forward",
                    "docsExamined" : 10
                }
            }
        }
    },
    "serverInfo" : {
        "host" : "centos01",
        "port" : 27017,
        "version" : "4.0.9",
        "gitVersion" : "fc525e2d9b0e4bceff5c2201457e564362909765"
    },
    "ok" : 1
}
  • nReturned为3,符合的条件的返回为3条.
  • totalKeysExamined为0,没有使用index.
  • totalDocsExamined为10,扫描了所有记录.
  • executionStages.stage为SORT,未使用index的sort,占用的内存与内存限制为”memUsage” : 153, “memLimit” : 33554432.

  • executionStages.inputStage.stage为COLLSCAN,全表扫描,扫描条件为
{
    "filter":{
        "$and":[
            {
                "a":{
                    "$eq":2
                }
            },
            {
                "b":{
                    "$lt":90
                }
            }
        ]
    }
}

5.3 增加字段c的索引

db.testindex.createIndex({c:1});
{
    "executionStats" : {
        "executionSuccess" : true,
        "nReturned" : 3,
        "executionTimeMillis" : 0,
        "totalKeysExamined" : 10,
        "totalDocsExamined" : 10,
        "executionStages" : {
            "stage" : "FETCH",
            "filter" : {
                "$and" : [
                    {
                        "a" : {
                            "$eq" : 2
                        }
                    },
                    {
                        "b" : {
                            "$lt" : 90
                        }
                    }
                ]
            },
            "nReturned" : 3,
            "executionTimeMillisEstimate" : 0,
            "works" : 11,
            "advanced" : 3,
            "needTime" : 7,
            "needYield" : 0,
            "saveState" : 0,
            "restoreState" : 0,
            "isEOF" : 1,
            "invalidates" : 0,
            "docsExamined" : 10,
            "alreadyHasObj" : 0,
            "inputStage" : {
                "stage" : "IXSCAN",
                "nReturned" : 10,
                "executionTimeMillisEstimate" : 0,
                "works" : 11,
                "advanced" : 10,
                "needTime" : 0,
                "needYield" : 0,
                "saveState" : 0,
                "restoreState" : 0,
                "isEOF" : 1,
                "invalidates" : 0,
                "keyPattern" : {
                    "c" : 1
                },
                "indexName" : "c_1",
                "isMultiKey" : false,
                "multiKeyPaths" : {
                    "c" : [ ]
                },
                "isUnique" : false,
                "isSparse" : false,
                "isPartial" : false,
                "indexVersion" : 2,
                "direction" : "backward",
                "indexBounds" : {
                    "c" : [
                        "[MaxKey, MinKey]"
                    ]
                },
                "keysExamined" : 10,
                "seeks" : 1,
                "dupsTested" : 0,
                "dupsDropped" : 0,
                "seenInvalidated" : 0
            }
        }
    },
    "serverInfo" : {
        "host" : "centos01",
        "port" : 27017,
        "version" : "4.0.9",
        "gitVersion" : "fc525e2d9b0e4bceff5c2201457e564362909765"
    },
    "ok" : 1
}

我们发现,Stage没有了SORT,因为我们sort字段有了index,但是由于查询还是没有index,故totalDocsExamined还是10,但是由于sort用了index,totalKeysExamined也是10,但是仅对sort排序做了优化,查询性能还是一样的低效。

5.4 使用db.testindex.ensureIndex({b:1,a:1,c:1})索引的执行计划

{
    "executionStats" : {
        "executionSuccess" : true,
        "nReturned" : 3,
        "executionTimeMillis" : 0,
        "totalKeysExamined" : 10,
        "totalDocsExamined" : 10,
        "executionStages" : {
            "stage" : "FETCH",
            "filter" : {
                "$and" : [
                    {
                        "a" : {
                            "$eq" : 2
                        }
                    },
                    {
                        "b" : {
                            "$lt" : 90
                        }
                    }
                ]
            },
            "nReturned" : 3,
            "executionTimeMillisEstimate" : 0,
            "works" : 12,
            "advanced" : 3,
            "needTime" : 7,
            "needYield" : 0,
            "saveState" : 0,
            "restoreState" : 0,
            "isEOF" : 1,
            "invalidates" : 0,
            "docsExamined" : 10,
            "alreadyHasObj" : 0,
            "inputStage" : {
                "stage" : "IXSCAN",
                "nReturned" : 10,
                "executionTimeMillisEstimate" : 0,
                "works" : 11,
                "advanced" : 10,
                "needTime" : 0,
                "needYield" : 0,
                "saveState" : 0,
                "restoreState" : 0,
                "isEOF" : 1,
                "invalidates" : 0,
                "keyPattern" : {
                    "c" : 1
                },
                "indexName" : "c_1",
                "isMultiKey" : false,
                "multiKeyPaths" : {
                    "c" : [ ]
                },
                "isUnique" : false,
                "isSparse" : false,
                "isPartial" : false,
                "indexVersion" : 2,
                "direction" : "backward",
                "indexBounds" : {
                    "c" : [
                        "[MaxKey, MinKey]"
                    ]
                },
                "keysExamined" : 10,
                "seeks" : 1,
                "dupsTested" : 0,
                "dupsDropped" : 0,
                "seenInvalidated" : 0
            }
        }
    }

这个索引被拒绝了, 用的还是c_1的索引.

5.5 使用db.testindex.ensureIndex({a:1,b:1,c:1})索引的执行计划

{
    "executionStats" : {
        "executionSuccess" : true,
        "nReturned" : 3,
        "executionTimeMillis" : 0,
        "totalKeysExamined" : 3,
        "totalDocsExamined" : 3,
        "executionStages" : {
            "stage" : "SORT",
            "nReturned" : 3,
            "executionTimeMillisEstimate" : 0,
            "works" : 10,
            "advanced" : 3,
            "needTime" : 5,
            "needYield" : 0,
            "saveState" : 0,
            "restoreState" : 0,
            "isEOF" : 1,
            "invalidates" : 0,
            "sortPattern" : {
                "c" : 1
            },
            "memUsage" : 153,
            "memLimit" : 33554432,
            "inputStage" : {
                "stage" : "SORT_KEY_GENERATOR",
                "nReturned" : 3,
                "executionTimeMillisEstimate" : 0,
                "works" : 5,
                "advanced" : 3,
                "needTime" : 1,
                "needYield" : 0,
                "saveState" : 0,
                "restoreState" : 0,
                "isEOF" : 1,
                "invalidates" : 0,
                "inputStage" : {
                    "stage" : "FETCH",
                    "nReturned" : 3,
                    "executionTimeMillisEstimate" : 0,
                    "works" : 4,
                    "advanced" : 3,
                    "needTime" : 0,
                    "needYield" : 0,
                    "saveState" : 0,
                    "restoreState" : 0,
                    "isEOF" : 1,
                    "invalidates" : 0,
                    "docsExamined" : 3,
                    "alreadyHasObj" : 0,
                    "inputStage" : {
                        "stage" : "IXSCAN",
                        "nReturned" : 3,
                        "executionTimeMillisEstimate" : 0,
                        "works" : 4,
                        "advanced" : 3,
                        "needTime" : 0,
                        "needYield" : 0,
                        "saveState" : 0,
                        "restoreState" : 0,
                        "isEOF" : 1,
                        "invalidates" : 0,
                        "keyPattern" : {
                            "a" : 1,
                            "b" : 1,
                            "c" : 1
                        },
                        "indexName" : "a_1_b_1_c_1",
                        "isMultiKey" : false,
                        "multiKeyPaths" : {
                            "a" : [ ],
                            "b" : [ ],
                            "c" : [ ]
                        },
                        "isUnique" : false,
                        "isSparse" : false,
                        "isPartial" : false,
                        "indexVersion" : 2,
                        "direction" : "forward",
                        "indexBounds" : {
                            "a" : [
                                "[2.0, 2.0]"
                            ],
                            "b" : [
                                "[-inf.0, 90.0)"
                            ],
                            "c" : [
                                "[MinKey, MaxKey]"
                            ]
                        },
                        "keysExamined" : 3,
                        "seeks" : 1,
                        "dupsTested" : 0,
                        "dupsDropped" : 0,
                        "seenInvalidated" : 0
                    }
                }
            }
        }
    },
    "serverInfo" : {
        "host" : "centos01",
        "port" : 27017,
        "version" : "4.0.9",
        "gitVersion" : "fc525e2d9b0e4bceff5c2201457e564362909765"
    },
    "ok" : 1
}

我们可以看到

  • nReturned为3,返回3条记录
  • totalKeysExamined为3,扫描了3个index
  • totalDocsExamined为3,扫描了3个docs
  • 此时nReturned=totalDocsExamined=totalKeysExamined,符合我们的期望。看起来很美吧?
  • 但是,但是,但是!重要的事情说三遍!executionStages.Stage为Sort,在内存中进行排序了,这个在生产环境中尤其是在数据量较大的时候,是非常消耗性能的,这个千万不能忽视了,我们需要改进这个点。

5.6 使用db.testindex.ensureIndex({a:1, c:1, b:1)

{ 
    "executionStats" : {
        "executionSuccess" : true,
        "nReturned" : 3,
        "executionTimeMillis" : 0,
        "totalKeysExamined" : 3,
        "totalDocsExamined" : 3,
        "executionStages" : {
            "stage" : "FETCH",
            "nReturned" : 3,
            "executionTimeMillisEstimate" : 0,
            "works" : 5,
            "advanced" : 3,
            "needTime" : 0,
            "needYield" : 0,
            "saveState" : 0,
            "restoreState" : 0,
            "isEOF" : 1,
            "invalidates" : 0,
            "docsExamined" : 3,
            "alreadyHasObj" : 0,
            "inputStage" : {
                "stage" : "IXSCAN",
                "nReturned" : 3,
                "executionTimeMillisEstimate" : 0,
                "works" : 4,
                "advanced" : 3,
                "needTime" : 0,
                "needYield" : 0,
                "saveState" : 0,
                "restoreState" : 0,
                "isEOF" : 1,
                "invalidates" : 0,
                "keyPattern" : {
                    "a" : 1,
                    "c" : 1,
                    "b" : 1
                },
                "indexName" : "a_1_c_1_b_1",
                "isMultiKey" : false,
                "multiKeyPaths" : {
                    "a" : [ ],
                    "c" : [ ],
                    "b" : [ ]
                },
                "isUnique" : false,
                "isSparse" : false,
                "isPartial" : false,
                "indexVersion" : 2,
                "direction" : "forward",
                "indexBounds" : {
                    "a" : [
                        "[2.0, 2.0]"
                    ],
                    "c" : [
                        "[MinKey, MaxKey]"
                    ],
                    "b" : [
                        "[-inf.0, 90.0)"
                    ]
                },
                "keysExamined" : 3,
                "seeks" : 1,
                "dupsTested" : 0,
                "dupsDropped" : 0,
                "seenInvalidated" : 0
            }
        }
    },
    "serverInfo" : {
        "host" : "centos01",
        "port" : 27017,
        "version" : "4.0.9",
        "gitVersion" : "fc525e2d9b0e4bceff5c2201457e564362909765"
    },
    "ok" : 1
}

我们可以看到

  • nReturned为3,返回3条记录
  • totalKeysExamined为3,扫描了3个index
  • totalDocsExamined为3,扫描了3个docs
  • nReturned=totalKeysExamined=totalDocsExamined,Stage无Sort,即利用了index进行排序,而非内存,这个性能的提升高于多扫几个index的代价。
  • 综上可以有一个小结论,当查询覆盖精确匹配,范围查询与排序的时候,
    精确匹配字段,排序字段,范围查询字段这样的索引排序会更为高效。

MongoDB杂谈

原文:https://www.cnblogs.com/unclecc/p/11262775.html

(0)
(0)
   
举报
评论 一句话评论(0
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!