大纲

名词解释
logstash

  • Logstash is a dynamic data collection pipeline with an extensible plugin ecosystem and strong Elasticsearch synergy.
  • logstash提供了实时的,分布式数据收集能力
  • ElasticSearch

  • ElasticSearch是一个基于Lucene的搜索服务器。它提供了一个分布式多用户能力的全文搜索引擎,基于RESTful web接口。Elasticsearch是用Java开发的,并作为Apache许可条款下的开放源码发布,是当前流行的企业级搜索引擎。设计用于云计算中,能够达到实时搜索,稳定,可靠,快速,安装使用方便。
  • kibana

  • Kibana gives shape to your data and is the extensible user interface for configuring and managing all aspects of the Elastic Stack.
  • Kibana提供了方便易用的用户查询,以及数据展示界面

  • 结构图
    图:ELK结构1
    图:ELK结构2

    详解
    一、Logstash
    logstash配置
    # Pipeline: tail an nginx access log whose fields are separated by " | ",
    # split each line into named fields, and ship the event to Elasticsearch.
    input {
        file {
            path => "/usr/logs/access.log"
            # Read the file from the top the first time it is seen.
            start_position => "beginning"
            # No json codec: the log lines are pipe-delimited plain text, and the
            # filter below works on the raw [message] string.
        }
    }
    filter {
        # Split the raw line on " | " and zip the pieces onto the nginx field names.
        ruby {
            init => "@kname = ['remote_addr','remote_user','time_local','request','status','body_bytes_sent','http_referer','http_user_agent','http_x_forwarded_for']"
            code => "event.append(Hash[@kname.zip(event['message'].split(' | '))])"
        }
    
        if [request] {
            # Break the request line (e.g. "GET /path?a=1 HTTP/1.1") on spaces.
            ruby {
                init => "@kname = ['method','uri','verb']"
                code => "event.append(Hash[@kname.zip(event['request'].split(' '))])"
            }
    
            if [uri] {
                # NOTE(review): this splits the whole [request] string, not [uri], so
                # url_path keeps the method prefix (e.g. "GET /file/test.img") and
                # url_args keeps the trailing protocol token. Split event['uri']
                # instead if a bare path is wanted.
                ruby {
                    init => "@kname = ['url_path','url_args']"
                    code => "event.append(Hash[@kname.zip(event['request'].split('?'))])"
                }
    
                # Expand the query string into url_<key> fields. field_split lists two
                # delimiter characters, "&" and space.
                kv {
                    prefix => "url_"
                    source => "url_args"
                    field_split => "& "
                    remove_field => [ "url_args","uri","request" ]
                }
            }
        }
    
        # Store the response size as a number rather than a string.
        mutate {
            convert => [
                "body_bytes_sent" , "integer"
            ]
        }
    
        # Use the request time from the log line as the event timestamp.
        # HH is the 24-hour clock field; hh (1-12) would reject hours such as 13.
        date {
            match => [ "time_local", "dd/MMM/yyyy:HH:mm:ss Z" ]
            locale => "en"
        }
    }
    
    output {
            # stdout is for inspecting events in a test environment.
    
            stdout {
                codec => rubydebug
            }
    
            # Assumes Elasticsearch runs locally; to write to a remote server,
            # enable and fill in the settings below.
            elasticsearch {
                # host => 127.0.0.2
                # user => ***
                # password => ***
                codec => json
            }
    }
    
    执行logstash程序收集日志
    //检测conf是否通过
    bin/logstash -f test.conf --configtest
    Configuration OK
    //启动
    bin/logstash -f test.conf
    //stdout记录输出,并输出到elasticsearch
    {
                   "message" => "127.0.0.1 | - | 11/Nov/2015:13:14:53 +0800 | GET
    /file/test.img?width=800&height=600 HTTP/1.1 | 404 | 570 | - | Mozilla/5.0 (Wind
    ows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.80 Sa
    fari/537.36 | - \r",
                  "@version" => "1",
                "@timestamp" => "2015-11-11T05:14:53.000Z",
                      "host" => "Jevirs-PC",
                      "path" => "D:\\nginx\\nginx\\logs\\access_test.log",
               "remote_addr" => "127.0.0.1",
               "remote_user" => "-",
                "time_local" => "11/Nov/2015:13:14:53 +0800",
                    "status" => "404",
           "body_bytes_sent" => 570,
              "http_referer" => "-",
           "http_user_agent" => "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/5
    37.36 (KHTML, like Gecko) Chrome/46.0.2490.80 Safari/537.36",
      "http_x_forwarded_for" => "- \r",
                    "method" => "GET",
                      "verb" => "HTTP/1.1",
                  "url_path" => "GET /file/test.img",
                 "url_width" => "800",
                "url_height" => "600"
    }
    
    二、ElasticSearch
    • Document元数据:_index,_type,_id,这几个来唯一确定文档。
    • 操作文档:PUT,GET,HEAD,POST,DELETE
    • PUT	/{index}/{type}/{id}
      {
      "field":	"value",
      ...
      }
      PUT	/website/blog/123
      {
      "title":	"My	first	blog	entry",
      "text":		"Just	trying	this	out...",
      "date":		"2014/01/01"
      }
      {
      "_index":	"website",
      "_type":	"blog",
      "_id":           "123",
      "_version":	1,
      "created":	true
      }
      
      curl -X DELETE  http://localhost:9200/debug-log-2017.04.13/debug_log/AVtnGe5VSS9WNB-y43iv
      {"found":true,"_index":"debug-log-2017.04.13","_type":"debug_log","_id":"AVtnGe5VSS9WNB-y43iv","_version":2,"result":"deleted","_shards":{"total":2,"successful":1,"failed":0}}
      
      curl -XPOST  'http://localhost:9200/debug-log-2017.04.17/debug_log/AVt56IqWjEftJIiCDQyC/_update' -d '{"doc":{"message":"2017-04-17 11:15:33 password:22222"}}'
      {"_index":"debug-log-2017.04.17","_type":"debug_log","_id":"AVt56IqWjEftJIiCDQyC","_version":2,"result":"updated","_shards":{"total":2,"successful":1,"failed":0}}
      
    • 查询语法
    curl -XPOST 'localhost:9200/bank/_search?pretty' -d '
    {
      "query": { "match_all": { } },     //所有文档
      "query": { "match": { "address": "mill" } },   //特定文档
      "query": { "match": { "address": "aaa bbb" } }, //含有aaa或bbb
      "query": { "match_phrase": { "address": "aaa bbb" } }, //含有"aaa bbb"
      "size":10,   //记录条数
      "from":10,   //记录跳页
      "sort": { "balance": { "order": "desc" } },//排序
    
      //与
      "query": {
        "bool": {
          "must": [
            { "match": { "address": "mill" } },
            { "match": { "address": "lane" } }
          ]
        }
      },
      //或
      "query": {
        "bool": {
          "should": [
            { "match": { "address": "mill" } },
            { "match": { "address": "lane" } }
          ]
        }
      },
      //非
      "query": {
        "bool": {
          "must_not": [
            { "match": { "address": "mill" } },
            { "match": { "address": "lane" } }
          ]
        }
      },
      //多条件
       "query": {
         "bool": {
          "must": [
            { "match": { "age": "40" } }
          ],
          "must_not": [
            { "match": { "state": "ID" } }
          ]
        }
      },
      //范围选择
       "query": {
         "bool": {
          "must": { "match_all": {} },
          "filter": {
            "range": {
              "balance": {"gte": 20000,"lte": 30000}
            }
          }
        }
      },
    
      //聚合(按state分组)
        "aggs": {
        "group_by_state": {
          "terms": {
            "field": "state"
          }
        }
      }
    }'
    
    分词:关于ElasticSearch的分词工具Lucene
    分词:关于Lucene的中文分词工具IK-Analyzer
    api:REST API
    api流程:文档操作流程
    集群管理:宕机和扩容
    官方guide
    三、Kibana
    界面查询/统计/分析工具,支持模糊查询,message查询,关键词查询等
    [message模糊查询]
    不等:!"api.xin.com"
    等于:api.xin.com
    
    [关键字段查询]
    response:200
    !response:200
    
    [区间查询]
    []:端点数值包含在范围内
    {}:端点数值不包含在范围内
    price:[50 TO 100000]
    date:{"now-6h" TO "now"}
    
    [通配符]
    ?:匹配单个字符
    *:匹配0到多个字符
    kiba*a, el*search
    
    [模糊查询]
    ~:单词后加~启用模糊搜索
    first~ 能匹配到frist
    
    [近似搜索]
    "select where"~3 表示select和where之间隔着3个单词以内
    
    指定相似度
    cromm~0.3 能匹配到from,chrome
    数值范围0.0 ~ 1.0,默认0.5,越大越接近搜索的原始值
    
    逻辑操作:AND, OR, !
    (nginx OR apache) AND api
    
    +:搜索结果必须包含此项
    -:搜索结果必须不包含此项
    +apache -nginx api:包含apache,不包含nginx,api可有可无
    
    [字段分组]
    title:(+api +sms)
    
    [特殊字符]
    + - && || ! () {} [] ^ " ~ * ? : \
    特殊字符作为值搜索时需要用\转义
            

    参考文献:

    1. 网页:http://www.jianshu.com/p/5be9d0b32c58
    2. kibana查询语法:https://segmentfault.com/a/1190000002972420
    3. 书籍:《ElasticSearch权威指南》