Agent: "Create a Grafana dashboard for:
- Service overview with SLIs
- Detailed performance metrics
- Infrastructure monitoring"
"title": "Service Overview",
"uid": "service-overview",
"tags": ["production", "sli"],
"datasource": "$datasource",
"query": "label_values(kube_namespace_created, namespace)",
"datasource": "$datasource",
"query": "label_values(up{namespace=~\"$namespace\"}, job)",
"title": "Service Level Indicators",
"gridPos" : { "h" : 8 , "w" : 24 , "x" : 0 , "y" : 0 },
"gridPos" : { "h" : 8 , "w" : 6 , "x" : 0 , "y" : 1 },
"expr": "sum(rate(http_requests_total{status!~\"5..\",namespace=~\"$namespace\",job=~\"$service\"}[5m])) / sum(rate(http_requests_total{namespace=~\"$namespace\",job=~\"$service\"}[5m]))",
"legendFormat": "Availability"
{ "color": "red", "value": null },
{ "color": "yellow", "value": 0.99 },
{ "color": "green", "value": 0.995 }
"gridPos" : { "h" : 8 , "w" : 6 , "x" : 6 , "y" : 1 },
"expr": "sum(rate(http_requests_total{namespace=~\"$namespace\",job=~\"$service\"}[5m])) by (status)",
"legendFormat": "{{status}}"
{ "format": "reqps", "label": "Requests/sec" }
"gridPos" : { "h" : 8 , "w" : 6 , "x" : 12 , "y" : 1 },
"expr": "histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket{namespace=~\"$namespace\",job=~\"$service\"}[5m])) by (le))",
{ "format": "s", "label": "Latency" }
"params": ["A", "5m", "now"]
"name": "High P95 Latency",
"noDataState": "no_data",
"gridPos" : { "h" : 8 , "w" : 6 , "x" : 18 , "y" : 1 },
"expr": "sum(rate(http_requests_total{status=~\"5..\",namespace=~\"$namespace\",job=~\"$service\"}[5m])) / sum(rate(http_requests_total{namespace=~\"$namespace\",job=~\"$service\"}[5m]))",
"legendFormat": "Error Rate"
{ "color": "green", "value": null },
{ "color": "yellow", "value": 0.01 },
{ "color": "red", "value": 0.05 }
"title": "Infrastructure Metrics",
"gridPos" : { "h" : 8 , "w" : 24 , "x" : 0 , "y" : 9 },
"gridPos" : { "h" : 8 , "w" : 8 , "x" : 0 , "y" : 10 },
"expr": "sum(rate(container_cpu_usage_seconds_total{namespace=~\"$namespace\",pod=~\".*$service.*\"}[5m])) by (pod)",
"legendFormat": "{{pod}}"
"gridPos" : { "h" : 8 , "w" : 8 , "x" : 8 , "y" : 10 },
"expr": "sum(container_memory_working_set_bytes{namespace=~\"$namespace\",pod=~\".*$service.*\"}) by (pod)",
"legendFormat": "{{pod}}"
"gridPos" : { "h" : 8 , "w" : 8 , "x" : 16 , "y" : 10 },
"expr": "sum(rate(container_network_receive_bytes_total{namespace=~\"$namespace\",pod=~\".*$service.*\"}[5m])) by (pod)",
"legendFormat": "RX {{pod}}"
"expr": "-sum(rate(container_network_transmit_bytes_total{namespace=~\"$namespace\",pod=~\".*$service.*\"}[5m])) by (pod)",
"legendFormat": "TX {{pod}}"