blob: 89212d824d758ddc2a42d0cc0787b1da85e1b982 [file] [log] [blame] [edit]
#!/usr/bin/env python
#
# Copyright 2007 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Data structures to represent statistics used by analysis library.
Appstats data is loaded into data structures defined in this code.
URLStats holds information about all requests of a URL path,
URLRequestStats holds information about a specific request,
RPCStats holds data about a specific RPC category for each request.
"""
import logging
import entity
def _RPCCategory(rpcstatsproto):
    """Derive the category string used to sub-group a single RPC.

    Grouping RPCs only by service and call name is too coarse for the
    latency breakdown shown by the analysis tool: two datastore queries on
    different entity kinds would be lumped together. Each RPC is therefore
    also given a "category", and results are summarized by service name,
    call name and category.

    Only RPCs that carry datastore details get a non-empty category:
      * datastore_v3.Put: entity.EntityListKind() of the keys written.
      * datastore_v3.Get / datastore_v3.Next: entity.EntityListKind() of
        the keys read.
      * datastore_v3.RunQuery: the query kind ('NoKind' when the query has
        none), followed by '_ANC' when the query has an ancestor.

    Args:
      rpcstatsproto: IndividualRPCStatsProto from Appstats recording which
        represents statistics for a single RPC in a request.

    Returns:
      A string naming the category of the RPC, or '' when category
      information is not relevant to this RPC.
    """
    # Non-datastore RPCs are never sub-categorized.
    if not rpcstatsproto.has_datastore_details():
        return ''

    call_name = rpcstatsproto.service_call_name()

    if call_name == 'datastore_v3.Put':
        written = rpcstatsproto.datastore_details().keys_written_list()
        return entity.EntityListKind(written)

    # Get and Next are both categorized by the keys they read.
    if call_name in ('datastore_v3.Get', 'datastore_v3.Next'):
        fetched = rpcstatsproto.datastore_details().keys_read_list()
        return entity.EntityListKind(fetched)

    if call_name == 'datastore_v3.RunQuery':
        details = rpcstatsproto.datastore_details()
        kind = details.query_kind() if details.has_query_kind() else 'NoKind'
        suffix = '_ANC' if details.has_query_ancestor() else ''
        return '%s%s' % (kind, suffix)

    # Any other datastore call is left uncategorized.
    return ''
class RPCStats(object):
    """Aggregated statistics for one RPC category within a URL request.

    Tracks how many RPCs of the category were made and the total time they
    took. For RPCs that carry datastore details it also accumulates the keys
    that were read and written, and, for datastore_v3.Get, the keys whose
    fetch was not successful.
    """

    # Short names used in graph labels for the datastore calls.
    _ABBRV = {
        'datastore_v3.Get': 'ds.Get',
        'datastore_v3.Next': 'ds.Next',
        'datastore_v3.Put': 'ds.Put',
        'datastore_v3.RunQuery': 'ds.Query',
    }

    def __init__(self, rpcstatsproto):
        """Start a new category from the first RPC seen for it.

        Args:
          rpcstatsproto: IndividualRPCStatsProto from Appstats recording
            which represents statistics for a single RPC in a request.
        """
        self.servicecallname = rpcstatsproto.service_call_name()
        self.category = _RPCCategory(rpcstatsproto)
        self.numcalls = 0
        self.time = 0
        self.keys_read = []
        self.keys_written = []
        self.keys_failed_get = []
        # The RPC that creates the category is also its first data point.
        self.Incr(rpcstatsproto)

    def Incr(self, rpcstatsproto):
        """Fold one more RPC of this category into the running totals.

        Adds the RPC's duration (truncated to an int) to the total time and
        bumps the call count. When the RPC has datastore details, its read
        and written keys are appended; for datastore_v3.Get the keys whose
        fetch flag is false are appended to the failed-get list.

        Args:
          rpcstatsproto: IndividualRPCStatsProto from Appstats recording
            which represents statistics for a single RPC in a request.
        """
        self.time += int(rpcstatsproto.duration_milliseconds())
        self.numcalls += 1

        if not rpcstatsproto.has_datastore_details():
            return

        details = rpcstatsproto.datastore_details()
        self.keys_read.extend(details.keys_read_list())
        self.keys_written.extend(details.keys_written_list())

        if self.servicecallname != 'datastore_v3.Get':
            return

        # The fetch flags are matched to the read keys by position.
        fetch_flags = details.get_successful_fetch_list()
        requested = details.keys_read_list()
        for position, found in enumerate(fetch_flags):
            if not found:
                self.keys_failed_get.append(requested[position])

    def GetLabel(self):
        """Return the label used to refer to this RPC category in graphs."""
        short_name = RPCStats._ABBRV.get(self.servicecallname,
                                         self.servicecallname)
        if not self.category:
            return short_name
        return '%s_%s' % (short_name, self.category)

    def Match(self, rpcstatsproto):
        """Tell whether an RPC falls into this category.

        Args:
          rpcstatsproto: IndividualRPCStatsProto from Appstats recording
            which represents statistics for a single RPC in a request.

        Returns:
          True when both the service call name and the computed category of
          the RPC equal those of this object, False otherwise.
        """
        # The category is only computed once the call names already agree.
        return (rpcstatsproto.service_call_name() == self.servicecallname
                and _RPCCategory(rpcstatsproto) == self.category)
class URLRequestStats(object):
    """Statistics gathered for a single request to a URL.

    Holds one RPCStats object per RPC category seen in the request, the
    request's start timestamp, its total response time, and the total time
    spent in RPCs.
    """

    def __init__(self, statsproto):
        """Build the per-request statistics from a recorded request.

        Args:
          statsproto: Appstats record of one request; its individual RPC
            stats are folded into per-category RPCStats objects.
        """
        self.rpcstatslist = []
        # Start time is recorded in milliseconds; keep it scaled by 0.001.
        self.timestamp = statsproto.start_timestamp_milliseconds() * 0.001
        self.totalresponsetime = int(statsproto.duration_milliseconds())
        for individual in statsproto.individual_stats_list():
            self.AddRPCStats(individual)
        self.totalrpctime = self.TotalRPCTime()

    def TotalRPCTime(self):
        """Return the time summed over every RPC category of the request."""
        return sum(rpc.time for rpc in self.rpcstatslist)

    def AddRPCStats(self, rpcstatsproto):
        """Account for one RPC made while serving this request.

        The RPC is added to the first existing category it matches; if no
        category matches, a new RPCStats is created for it and appended.
        """
        for known in self.rpcstatslist:
            if known.Match(rpcstatsproto):
                known.Incr(rpcstatsproto)
                break
        else:
            # No existing category matched: this RPC opens a new one.
            self.rpcstatslist.append(RPCStats(rpcstatsproto))

    def _IncrementCount(self, key_list, group_flag, freq, action):
        """Bump the counter for `action` once per key in `key_list`.

        Args:
          key_list: List of entity keys that were accessed.
          group_flag: Boolean. If True, counts are kept per entity group.
            If False, counts are kept per entity.
          freq: Dictionary, updated in place, keyed on 'kind,name' of the
            entity (group); each value holds the 'read', 'write' and 'miss'
            counts for it.
          action: Which counter to increment: 'read', 'write' or 'miss'.
        """
        for key in key_list:
            if group_flag:
                name = entity.EntityGroupName(key)
                kind = entity.EntityGroupKind(key)
            else:
                name = entity.EntityFullName(key)
                kind = entity.EntityKind(key)
            label = '%s,%s' % (kind, name)
            counters = freq.setdefault(
                label, {'read': 0, 'write': 0, 'miss': 0})
            counters[action] += 1

    def _TallyAccesses(self, group_flag):
        """Count reads, writes and failed gets over all RPC categories.

        Args:
          group_flag: Boolean passed through to _IncrementCount; True to
            count per entity group, False to count per entity.

        Returns:
          Dictionary keyed on 'kind,name' whose values are dictionaries with
          'read', 'write' and 'miss' counts.
        """
        freq = {}
        for rpcstats in self.rpcstatslist:
            self._IncrementCount(rpcstats.keys_read, group_flag, freq, 'read')
            self._IncrementCount(
                rpcstats.keys_written, group_flag, freq, 'write')
            self._IncrementCount(
                rpcstats.keys_failed_get, group_flag, freq, 'miss')
        return freq

    def EntityGroupCount(self):
        """Count reads/writes/failed gets per entity group for this request.

        Returns:
          freq: Dictionary keyed on entity group, in the form
            'entitygroupkind,entitygroupname', so statistics can be organized
            by entity group kind. Each value is an inner dictionary with the
            keys 'read', 'write' and 'miss' holding the number of reads,
            writes and failed gets to that entity group for the request.
        """
        return self._TallyAccesses(True)

    def EntityCount(self):
        """Count reads/writes/failed gets per entity for this request.

        Returns:
          freq: Dictionary keyed on entity, in the form
            'entitykind,entityfullname', so statistics can be organized by
            entity kind. Each value is an inner dictionary with the keys
            'read', 'write' and 'miss' holding the number of reads, writes
            and failed gets to that entity for the request.
        """
        return self._TallyAccesses(False)
class URLStats(object):
    """Statistics associated with a given URL.

    Keeps one URLRequestStats object per recorded request of the URL and
    offers aggregate views over them: response times, total RPC times, and
    per-entity / per-entity-group access counts.
    """

    def __init__(self, url):
        """Create an empty statistics holder for `url`."""
        self.url = url
        self.urlrequestlist = []

    def AddRequest(self, statsproto):
        """Record the statistics of one more request to this URL.

        Args:
          statsproto: Appstats record of the request; it is wrapped in a
            URLRequestStats and appended to the request list.
        """
        self.urlrequestlist.append(URLRequestStats(statsproto))

    def GetResponseTimeList(self):
        """Returns list of response times across all requests of URL."""
        return [request.totalresponsetime for request in self.urlrequestlist]

    def GetTotalRPCTimes(self):
        """Returns list of total RPC times across all requests of URL."""
        return [request.totalrpctime for request in self.urlrequestlist]

    def _Count(self, group_flag):
        """Helper function to count accesses to entities (entity groups).

        Args:
          group_flag: Boolean. If true, count entity groups. If false, count
            entities.

        Returns:
          Dictionary keyed on names of entities (entity groups). Each value
          is a dictionary with the keys 'read', 'write' and 'miss' holding
          the counts summed over all requests of the URL.
        """
        freq_total = {}
        for request in self.urlrequestlist:
            if group_flag:
                freq_request = request.EntityGroupCount()
            else:
                freq_request = request.EntityCount()
            for name, freq in freq_request.items():
                totals = freq_total.setdefault(
                    name, {'read': 0, 'write': 0, 'miss': 0})
                for action in ('read', 'write', 'miss'):
                    totals[action] += freq[action]
        return freq_total

    def EntityGroupCount(self):
        """Get reads/writes/failed gets per entity group over all requests.

        Returns:
          freq_total: Dict keyed on entity group, with value being a dict
            of 'read'/'write'/'miss' counts to that entity group across all
            requests.
        """
        return self._Count(True)

    def EntityCount(self):
        """Get reads/writes/failed gets per entity over all requests.

        Returns:
          freq_total: Dict keyed on entity name, exactly as returned by each
            request's EntityCount(), with value being a dict of
            'read'/'write'/'miss' counts to that entity across all requests.
        """
        return self._Count(False)

    def Dump(self):
        """Dumps URL statistics to INFO/DEBUG logs for debugging.

        Summary lines (URL, response time, one line per RPC category) go to
        INFO. The per-key listings, including their section headers, go to
        DEBUG so that an INFO-level log never shows a header without its
        content.
        """
        logging.info('URL: %s', self.url)
        for urlrequest in self.urlrequestlist:
            logging.info('Resptime: %d', urlrequest.totalresponsetime)
            for rpc in urlrequest.rpcstatslist:
                logging.info('%s %s %d %d read:%d written:%d failedgets:%d',
                             rpc.servicecallname,
                             rpc.category,
                             rpc.time,
                             rpc.numcalls,
                             len(rpc.keys_read),
                             len(rpc.keys_written),
                             len(rpc.keys_failed_get))
                sections = (
                    ('Keys Read', rpc.keys_read),
                    ('Keys Written', rpc.keys_written),
                    # This header used to be logged at INFO, unlike the
                    # other two headers and the key lines that follow it.
                    ('Keys Failed Get', rpc.keys_failed_get),
                )
                for header, keys in sections:
                    logging.debug(header)
                    for key in keys:
                        logging.debug('%s ', entity.EntityFullName(key))