Please note that the CVS and issue trackers have moved to GitHub. These Trac pages are no longer kept up-to-date.

root/seattle/trunk/deploymentscripts/deploy_stats.py@5637

Revision 2810, 12.8 KB (checked in by konp, 10 years ago)

Fixed high cpu usage, added summary tool, a number of other fixes and improvements as well. Note: blackbox node keys have not been added yet.

Line 
1"""
2<Program Name>
3  deploy_stats.py
4
5<Started>
6  July 2009
7
8<Author>
9  n2k8000@u.washington.edu
10  Konstantin Pik
11
12<Purpose>
13  This file contains methods to be used for creating summaries.
14
15<Usage>
16  See deploy_main.py.
17 
18"""
19import deploy_main
20import deploy_html
21
22
23import repyhelper
24
25# make sure we have access to the rsa lib in the local namespace
26repyhelper.translate_and_import('rsa.repy')
27
# Public keys that identify the state a node is in.  Each key is read from a
# "*.publickey" file by rsa_file_to_publickey(), which is not defined in this
# file (presumably supplied by the rsa.repy import above -- confirm).

canonicalpublickey = rsa_file_to_publickey("canonical.publickey")

# Key used for new donations.
acceptdonationpublickey = rsa_file_to_publickey("acceptdonation.publickey")

# Keys from the first attempt at doing something sensible.
movingtoonepercentpublickey = rsa_file_to_publickey("movingtoonepercent.publickey")
onepercentpublickey = rsa_file_to_publickey("onepercent.publickey")

# Second generation of the onepercent keys, introduced to correct the
# accidental deletion of vesselport entries from the geni database.
movingtoonepercent2publickey = rsa_file_to_publickey("movingtoonepercent2.publickey")
onepercent2publickey = rsa_file_to_publickey("onepercent2.publickey")

# Getting us out of the mess we started with.  The genilookup key itself is
# currently disabled:
#genilookuppublickey = rsa_file_to_publickey("genilookup.publickey")
movingtogenilookuppublickey = rsa_file_to_publickey("movingtogenilookup.publickey")

# "Many events" variant of the onepercent keys -- 50 events per vessel.
movingtoonepercentmanyeventspublickey = rsa_file_to_publickey("movingtoonepercentmanyevents.publickey")
onepercentmanyeventspublickey = rsa_file_to_publickey("onepercentmanyevents.publickey")

# Every state key we recognise.  knownstates and knownstates_string are
# parallel lists: entry i of knownstates_string is the human readable name
# of entry i of knownstates, so both must be kept in the same order.
knownstates = [
  canonicalpublickey,
  acceptdonationpublickey,
  movingtoonepercentpublickey,
  onepercentpublickey,
  movingtoonepercent2publickey,
  onepercent2publickey,
  movingtoonepercentmanyeventspublickey,
  onepercentmanyeventspublickey,
  movingtogenilookuppublickey,
]

knownstates_string = [
  'canonicalpublickey',
  'acceptdonationpublickey',
  'movingtoonepercentpublickey',
  'onepercentpublickey',
  'movingtoonepercent2publickey',
  'onepercent2publickey',
  'movingtoonepercentmanyeventspublickey',
  'onepercentmanyeventspublickey',
  'movingtogenilookuppublickey',
]
63           
64
def check_is_seattle_installed(file_data):
  """
  <Purpose>
    Decides from a node's logfile whether seattle is installed on that node.

  <Arguments>
    file_data:
      the contents of the node's logfile, as one string.

  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    False when the log contains the 'Did not find any seattle installs on'
    message, True otherwise.
  """
  not_installed_marker = 'Did not find any seattle installs on'
  return not_installed_marker not in file_data
84
85 
86 
def insert_timestamp_from_fn(any_fn):
  """
  <Purpose>
    Builds the HTML "Log from <time>" header for a file whose name has the
    format [blah].TIMESTAMP, where TIMESTAMP is a valid unix time.

  <Arguments>
    any_fn:
      the filename.  Everything after the last '.' is used as the timestamp.

  <Exceptions>
    None.  If the timestamp cannot be converted, the error is printed and
    an empty string is returned instead.

  <Side Effects>
    Prints an error message when the timestamp cannot be converted.

  <Returns>
    The string "<br><br><b>Log from " followed by the time.ctime() form of
    the timestamp (the <b> tag is not closed here), or '' on failure.
  """
  # None of this file's visible import lines import the time module.  Without
  # this import the time.ctime lookup below raises a NameError that the
  # except clause swallows, so the function would always return ''.
  import time

  # fn must be in format [blah].TIMESTAMP; only the last component is used
  junk, sep, timestamp = any_fn.rpartition('.')
  try:
    return "<br><br><b>Log from "+time.ctime(float(timestamp))
  except Exception as e:
    # best effort: a bad filename must not abort the summary being built
    print("Error in inserting timestamp.  "+str(e))
    return ''
114   
115
def key_to_string(key):
  """
  <Purpose>
    Helper for parse_vessel_string.  Looks up the human readable name of a
    state key.

  <Arguments>
    key:
      the public key to look up in knownstates.

  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    The entry of knownstates_string at the position where key first appears
    in knownstates, or None if key is not in knownstates.
  """
  for position, known_key in enumerate(knownstates):
    if known_key == key:
      return knownstates_string[position]
  return None
121     
122
123
124# internal method
def parse_vessel_string(vesseldict_string):
  """
  <Purpose>
    Evaluates a dumped vesseldict and reports, for every vessel in it,
    whether its first userkey is one of the known node-state keys.

  <Arguments>
    vesseldict_string:
      the vesseldict file dumped as a string.  It must evaluate to a dict
      that maps vessel names to per-vessel dicts.

  <Exceptions>
    None are raised on purpose, but anything eval() raises on a malformed
    string is passed on to the caller.

  <Side Effects>
    Evaluates vesseldict_string as a python expression.

  <Returns>
    Tuple of an array and an integer.

    The array holds one tuple per vessel: (success_status, 'v#', explanation_string)
    The integer is the number of vessels whose first userkey is a known state key.
  """

  # NOTE(review): eval() runs whatever expression the string contains.  The
  # string is taken from a node's logfile, so this is only acceptable while
  # those logs are trusted.
  vessels = eval(vesseldict_string)

  vessel_reports = []
  recognised_keys = 0

  for vessel_name, vessel_info in vessels.items():
    # no 'userkeys' entry at all
    if 'userkeys' not in vessel_info:
      vessel_reports.append((False, vessel_name, 'userkey does not exist'))
      continue

    # entry is there, but null/empty
    userkeys = vessel_info['userkeys']
    if not userkeys:
      vessel_reports.append((False, vessel_name, 'userkey is empty/null'))
      continue

    # userkeys is a list; only its first element is compared with the
    # known state keys
    state_key = userkeys[0]
    if state_key in knownstates:
      vessel_reports.append((True, vessel_name, key_to_string(state_key)))
      recognised_keys += 1
    else:
      vessel_reports.append((False, vessel_name, 'Unknown pubkey!:'+str(state_key)))

  return (vessel_reports, recognised_keys)
185
186
def get_node_state(file_data):
  """
  <Purpose>
    Finds the vesseldict dump in a node's logfile and works out which state
    the node is in.  The dump is expected to look like this:

      File contents of vesseldict:
      [vesseldict_as_string]
      End contents of vesseldict

  <Arguments>
    file_data:
      logfile of a node as a string.

  <Exceptions>
    None are raised on purpose; whatever parse_vessel_string raises on a
    malformed vesseldict line is passed on.

  <Side Effects>
    None.

  <Returns>
    None if no vesseldict line was found in the log.

    Otherwise (SuccessStatus, (array, # of states), html_color):
      (True, (array of node states, 1), Success color)
        when exactly one known state key was found,
      (False, (['vesseldict is missing'], 0), Error color)
        when the dump says the vesseldict is missing,
      (False, (['Unknown pubkey found in v#'], # of states), Error color)
        when the count is not 1 and some vessel has an unknown pubkey,
      (False, (['No keys found'], # of states), Error color)
        for every other count.  Note this covers counts above 1 as well.
  """
  # True while we are between the header line and the end marker
  inside_dump = False

  for log_line in file_data.splitlines():
    if not inside_dump:
      # still searching for the header; the dump starts on the next line
      if 'File contents of vesseldict:' in log_line:
        inside_dump = True
      continue

    if log_line.startswith('vesseldict is missing'):
      return (False, (['vesseldict is missing'], 0), deploy_html.colors_map['Error'])

    if log_line.startswith('End contents of vesseldict'):
      # dump is over, go back to searching for another header
      inside_dump = False
      continue

    # anything else inside the dump is the vesseldict string itself
    vessel_states, key_count = parse_vessel_string(log_line)

    # we expect a node to be in exactly one state
    if key_count == 1:
      return (True, (vessel_states, key_count), deploy_html.colors_map['Success'])

    # each entry is (has a key?, 'v#', human-readable string)
    for vessel_report in vessel_states:
      if 'Unknown pubkey' in vessel_report[2]:
        return (False, (['Unknown pubkey found in '+vessel_report[1]], key_count), deploy_html.colors_map['Error'])

    return (False, (['No keys found'], key_count), deploy_html.colors_map['Error'])

  return None
256
257 
258 
def get_node_version(file_data):
  """
  <Purpose>
    Extracts the node version from a node's logfile.

  <Arguments>
    file_data:
      logfile read in as string for a node.

  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    (SuccessStatus, String, html_color)

    One version line:  (True, the version, Success color)
    Two version lines: (True, "Upgraded from X to Y", Success color)
    Anything else:     (False, an error string listing what was found,
                        SmallError color)
  """

  # every line starting with 'version =' contributes whatever follows its
  # last '=', with surrounding spaces and quotes removed
  versions = [log_line.rpartition('=')[2].strip(' "\'')
              for log_line in file_data.splitlines()
              if log_line.startswith('version =')]
  found = len(versions)

  if found == 1:
    # just the node version
    return (True, versions[0], deploy_html.colors_map['Success'])

  if found == 2:
    # two version strings: assume that the node upgraded
    return (True, "Upgraded from "+versions[0]+" to "+versions[1], deploy_html.colors_map['Success'])

  # unexpected!
  return (False, "Unexpected number of version strings in log:\n"+\
    str(versions), deploy_html.colors_map['SmallError'])
301
302
303
def get_nodes_up(summary_file):
  """
  <Purpose>
    Cheap way of seeing how many of the nodes our tests actually ran on:
    count the lines starting with "version" in the summary, which is a
    unique line per host-log.  This can be slightly inaccurate (within
    several nodes, eg: if nodes upgraded?).

  <Arguments>
    summary_file:
      path to the summary.log file (htmlsummary.log)

  <Exceptions>
    None.  Errors while adding up the counts are printed (or, for a
    ValueError, silently ignored) and the count reached so far is returned.

  <Side Effects>
    Runs a grep | sort | uniq | awk pipeline through deploy_main.shellexec2.

  <Returns>
    Tuple of form (nodes_up, HumanString)
  """

  # NOTE(review): summary_file is pasted into the shell command unquoted, so
  # a path containing spaces or shell metacharacters will break the command.
  out, err, retcode = deploy_main.shellexec2('grep ^version '+summary_file+\
      ' | sort | uniq -c | awk \'{ print $1 }\'')

  # this is how many computers are 'up'
  counter = 0

  # each line starts with a number, so convert to int and give it a try
  try:
    for line in out.splitlines():
      counter += int(line)
  except ValueError:
    # a line that is not a number ends the tally; we don't really care
    pass
  except Exception as e:
    print('Error in get_nodes_up')
    print(e)

  # The return used to sit in a finally clause, which also discarded
  # KeyboardInterrupt and SystemExit.  Returning here lets those propagate.
  return (counter, str(counter)+' hosts responded in a timely fashion '+\
      'and ran our tests.\n\n')
344     
345     
346     
def get_uniq_machines(controller_file):
  """
  <Purpose>
    Finds out how many machines in total were surveyed.  The number is the
    8th field of the controller.log line that looks like:

    Jun 16 2009 01:56:07 | Setup:  Found 950 unique hosts to connect to.

  <Arguments>
    controller_file:
      path to the controller.log file

  <Exceptions>
    None.  Failures are printed instead of raised.

  <Side Effects>
    Runs awk through deploy_main.shellexec2.  Prints a message on failure.

  <Returns>
    (NumberAsString, HumanString) on success.  Note the first element is
    the host count as a string, not an int.
    None if the shell output is not a single integer or another error occurs.
  """

  awk_command = "awk '/Found/ { print $8 } ' "+controller_file
  out, err, retcode = deploy_main.shellexec2(awk_command)

  try:
    out = out.strip('\n\r ')
    host_count = str(int(out))
    return (host_count, 'There were '+out+' unique hosts surveyed\n\n')
  except ValueError as ve:
    print('Unexpected number of uniq hosts returned from shell.')
    print(ve)
  except Exception as e:
    print('Error in get_uniq_machines()')
    print(e)

  # reached only after one of the handlers above has reported a failure
  return None
378   
379   
380   
def check_is_nm_running(file_data):
  """
  <Purpose>
    Reports whether the Node Manager (NM) is running on a node, judging by
    the messages in the node's logfile.

  <Arguments>
    file_data:
      log file of a node as string.

  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    Tuple in the form of (BooleanStatus, StringDesc, html_color):
      (False, 'Not running', Error color)
        if the log says the Node Manager is not running,
      (False, 'Error state', Error color)
        if the log contains a '[NodeManager]' entry,
      (True, 'Running', Success color)
        otherwise.
  """

  if 'Node Manager is not running.' in file_data:
    return (False, 'Not running', deploy_html.colors_map['Error'])

  # any '[NodeManager]' entry in the log is treated as an error
  if '[NodeManager]' in file_data:
    return (False, 'Error state', deploy_html.colors_map['Error'])

  return (True, 'Running', deploy_html.colors_map['Success'])
409 
410 
411 
def check_is_su_running(file_data):
  """
  <Purpose>
    Reports whether the Software Updater (SU) is running on a node, judging
    by the messages in the node's logfile.

  <Arguments>
    file_data:
      log file of a node as string.

  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    Tuple in the form of (BooleanStatus, StringDesc, html_color):
      (False, 'Not running', Error color)
        if the log says the Software Updater is not running,
      (False, 'High Memory Usage', SmallError color)
        if the log has a '[SoftwareUpdater]' entry and the high memory
        usage message,
      (False, 'Error State', Error color)
        if the log has a '[SoftwareUpdater]' entry without that message,
      (True, 'Running', Success color)
        otherwise.
  """

  if 'Software Updater is not running.' in file_data:
    return (False, 'Not running', deploy_html.colors_map['Error'])

  # no '[SoftwareUpdater]' entry in the log means no SU errors were reported
  if '[SoftwareUpdater]' not in file_data:
    return (True, 'Running', deploy_html.colors_map['Success'])

  # high memory usage is only a small error; anything else is a real one
  if 'Software Updater memory usage is unusually high' in file_data:
    return (False, 'High Memory Usage', deploy_html.colors_map['SmallError'])

  return (False, 'Error State', deploy_html.colors_map['Error'])
Note: See TracBrowser for help on using the browser.