Please note that the CVS and issue trackers have moved to GitHub. These Trac pages are no longer kept up-to-date.

root/seattle/trunk/deploymentscripts/make_summary.py@5637

Revision 2810, 14.3 KB (checked in by konp, 10 years ago)

Fixed high cpu usage, added summary tool, a number of other fixes and improvements as well. Note: blackbox node keys have not been added yet.

Line 
1"""
2<Program Name>
3  make_summary.py
4
5<Started>
6  June 2009
7
8<Author>
9  n2k8000@u.washington.edu
10  Konstantin Pik
11
12<Purpose>
13  This file produces summary files and the complete log files from all of the machines
14  surveyed.  The log files are collected from their respective directories, and then
15  they are thrown into one file. The HTML file is also created via this file.
16
17
18<Usage>
19  python make_summary.py
20 
21  Will parse all logs in the ./deploy.logs directory and use time.time() for the timestamp.
22"""
23
24import os
25import sys
26
27import deploy_html
28import deploy_main
29import deploy_stats
30import deploy_helper
31         
32         
33
34
def _increment_count(counter, key):
  """
  <Purpose>
    Add one to counter[key], creating the entry (starting from zero) when
    the key has not been seen before.

  <Arguments>
    counter: dictionary mapping keys to integer counts (modified in place).
    key: the key whose count is incremented.

  <Returns>
    None.
  """
  counter[key] = counter.get(key, 0) + 1


def _is_reported_running(success_status, desc_string):
  """
  <Purpose>
    Decide whether a component (NM or SU) counts as running in the stats.

  <Arguments>
    success_status: the status flag returned by the deploy_stats check.
    desc_string: the human readable description returned by the same check.

  <Returns>
    True if the status flag is set or the description does not contain the
    word 'not', False otherwise.
  """
  # the 'not' test is a hack for the high mem usage case, where the check
  # returns a false status although the description does not say 'not'
  return bool(success_status or desc_string.lower().find('not') == -1)


def _record_su_nm_combination(su_running, nm_running, su_nm_stats):
  """
  <Purpose>
    Increment exactly one of the four mutually exclusive SU/NM combination
    counters in su_nm_stats.

  <Arguments>
    su_running: True if the software updater counts as running.
    nm_running: True if the node manager counts as running.
    su_nm_stats: dictionary of counters (modified in place).

  <Returns>
    None.
  """
  if su_running and nm_running:
    combination = 'Both SU and NM are running'
  elif su_running:
    combination = 'Only SU is running'
  elif nm_running:
    combination = 'Only NM is running'
  else:
    combination = 'SU/NM are not running'
  su_nm_stats[combination] += 1


def _write_node_states(node_file_as_string, summary_file_handle,
    detailed_handle, node_states_counter, num_node_states):
  """
  <Purpose>
    Extract the vessel states from one node log, write them to the summary
    and detailed logs, update the state counters and build the 'Node Status'
    table cell for the node.

  <Arguments>
    node_file_as_string: the complete node log as one string.
    summary_file_handle: open handle of the overall summary file.
    detailed_handle: open handle of the per-node detailed file.
    node_states_counter: maps state name -> number of vessels in that state
      (modified in place).
    num_node_states: maps str(number of states on a node) -> number of nodes
      (modified in place).

  <Exceptions>
    Any exception raised by deploy_stats.get_node_state is swallowed and
    treated as "no states found".

  <Returns>
    A list holding zero or one (text, color) tuples to append to the node's
    table row.
  """
  try:
    (_, (node_state_array, state_counter), _) = \
        deploy_stats.get_node_state(node_file_as_string)
  except Exception:
    # best effort: a log we cannot parse is reported as having no states
    node_state_array, state_counter = [], 0

  # keeps track of how many nodes have X states on them
  _increment_count(num_node_states, str(state_counter))

  successful_states = []
  for each_vessel in node_state_array:
    # tuple (SuccessState, vesselID, explanation_str)
    vessel_ok = each_vessel[0]
    vessel_id = each_vessel[1]
    explanation = str(each_vessel[2])

    state_line = '\nVessel state:\t'+str(vessel_id)+':'+explanation
    summary_file_handle.write(state_line)

    if vessel_ok:
      successful_states.append(explanation)
      # the detailed log only records the state when we succeed
      detailed_handle.write(state_line)
      # keeps track of how many vessels are in each particular state
      _increment_count(node_states_counter, explanation)

    detailed_handle.write('\n')
    summary_file_handle.write('\n')

  if state_counter == 1:
    return [(','.join(successful_states), deploy_html.colors_map['Success'])]

  if state_counter == 0:
    if node_state_array:
      # if the array isn't empty we have some msg to print
      return [(node_state_array[0], deploy_html.colors_map['Error'])]
    # otherwise it's an error
    return [('Did not get vesseldict', deploy_html.colors_map['Error'])]

  # NOTE(review): with more than one state nothing is added, so the node's
  # row is one cell shorter than the table header - kept as in the original
  # (the 'Multiple states on node!' cell was deliberately disabled there).
  return []


def build_summary():
  """
  <Purpose>
    This function collects all the important log files from the
    subdirectories of ./deploy.logs, writes them to one summary log, and
    generates the html overview with the per-node table and the statistics
    tables.

    The directory structure is as follows (for the files we want):
      ./deploy.logs/[remote_host]/deployrun.log
      ./deploy.logs/[remote_host]/[remote host].deployrun.err.log

  <Arguments>
    None.

  <Exceptions>
    Errors opening/creating the log files propagate to the caller (all
    files opened here are closed first).  Errors while writing missing.list
    are printed and otherwise ignored.

  <Side Effects>
    Creates ./detailed_logs (and one subdirectory per node) if missing.
    Writes ./detailed_logs/detailed.[uniq_fn] and appends to
    ./detailed_logs/[remote_host]/[timestamp].
    Copies ./deploy.logs/controller.log and ./deploy.logs/deploy.err.log
    into ./detailed_logs (if they exist).
    Writes missing.list in the current directory.
    Writes the html file through deploy_html.

  <Returns>
    None.
  """

  sep = '---------------------'
  uniq_fn, timestamp = deploy_html.generate_uniq_fn()

  # collect all log files into a summary file
  summary_fn = 'detailed.'+uniq_fn

  # make sure that the dir exists
  if not os.path.isdir('./detailed_logs'):
    os.mkdir('./detailed_logs')

  # states map to #s
  node_states_counter = {}

  # num of states -> to # of occurences
  num_node_states = {}

  # how many computers have a SU / a NM running, and the breakdown into the
  # four mutually exclusive combinations
  su_nm_stats_header = ['SU/NM Info', 'Number of Nodes']
  su_nm_stats = {
    'SU is running': 0,
    'NM is running': 0,
    'Only SU is running': 0,
    'Only NM is running': 0,
    'SU/NM are not running': 0,
    'Both SU and NM are running': 0,
  }

  # versions map to # of currently installed; the two 'Not ...' entries
  # keep track of the number and the names of nodes with seattle missing
  node_version_dict = {
    'Not Installed': 0,
    'Not Installed Node Name': [],
  }

  # this dictionary is used to build up our html page with all the node
  # information. the keys are the nodenames, they map to an array of values
  # which are the values in the table for that node.
  html_dict = {}

  # used as the headers for the table built up in html_dict
  html_dict_headers = ['Node Name', 'NodeManager Status',
      'SoftwareUpdater Status', 'Node Version', 'Node Status', 'Details']

  summary_file_handle = open('./detailed_logs/'+summary_fn, 'w')
  try:
    # for every folder in the logs directory
    for logfolder in os.listdir('./deploy.logs'):
      node_log_dir = './deploy.logs/'+logfolder
      if not os.path.isdir(node_log_dir):
        continue

      # each dir should have TWO files (at most) that we care about
      errfn = logfolder+'.deployrun.err.log'

      for logfile in os.listdir(node_log_dir):
        logfile_name = node_log_dir+'/'+logfile
        if not os.path.isfile(logfile_name):
          continue
        if logfile != 'deployrun.log' and logfile != errfn:
          continue

        # the logfolder = the remote host (by ip or hostname)
        summary_file_handle.write('\nLog from '+logfolder)

        logfile_handle = open(logfile_name, 'r')
        try:
          if not os.path.isdir('./detailed_logs/'+logfolder):
            os.mkdir('./detailed_logs/'+logfolder)

          detailed_handle = open('./detailed_logs/'+logfolder+'/'+timestamp, 'a')
          try:
            node_file_as_string = deploy_html.read_whole_file(logfile_handle)
            final_file_content = deploy_helper.summarize_all_blocks(node_file_as_string)

            # write to both the files
            summary_file_handle.write(final_file_content)
            detailed_handle.write(final_file_content)

            # the row of table cells for this node
            temp_array = []

            if deploy_stats.check_is_seattle_installed(node_file_as_string):
              # the NM status
              NM_success_status, NM_desc_string, bgcolor = \
                  deploy_stats.check_is_nm_running(node_file_as_string)
              nm_running = _is_reported_running(NM_success_status, NM_desc_string)
              if nm_running:
                su_nm_stats['NM is running'] += 1
              temp_array.append((NM_desc_string, bgcolor))

              # the SU status
              SU_success_status, SU_desc_string, bgcolor = \
                  deploy_stats.check_is_su_running(node_file_as_string)
              su_running = _is_reported_running(SU_success_status, SU_desc_string)
              if su_running:
                su_nm_stats['SU is running'] += 1
              temp_array.append((SU_desc_string, bgcolor))

              _record_su_nm_combination(su_running, nm_running, su_nm_stats)

              # the node version; keep track of how many of each
              # version/output we have (including errors and upgrades)
              success_status, version_string, bgcolor = \
                  deploy_stats.get_node_version(node_file_as_string)
              temp_array.append((version_string, bgcolor))
              _increment_count(node_version_dict, version_string)

              # and now the node state
              temp_array.extend(_write_node_states(node_file_as_string,
                  summary_file_handle, detailed_handle, node_states_counter,
                  num_node_states))

            else: # no seattle installed!
              temp_array = ['', '', '', ('Seattle is not installed',
                  deploy_html.colors_map['Warning'])]
              node_version_dict['Not Installed'] += 1
              # remember the node (logfolder is its name); the names are
              # written to missing.list further down
              node_version_dict['Not Installed Node Name'].append(logfolder)

            temp_array.append(deploy_html.make_link_to_detailed(logfolder, uniq_fn))

            # NOTE(review): a node that has both deployrun.log and the
            # .err.log gets its row overwritten by whichever is listed last.
            html_dict[logfolder] = temp_array
          finally:
            detailed_handle.close()
        finally:
          logfile_handle.close()

        summary_file_handle.write('\n'+sep+'\n')
  finally:
    summary_file_handle.close()

  # copy the controller-wide logs next to the detailed logs. this does not
  # depend on the node being processed, so it is done once here instead of
  # spawning a shell for every node log.
  if os.path.isfile('./deploy.logs/controller.log'):
    deploy_main.shellexec2('cp ./deploy.logs/controller.log ./detailed_logs/controller.'+timestamp)

  if os.path.isfile('./deploy.logs/deploy.err.log'):
    deploy_main.shellexec2('cp ./deploy.logs/deploy.err.log ./detailed_logs/deploy.err.'+timestamp)

  # generate the html tables from the dicts

  # the node-states table
  html_node_states_counter = deploy_html.html_table_from_dict(node_states_counter, ['Node State', 'Number of nodes'])

  # the number of nodes in each state table
  html_num_states = deploy_html.html_table_from_dict(num_node_states, ['Number of states', 'Occurence of said number of keys'])

  # the table of nm/su stats (X running NM, Y running SU, etc)
  html_su_nm_stats = deploy_html.html_table_from_dict(su_nm_stats, su_nm_stats_header)

  # the table with the version breakdown
  html_version_info = deploy_html.html_table_from_dict(node_version_dict, ['Node Version', 'Number of nodes'])

  # the main table of nodes and infos.
  html_main_table = deploy_html.html_table_from_dict2(html_dict, html_dict_headers)

  # write to a file the clean nodes (nodes where seattle is not installed)
  try:
    empty_nodes_fh = open('missing.list', 'w+')
    try:
      for each_node in node_version_dict['Not Installed Node Name']:
        empty_nodes_fh.write(str(each_node)+'\n')
    finally:
      empty_nodes_fh.close()
  except Exception:
    # best effort only; sys.exc_info() and single-argument print() are used
    # because they behave the same on every python 2.x and on python 3
    print('Error while trying to write missing.list in make_summary')
    print(sys.exc_info()[1])

  # write the html to a file
  deploy_html.html_write(uniq_fn, html_main_table)

  # add the following stuff to the top of the file
  deploy_html.html_add_to_top(uniq_fn, html_node_states_counter)
  deploy_html.html_add_to_top(uniq_fn, html_num_states)
  deploy_html.html_add_to_top(uniq_fn, html_su_nm_stats)
  deploy_html.html_add_to_top(uniq_fn, html_version_info)

  # get the # of uniq machines and add that to the top of the file as well.
  num_nodes, human_string = deploy_stats.get_uniq_machines('./detailed_logs/controller.'+timestamp)
  deploy_html.html_add_to_top(uniq_fn, human_string)

  # total responsive machines = sum up all versions, skipping the two
  # 'Not Installed...' bookkeeping entries.
  # NOTE(review): any version string containing 'Not' is skipped as well.
  responsive_hosts = 0
  for each_key in node_version_dict:
    if each_key.find('Not') == -1:
      responsive_hosts += node_version_dict[each_key]

  deploy_html.html_add_to_top(uniq_fn, str(responsive_hosts)+' hosts responded in a timely fashion '+\
      'and ran our tests.')

  deploy_html.html_add_to_top(uniq_fn, deploy_stats.insert_timestamp_from_fn(uniq_fn))

  return
355
356 
357 
# Script entry point: when run directly (python make_summary.py) build the
# summary right away; importing the module does nothing.
if __name__ == "__main__":
  build_summary()
Note: See TracBrowser for help on using the browser.