Please note that the CVS and issue trackers have moved to GitHub. These Trac pages are no longer kept up-to-date.

root/seattle/trunk/deploymentscripts/deploy_main.py@5637

Revision 2810, 30.5 KB (checked in by konp, 10 years ago)

Fixed high cpu usage, added summary tool, a number of other fixes and improvements as well. Note: blackbox node keys have not been added yet.

Line 
1#!/usr/bin/python
2"""
3<Program Name>
4  deploy_main.py
5
6<Started>
7  May 2009
8
9<Author>
10  n2k8000@u.washington.edu
11  Konstantin Pik
12
13<Purpose>
14  Deploy test scripts on remote machines and then wait to collect output.
15
16  Detailed:
17
18  This script creates a .tar with several files in it, reads in a list of
19  remote hosts from a file, and then uploads the .tar file to the remote
20  computers where files are extracted and executed and then the resulting
  log files are downloaded to this computer under the deploy.logs/ dir. In
  that directory will be a list of folders - one for each IP, and the log
  files created by that computer run.
24 
25  A local log file, controller.log is created which logs the execution
26  progress of scripts.  A summary.log file is created with the summaries
27  of the output (basically all the necessary information).
28 
29  The active working directory for the latest logs is always deploy_logs
30  and older directories are renamed and gzipped with some numeral expression
31 
32  Arguments may be specified in any order.
33
34<Usage>
35
  NOTE: -i is for compatibility, and is technically not needed.
37 
38  python deploy.py [-v 0|1|2] [ --nokeep ] [ -h | --help ] [-c customscript1 customscript2]
39        [-i instructional_list] [--cleanonly]
40
41
42  -v 0
43      Very silent running mode. With many hosts, it may look like the script is
44       frozen so this is not recommended.
45  -v 1
46      Default verbosity level. Same as -v 3 but makes more compact log files by
47        filtering out excess information.
48  -v 2
49      Specifies if a more verbose mode is to be used. When verbose
50        mode is flagged, then the local log file (controller.log) will contain
51        a bunch of data (such as most of the stdout from the remote host).
52        This is not recommended to be set if you are connecting to a lot
53        of machines. On the other hand, it presents everything in one file
54        neatly.
55  --nokeep
56      If --nokeep is specified, the old log files will be deleted and not
57        moved (and appended with a .number). Note that the default action
58        is to keep all old log files
59  -h, --help
60      Shows usage info with all flags.
61  -c fn1 fn2 ...
62      Must be followed by a valid python script file.  This file will be packaged
63        and distributed to all the computers.  NOTE: As of this version, only ONE
64        script will execute remotely.
65  -i instructional_list
66      Must be followed by a valid IP list file. The hostnames/ips in this file
67        will be treated slightly different (lower threadcount to avoid connection
68        refused message).
69  -l list
70      Custom list of IPs to use.
71
72     
73  TODO:
74  --cleanonly
75      If specified the remote computers will only be cleaned (all files created
76        by this script will be removed).
77
78  IMPORTANT: One file that is not created by this script (BUT IS IMPORTANT) is the
79    dictionary file of hashes created by verifyfiles.py. As of this release, I have
80    provided a sample version (version h).
81"""
82
83
84
85import os
86import subprocess
87import time
88import thread
89import sys
90import getopt
91
92# import of local deploy_* libraries
93import deploy_logging
94import deploy_network
95import deploy_threading
96
# Verbosity level (0, 1 or 2) controlling how much detail is dumped to the
# local log.  Intended to be set by the -v flag in main(); see the module
# docstring for the meaning of each level.
verbosity = 1

# custom list
# NOTE(review): appears unused in this file — the -l flag writes
# custom_host_file below instead; verify before removing.
custom_list_file = ''

# default number of tries to retry a connection if it was refused
number_of_default_retries = 3

# The file that we read our hostlist from (the -l flag is meant to
# override this).
custom_host_file = 'iplist2.list'

# Space-separated file name(s) of the custom script(s) set via the -c flag.
# Empty string means no custom script.  Read remotely via
# get_custom_script_name().
custom_script_name = ''
113
114
def print_notification():
  """
  <Purpose>
    Internal helper method. Prints a short banner describing what the
    deploy run is about to do, framed by the standard log separator.

  <Arguments>
    None.

  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    None.
  """
  banner_lines = [
      'This script will create an updated tar of the necessary files, and then',
      'it will execute a script that uploads the tar to the remote computers.',
      'After that it will attempt to run the scripts remotely, and log all ',
      'produced output']

  print(deploy_logging.sep)
  for banner_line in banner_lines:
    print(banner_line)
  print(deploy_logging.sep)
139 
140 
def shellexec2(cmd_str):
  """
  <Purpose>
    Runs cmd_str through the shell via subprocess and captures everything
    it produces.

  <Arguments>
    cmd_str:  The string to be treated as a command (or set of commands,
                separated by ;).

  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    A tuple containing (stdout, strerr, returncode)

    Detailed:
    stdout: stdout printed to console during command execution.
    strerr: error (note: some programs print to strerr instead of stdout)
    returncode: the return code of our call. If there are multiple commands,
                then this is the return code of the last command executed.
  """
  # all three streams are piped so nothing leaks to our console
  proc = subprocess.Popen(cmd_str, shell=True, stdin=subprocess.PIPE,
      stdout=subprocess.PIPE, stderr=subprocess.PIPE)

  # feed empty stdin, wait for completion, and collect both streams
  captured_out, captured_err = proc.communicate("")

  return captured_out, captured_err, proc.returncode
177
178 
179 
def prep_local_dirs(keep):
  """
  <Purpose>
    Prepares the local log directories: either wipes the old log folder or
    archives it under a numbered .tgz, then makes sure ./deploy.logs/ and
    its temp/ subfolder exist.

  <Arguments>
    keep: Boolean. Do we keep (archive) the old log directory?

  <Exceptions>
    None.

  <Side Effects>
    Creates/removes/renames directories and archives on the local disk.

  <Returns>
    None.
  """
  logs_dir = "./deploy.logs/"

  if not keep:
    # old logs are disposable - just delete them if present
    if os.path.isdir(logs_dir):
      shellexec2('rm -rf ./deploy.logs/')
  elif os.path.isdir(logs_dir):
    # archive the old logs: find an integer suffix not yet used by either
    # a directory or a previously-created tarball
    dirindex = 1
    print('Trying to move old log directory...')
    while os.path.isdir("./deploy.logs."+str(dirindex)+"/") or \
        os.path.isfile('deploy.logs.'+str(dirindex)+'.tgz'):
      time.sleep(.2)
      dirindex += 1

    # got a folder index that doesn't exist
    shellexec2('mv ./deploy.logs/ ./deploy.logs.'+str(dirindex))
    print('Moved old log directory successfully to ./deploy.logs.'+str(dirindex))
    print('Tarring the directory...')
    shellexec2('tar -czf deploy.logs.'+str(dirindex)+'.tgz deploy.logs.'+str(dirindex))
    print('Tar created, removing uncompressed files...')
    shellexec2('rm -rf deploy.logs.'+str(dirindex))

  # make sure the working log directory exists for this run
  if os.path.isdir(logs_dir):
    deploy_logging.log('Info', "Logs directory found..")
    deploy_logging.log('', deploy_logging.sep)
  else:
    os.mkdir(logs_dir)
    deploy_logging.log('Info', "Setting up logs directory..")

  if not os.path.isdir('./deploy.logs/temp'):
    os.mkdir('./deploy.logs/temp/')

  return
233
234 
235 
def get_custom_script_name():
  """
  <Purpose>
    Accessor for the module-level custom_script_name so other modules can
    read it; deploy_network.py uses this to figure out which custom script
    to run remotely.

  <Arguments>
    None.

  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    A string denoting the file name of the custom script.
  """
  # plain read of the module global; no 'global' statement needed for reads
  return custom_script_name
257 
258 
259 
def print_usage():
  """
  <Purpose>
    Prints the usage information for this script.

  <Arguments>
    None.

  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    None.
  """
  usage_text = ("\nUsage: \ndeploy.py [-v 0|1|2] [--nokeep]"
      " [-h | --help] \n\t[-c customscript1 customscriptN]"
      "\n\t [-i instructional_list] [-l iplist2.list] \n")  # TODO
  print(usage_text)
  return
281
282 
283 
def main():
  """
  <Purpose>
    Entry point into the program. Parses and validates the command line
    flags, then creates the tar and kicks off the deploy run.

  <Arguments>
    None.

  <Exceptions>
    getopt errors (and any unexpected parse error) are caught, reported,
    and turned into a return value of 1.

  <Side Effects>
    Sets the module-level globals custom_script_name, custom_host_file and
    verbosity from the command line flags.

  <Returns>
    Integer. 1 on error. Nothing on clean exit.
  """
  global custom_script_name
  global run_custom_script_only
  # BUGFIX: -l and -v previously assigned function-locals that shadowed
  # these module globals, so both flags were silently ignored.
  global custom_host_file
  global verbosity

  # set the flag by default to keep the old log files.
  keep = True

  try:
    # 'cleanonly' was listed twice before; once is enough.
    valid_params = 'nokeep cleanonly help customonly'.split()
    optlist, leftover_args = getopt.getopt(sys.argv[1:], ':hl:v:c:i:', valid_params)
  except getopt.GetoptError as e:
    print_usage()
    print(e)
    return 1
  except Exception as e:
    print_usage()
    print(e)
    print("Unexpected Error")
    return 1

  for opt in optlist:

    if opt[0] == '--customonly':
      # run custom files only
      run_custom_script_only = True

    if opt[0] == '--cleanonly':
      #cleanonly()
      pass

    # custom list file to use
    if opt[0] == '-l':
      # TODO: check that it's a valid file
      custom_host_file = str(opt[1])

    # show help menu
    if opt[0] == '-h' or opt[0] == '--help':
      print_usage()
      return

    # set verbosity
    if opt[0] == '-v':
      # make sure we have a verbosity specified
      if opt[1]:
        try:
          int_val = int(opt[1])
        except ValueError:
          # non-numeric argument -> force the fallback below
          int_val = -1
        # BUGFIX: the old check used 'or', which accepted any integer
        if 0 <= int_val <= 2:
          verbosity = int_val
        else:
          verbosity = 1 # default verbosity
      else:
        print("Warning: Invalid verbosity, defaulting to 1")

    # custom file(s) to deploy
    if opt[0] == '-c':
      # not multiple and not one script specified..
      if not len(leftover_args) and not len(opt[1]):
        print("Error: No custom scripts specified. Remove -c flag please")
        return
      print("Loading custom script")

      if leftover_args:
        # multiple scripts: everything after the flags
        for custom_script in leftover_args:
          # check that the file exists
          if os.path.isfile(custom_script):
            print('\tAdding '+custom_script)
            custom_script_name += custom_script+'  '
          else:
            print('Error: '+custom_script+', a custom script is not found')
            return
      else: # just one script file

        # is the file we were given a real file?
        if os.path.isfile(opt[1]):
          print('\tAdding '+opt[1])
          custom_script_name = opt[1]
        else:
          # BUGFIX: report the offending argument, not the (empty)
          # custom_script_name accumulator
          print('Error: '+opt[1]+', a custom script is not found')
          return

    # don't save old log files
    if opt[0] == '--nokeep':
      print("Erasing old log files")
      keep = False

    # instructional machines list
    if opt[0] == '-i':
      print('Reading in instructional machine file...')
      if opt[1]:
        # check that the file exists
        if os.path.isfile(opt[1]):
          hostname_path = opt[1]
          # read in the file and stash the instructional machines in the
          # dict shared between threads
          deploy_threading.thread_communications['machine_list'] =\
              get_remote_hosts_from_file(hostname_path)
        else:
          print("ERROR: Specified instructional machine filepath is"+\
            " not a valid file ("+opt[1]+")")
          return
      else:
        print('Invalid instructional machine path specified, not going to die.')
        return

  # print intro
  print_notification()

  # Execute the tar creation script
  out, err, returncode = shellexec2('python create_tar.py '+custom_script_name)

  # Just formatting the out and err from executing the shell script.
  out, err = deploy_logging.format_stdout_and_err(out, err)

  # print if not empty
  if out:
    print(out)
  if err:
    print(err)
  print(deploy_logging.sep)

  # if all went sucessfully..
  if returncode == 0:
    # setup all the directories..
    prep_local_dirs(keep)

    print("Entering upload and execution script... (this may take a while)")

    # call the deploy script that'll pick up from here..
    deploy()
    print(deploy_logging.sep)

    # cleanup the local temp directory
    shellexec2('rm -rf ./deploy.logs/temp/')

    print('Compacting...')
    # summarize the logfile before building the summary
    shellexec2('python log_maintenance.py dummyarg')

    print('Building summary logfile..')
    deploy_logging.build_summary()
    print(deploy_logging.sep)
    deploy_logging.log('Finished', 'All finished.')

  # returns 1 if there was an error.
  elif returncode == 1:
    print('Error in creating tar.. Aborting')
  else: # just so we catch all the conditions..
    print('CRITICAL ERROR! script returned with unexpected retcode ('+\
      str(returncode)+')')
452
453 
def upload_tar(user, remote_host, tar_filename = "deploy.tar"):
  """
  <Purpose>
    Uploads tar_filename to remote_host via scp (as user) using
    deploy_network.remote_upload_file, and logs success or failure.

  <Arguments>
    user:
      the user to log in as on the remote machine.
    remote_host:
      the remote machine's IP to which we'll be uploading files.
    tar_filename:
      Optional. Default is deploy.tar. The tar file to upload to the remote
        host.

  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    A tuple: (returncode, stdout, stderr) - note the returncode is returned
    as a string.
  """
  # delegate the actual scp work to the network helper
  out_data, err_data, retcode = deploy_network.remote_upload_file(
      tar_filename, user, remote_host)

  # log the outcome either way
  if retcode != 0:
    deploy_logging.logerror(remote_host+': Trouble uploading deploy.tar')
  else:
    deploy_logging.log(remote_host, 'Successfully uploaded deploy.tar')

  return (str(retcode), out_data, err_data)
492
493 
494 
def get_current_time():
  """
  <Purpose>
    Returns the current local time formatted the way the log files expect.

  <Arguments>
    None.

  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    The date as a string.

    Sample date returned: May 16 2009 00:21:51
  """
  # %H:%M:%S is the portable spelling of the %T shorthand
  return time.strftime("%b %d %Y %H:%M:%S")
516
517 
518 
def get_remote_hosts_from_file(fname = None, nolog = False):
  """
  <Purpose>
    Returns a list of the (username, IP) pairs as read from the file
    specified.

    File format is:

    !user:[username]
    [IPs]

    [username] is the username that will be used until a new username is
      specified in the same format. NOTE: Username is case sensitive.
    [IPs] are a list of IPs/hostnames (one per line) associated with that
      username. Blank lines and lines starting with # are ignored.

  <Arguments>
    fname:
      Optional. When omitted (or None), the module-level custom_host_file
        is used - this also picks up a -l override, which the old
        def-time default could not.
        BUGFIX: the old code unconditionally replaced fname with
        custom_host_file, which silently broke callers passing an explicit
        path (e.g. the -i instructional-list handling in main()).
    nolog:
      Optional. Default is False. If set to true, then nothing will be
        written to the logfile.

  <Exceptions>
    None. A missing/unreadable host file is logged and reported via the
    return value.

  <Side Effects>
    None.

  <Returns>
    Returns a list of tuples with (username, ip) on success, False on failure
  """
  # fall back to the module default (possibly overridden by -l)
  if fname is None:
    fname = custom_host_file

  # IP file must be in the same dir as this script
  try:
    file_of_ips = open(fname, 'r')
  except Exception as e:
    deploy_logging.log('Error', 'Are you missing your list of remote hosts? ('+str(e)+')')
    return False

  # flag on whether we have any remote hosts (there are users and comments
  # in the file as well)
  have_one_ip = False

  # accumulates (username, remote_host) tuples
  users_ip_tuple_list = []

  current_username = ''

  try:
    # iterate the file lazily rather than reading it all into memory
    for line in file_of_ips:

      # if first chars match what we want ('!user:' is 6 chars long)
      if line[0:6].lower() == '!user:':
        # grab everything after the '!user:' string
        # -1 so we drop the \n, strip() drops leading/trailing spaces
        current_username = line[6:-1].strip()
        continue

      stripped = line.strip('\n ')
      # ignore blank lines and comments (lines starting with #)
      if not stripped or stripped[0] == '#':
        continue

      # we have an IP, so a username must be in effect
      if not current_username:
        deploy_logging.logerror('Critical Error: No username specified for remote host group!')
        return False

      # add (username, remote_host), lowercasing the host in case it's a
      # hostname, for easy comparison everywhere
      users_ip_tuple_list.append((current_username, line.rstrip('\n ').lower()))
      have_one_ip = True
  finally:
    # close exactly once, on every exit path
    file_of_ips.close()

  # succeed only if at least ONE ip was found (not just usernames)
  if have_one_ip:
    # set() is a cheap way of dropping duplicates; cast back to list
    finalized_list = list(set(users_ip_tuple_list))
    if not nolog:
      deploy_logging.log('Setup', "Found "+str(len(finalized_list))+" unique hosts to connect to.")
    return finalized_list
  return False
614
615
616
def deploy():
  """
  <Purpose>
    This function is the brains behind the deploy script. All the main calls
    originate from this function.

    -Gets list of remote hosts from a file
    -Calls function to execute cleanup/setup on remote hosts before
      we can run remote scripts and then that same function executes
      the remote script files
    -Kills any ssh/scp child processes still tracked at the end, then
      runs the final cleanup pass on any instructional machines.

  <Arguments>
    None.

  <Exceptions>
    Exit if hostlist file was not found.

  <Side Effects>
    Mutates deploy_threading.thread_communications (unreachable_host and
    running_process_ids lists); may kill child processes.

  <Returns>
    None.
  """

  # Get list of hosts from the default host file
  myhosts = get_remote_hosts_from_file()

  if not myhosts: # if we didn't find any hosts.. crap out!
    print "Didn't find any remote hosts file!"
    deploy_logging.logerror("Didn't find any remote hosts file!")
    # return only if we ALSO have no instructional machines to process
    if 'machine_list' not in deploy_threading.thread_communications.keys():
      return
  else:
    # check if we also have instructional machines, and if we do, then
    # make sure we're not being tricked - remove all instructional machines
    # from the myhosts list so each host is processed exactly once
    if 'machine_list' in deploy_threading.thread_communications.keys():
      # we have instructional machines
      machine_list = deploy_threading.thread_communications['machine_list']
      myhosts = list(set(myhosts)-set(machine_list))
 
  # initialize thread_communications dictionary to a list which will have
  # our unreachable hosts
  deploy_threading.thread_communications['unreachable_host'] = []

  # this will keep track of the proc id's that are launched on different
  # threads. These are ssh/scp processes. We keep track of these because
  # we want to make sure that when we exit deploy.py, we kill all of these
  # processes - they should be killed by that time unless there was some kind
  # of error.
  deploy_threading.thread_communications['running_process_ids'] = []
 
  # initial run against the regular hosts
  connect_and_do_work(myhosts)

  # now do the same for the instructional machines if we have any
  # (lower thread count of 3 to avoid connection-refused errors)
  if 'machine_list' in deploy_threading.thread_communications.keys():
    connect_and_do_work(deploy_threading.thread_communications['machine_list'], 3)
 

  # if we had unreachable hosts..   
  if deploy_threading.has_unreachable_hosts():
    # Currently, set NOT to retry hosts (range(0) makes this loop body
    # dead code on purpose).  Since it's running regularly as a service,
    # there is no need as 99% of these hosts time out anyway, so it just
    # takes a lot longer than it should.
    for i in range(0):     
     
      # increase timeout time by 25% each time
      deploy_network.default_connection_timeout =\
          str(int(float(deploy_network.default_connection_timeout) * 1.25))
     
      # 1. use list of unreachable hosts list as our list to retry
      last_failed_hosts = deploy_threading.thread_communications['unreachable_host']

      # 2. reset the unreachable hosts list
      deploy_threading.thread_communications['unreachable_host'] = []
      deploy_logging.log("Notice", "Trying to connect to failed hosts (connection attempt #"+str(i+2)+")")
      connect_and_do_work(last_failed_hosts)
 
 
  # belt-and-braces: kill any ssh/scp child process we still know about
  print "Checking that all child threads/processes are dead..."
  for each_tuple in deploy_threading.thread_communications['running_process_ids']:
    try:
      # tuple is (pid, expiretime, remotehost, username)
      procid = int(each_tuple[0])
      os.kill(procid, 9)
    except OSError, ose:
      # process already gone - that's the expected case
      pass
    except Exception, e:
      print "Something went wrong while trying to kill process "+\
          str(procid)+", "+str(e)
 
  deploy_logging.log("Info", "Seeing if there exist instructional machines we need to cleanup still...")
 
  # instructional machines share NFS, so their cleanup was deferred until
  # all threads finished (see threadable_process_node)
  if 'instructional_machines' in deploy_threading.thread_communications.keys():
    for instructional_machine in deploy_threading.thread_communications['instructional_machines']:
      threadable_cleanup_final(instructional_machine)
 
  print 'Finished.'
  deploy_logging.log("Info", "All threads completely finished...")
  return
720
721 
722 
def connect_and_do_work(myhosts, max_threads = deploy_threading.threadsmax):
  """
  <Purpose>
    Distributes the hosts in myhosts across worker threads (at most
    max_threads at a time) and blocks until every thread has finished.

  <Arguments>
    myhosts:
      list of tuples containing (username, remote_host). The format is the
      same as returned when the list of IPs is read from file.
    max_threads:
      Max # of threads to launch.

  <Exceptions>
    Exception on trouble starting a thread.

  <Side Effects>
    Blocks until all threads are finished; sets the kill_flag in
    thread_communications when done.

  <Returns>
    None.
  """
  # nothing to do for an empty host list
  if not myhosts:
    return

  deploy_threading.start_thread(threadable_process_node, myhosts, max_threads)

  # block until all worker threads are done, reporting progress every 30s
  while deploy_threading.threading_currentlyrunning():
    time.sleep(30)
    helper_print_processing_threads()

  # reset kill flag for the thread monitoring status of the timeouts
  deploy_threading.thread_communications['kill_flag'] = True

  return
765
766 
767 
def helper_humanize_nodelist(node_list):
  """
  <Purpose>
    Converts a list of (user, hostname) tuples to a comma-separated string
    of user@hostname entries.

  <Arguments>
    node_list: list of (user, hostname) tuples.

  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    A string such as 'alice@host1, bob@host2' ('' for an empty list).
  """
  # join sidesteps the trailing-separator fence-post problem
  return ', '.join([helper_humanize_node(node) for node in node_list])



def helper_humanize_node(node):
  """
  <Purpose>
    Converts one node (in tuple/list format) to a human readable string.

  <Arguments>
    node: first element is the username, second is the remote_host.

  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    The string 'user@remote_host'.
  """
  return node[0]+'@'+node[1]
799 
800 
801
def helper_print_processing_threads():
  """
  <Purpose>
     Helper that periodically lets us know how many hosts are still being
      processed and which nodes the running threads are responsible for.
      Called from the wait loop in connect_and_do_work.

  <Arguments>
    None.

  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    None.
  """
  print 'There are still some threads spinning.'
  # trailing comma keeps the host list on the same output line
  print 'Currently, there are some'+\
      ' threads running ('+str(len(deploy_threading.thread_communications['hosts_left']))+' hosts left)',
  print helper_humanize_nodelist(deploy_threading.thread_communications['hosts_left'])
824
825   
826
def threadable_process_node(node_list):
  """
  <Purpose>
    The parent function that calls child functions to do the little work. From
    this function we can see the order of events:
      1. upload tar
      2. check that we got a response (if not add to unreachable for later)
      3. run cleaning/setup scripts on remote machine
      4. run actual test scripts on remote machine
          (files are grabbed after all scripts execute, called from step4)
      5. remove our files from the remote machine (deferred for
          instructional machines - see below)

  <Arguments>
    node_list:
      a list containing a single tuple of (user, remotehost)

  <Exceptions>
    None. Any exception raised by the steps is caught and logged.

  <Side Effects>
    Modifies running thread counter.

  <Returns>
    None.
  """
 
  try:

    # node is a list containing one tuple
    node = node_list[0]

    # upload the .tar file.
    # attempt to upload the .tar file to the computers. this'll modify a list of
    # computers that we didn't connect to succesfully,so we'll remove them from
    # the list of computers we want to run the rest of the scripts on.

    threadable_remote_upload_tar(node_list)

    # only continue if node was marked reachable
    if deploy_threading.node_was_reachable(node):
      # clean the node
      threadable_remote_cleanup_all(node_list)
      # run the scripts remotely now
      threadable_remote_run_all(node_list)
      # cleanup the files, but only if it's not an instructional machine
      # (identified here by a 128. address prefix - TODO confirm that
      # assumption still holds). the reason for this is because it's NFS
      # and files could still be in use by the other machines. we'll add
      # this to a special list in our thread_communications dict and we'll
      # then clean these up when all threads are totally done
      if not node[1].startswith('128.'):
        threadable_cleanup_final(node_list)
      else:
        # check if array exists already
        deploy_threading.add_instructional_node(node)
       
    # decrement # of threads running
  except Exception, e:
    deploy_logging.logerror("Error in thread assigned to "+node[1]+\
        " threadable_process_node ("+str(e)+")")
885
886   
887   
def threadable_cleanup_final(remote_machines):
  """
  <Purpose>
    Removes the files created by this script from the remote machine(s)
    via a single ssh command.

  <Arguments>
    remote_machines:
      a list of (user, remotehost) tuples, or a single such tuple.

  <Exceptions>
    None.

  <Side Effects>
    Deletes files on the remote machines.

  <Returns>
    None.
  """
  # normalize a bare tuple into a one-element list
  if type(remote_machines) != type([]):
    remote_machines = [remote_machines]

  # for every machine in our list...
  for username, machine in remote_machines:

    deploy_logging.log('Final cleanup', 'Final cleanup of  '+machine)

    # every file/folder this run may have left behind on the remote box
    leftovers = [
        'runlocaltests.py',
        'hashes.dict',
        machine+'.deployrun.log',
        machine+'.deployrun.err.log',
        'testprocess.py',
        'verifyfiles.mix',
        machine+'.tgz',
        'deploy.tar',
        'cleanup_deploy.py']
    #TODO: cleanup custom scripts as well here?

    # merge the removals into one ;-separated command string
    cmd_str = '; '.join(['rm -rf '+fname for fname in leftovers])
    ssh_stdout, ssh_stderr, ssh_errcode = deploy_network.remote_shellexec(cmd_str, username, str(machine))

    deploy_logging.print_to_log('Detailed cleanup', ssh_stdout, 
        ssh_stderr, ssh_errcode)
  return
937 
938 
939 
def threadable_remote_upload_tar(remote_machines):
  """
  <Purpose>
    Uploads deploy.tar to each machine before running anything. Machines
      that time out are recorded as unreachable for later handling.

  <Arguments>
    remote_machines:
      list of tuples with (user, ip) IPs that we have to upload to, or a
      single such tuple.

  <Exceptions>
    None.

  <Side Effects>
    Temporarily locks thread_communications dict which is used by other threads
    trying to upload (if they run into an error).

  <Returns>
    None.
  """
  # normalize a bare tuple into a one-element list
  if type(remote_machines) != type([]):
    remote_machines = [remote_machines]

  for username, machine in remote_machines:

    deploy_logging.log('Setup', 'Attemping tar file upload via scp on '+machine)
    scp_errcode, scp_stdout, scp_stderr = upload_tar(username, str(machine))

    out, err = deploy_logging.format_stdout_and_err(scp_stdout, scp_stderr)

    # upload_tar returns the code as a string; dispatch on it
    code_str = str(scp_errcode)
    if code_str == '0':
      deploy_logging.log('Setup', ' scp file upload complete on '+machine)
    elif code_str == '1':
      # scp's "could not connect" code
      deploy_logging.logerror('Could not establish a connection with '+machine+' ('+err+')')
      deploy_threading.add_unreachable_host((username, machine))
    else:
      deploy_logging.logerror('scp returned unknown error code '+code_str+' ('+err+')')
      deploy_threading.add_unreachable_host((username, machine))
986
987
def threadable_remote_cleanup_all(remote_machines):
  """
  <Purpose>
    Calls deploy_network.remote_runcleanup for each machine in
    remote_machines.

  <Arguments>
    remote_machines:
      list of tuples with (user, ip) IPs that we have to cleanup, or a
      single such tuple.

  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    None.
  """
  # normalize a bare tuple into a one-element list
  if type(remote_machines) != type([]):
    remote_machines = [remote_machines]

  for username, machine in remote_machines:

    deploy_logging.log('Cleanup/Setup', "Attempting to ssh and run cleaning scripts on "+\
        machine)
    # Run the remote cleanup script
    deploy_network.remote_runcleanup(username, str(machine))
    deploy_logging.log('Cleanup/Setup', " ssh and run cleanup scripts done on "+machine+\
        ". Moving on.")
1024
def threadable_remote_run_all(remote_machines):
  """
  <Purpose>
    Connects to each remote computer and executes the actual test scripts
    (including the custom script, if any, via custom_script_name).

    This function is run_func threadable.

  <Arguments>
    remote_machines:
      list of tuples with (user, ip) that we'll run our tests on, or a
      single such tuple.

  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    None.
  """
  # normalize a bare tuple into a one-element list
  if type(remote_machines) != type([]):
    remote_machines = [remote_machines]

  # Run the files remotely on every machine we're assigned
  for username, machine in remote_machines:
    deploy_logging.log('Info', "Attempting to ssh and run scripts on "+machine+"...")
    deploy_network.remote_runscript(username, str(machine), custom_script_name)
    deploy_logging.log('Info', "Running scripts on "+str(machine)+" completed. Moving on.")
1059
1060
1061
# Script entry point: main() parses the flags and drives the whole
# tar-build / upload / run / collect-logs cycle.
if __name__ == "__main__":
  main()
Note: See TracBrowser for help on using the browser.