Please note that the CVS and issue trackers have moved to GitHub. These Trac pages are no longer kept up-to-date.

root/seattle/trunk/deploymentscripts/deploy_server_monitor.py@5637

Revision 2811, 7.8 KB (checked in by konp, 10 years ago)
Line 
1"""
2<Program Name>
3  deploy_server_monitor.py
4
5<Started>
6  July 2009
7
8<Author>
9  n2k8000@u.washington.edu
10  Konstantin Pik
11
12<Purpose>
13  The purpose of this file is to make sure that the web server is running and
14  that the monitoring scripts are started up every so often.
15
16<Usage>
17  python deploy_server_monitor.py
18 
19"""
20
21import time
22import sys
23import deploy_main
24import thread
25
26
def webserver_is_running():
  """
  <Purpose>
    Report whether the web server (deploy_server_final.py) shows up in the
    process listing.

  <Arguments>
    None.

  <Exceptions>
    None are raised explicitly by this function.

  <Side Effects>
    Runs a 'ps -ef' pipeline through deploy_main.shellexec2.

  <Returns>
    Boolean. True when the listing contains 'python deploy_server_final.py',
    False otherwise.
  """
  # list matching processes, dropping the grep helper itself from the output
  ps_command = 'ps -ef | grep deploy_server_final.py | grep -v grep'
  listing, _stderr, _status = deploy_main.shellexec2(ps_command)

  # the server counts as running only if it was launched as
  # 'python deploy_server_final.py'
  return 'python deploy_server_final.py' in listing
48 
49 
50 
def deploymentscript_is_running():
  """
  <Purpose>
    Report whether the deployment scripts (deploy_main.py) show up in the
    process listing.

  <Arguments>
    None.

  <Exceptions>
    None are raised explicitly by this function.

  <Side Effects>
    Runs a 'ps -ef' pipeline through deploy_main.shellexec2.

  <Returns>
    Boolean. True when the listing contains 'python deploy_main.py',
    False otherwise.
  """
  # list matching processes, dropping the grep helper itself from the output
  ps_command = 'ps -ef | grep deploy_main.py | grep -v grep'
  listing, _stderr, _status = deploy_main.shellexec2(ps_command)

  # the scripts count as running only if launched as 'python deploy_main.py'
  return 'python deploy_main.py' in listing
72
73
74 
def server_monitor():
  """
  <Purpose>
    One round of web server supervision, meant to run on its own thread
    (main() starts it that way).  If the web server is not running it is
    relaunched in the background; the thread then sleeps for two minutes and
    hands over to a freshly started thread that repeats the process.

  <Arguments>
    None.

  <Exceptions>
    None are raised explicitly by this function.

  <Side Effects>
    May launch 'python deploy_server_final.py' in the background with its
    output redirected to ~/webserver.log.  Blocks the calling thread for 120
    seconds and then starts a new thread running server_monitor.

  <Returns>
    None.
  """
  recheck_interval_secs = 120

  # background launch: stdout and stderr go to webserver.log, stdin is
  # detached, and the trailing '&' keeps the call from blocking
  restart_command = 'python deploy_server_final.py > ~/webserver.log 2>&1 < /dev/null&'

  server_is_down = not webserver_is_running()
  if server_is_down:
    deploy_main.shellexec2(restart_command)

  time.sleep(recheck_interval_secs)

  # this thread ends here; the next check runs on a brand new thread
  thread.start_new_thread(server_monitor, ())
105
106
def stop_web_server():
  """
  <Purpose>
    Stops every instance of the web server (in case several instances are
    running for some reason).

  <Arguments>
    None.

  <Exceptions>
    None are raised explicitly by this function.

  <Side Effects>
    Sends 'kill -9' to every process whose 'ps -ef' line matches
    deploy_server_final.py.  Does not return until webserver_is_running()
    reports False.

  <Returns>
    None.
  """
  # pick the PID column of every matching process and force-kill it
  kill_command = "ps -ef | grep deploy_server_final.py | grep -v grep | awk ' { print $2 } ' | xargs kill -9"

  # keep killing until no web server process is left
  while True:
    if not webserver_is_running():
      break
    deploy_main.shellexec2(kill_command)
129 
130 
131 
def stop_deployment_scripts():
  """
  <Purpose>
    Stops every instance of the deployment scripts (deploy_main.py), in case
    several were launched (which should not happen unless someone started
    them manually).

  <Arguments>
    None.

  <Exceptions>
    None are raised explicitly by this function.

  <Side Effects>
    Sends 'kill -9' to every process whose 'ps -ef' line matches
    deploy_main.py.  Does not return until deploymentscript_is_running()
    reports False.

  <Returns>
    None.
  """
  # pick the PID column of every matching process and force-kill it
  kill_command = "ps -ef | grep deploy_main.py | grep -v grep | awk ' { print $2 } ' | xargs kill -9"

  # keep killing until no deployment script process is left
  while True:
    if not deploymentscript_is_running():
      break
    deploy_main.shellexec2(kill_command)
153
154   
def stop_ssh_scp():
  """
  <Purpose>
    Stops all possibly hung ssh/scp processes owned by the 'nsr' user.

  <Arguments>
    None.

  <Exceptions>
    None are raised explicitly by this function.

  <Side Effects>
    Sends 'kill -9' to every process owned by 'nsr' whose 'ps -ef' line
    matches 'ssh -T', 'ssh -x' or 'scp -o'.  Might close the user's own ssh
    session if it matches one of those patterns.

  <Returns>
    None.
  """
  # command-line fragments that identify the ssh/scp invocations to clean up
  hung_process_patterns = ('ssh -T', 'ssh -x', 'scp -o')

  for pattern in hung_process_patterns:
    # 'grep -v grep' keeps the grep helper itself (which also runs as nsr and
    # carries the pattern on its command line) out of the kill list, the same
    # way the other ps pipelines in this file do
    deploy_main.shellexec2("ps -ef | grep '" + pattern + "' | grep -v grep " +
      "| awk '{ if ($1 == \"nsr\") print $2 } ' | xargs kill -9")
176   
177   
def check_ssh_agent():
  """
  <Purpose>
    Checks whether an ssh-agent owned by 'nsr' is running and, if not,
    tries to start one and add the default key to it.

  <Arguments>
    None.

  <Exceptions>
    None are raised explicitly by this function.

  <Side Effects>
    Prints a message and runs 'ssh-agent' / 'ssh-add' through
    deploy_main.shellexec2 when no agent is found.

  <Returns>
    None.
  """
  # print the command column ($8) of every process owned by nsr that
  # matches ssh-agent
  ps_command = "ps -ef | grep ssh-agent | awk '{ if ($1 == \"nsr\") print $8 }'"
  listing, _stderr, _status = deploy_main.shellexec2(ps_command)

  if 'ssh-agent' in listing:
    # at least one instance is running, nothing to do
    return

  print("ssh-agent is not running")

  # boot the agent; the script is meant to run as nsr, whose key has no
  # passphrase, so ssh-add needs no input
  # NOTE(review): the agent's environment is set up inside the shell that
  # shellexec2 spawns -- presumably it does not reach this process; confirm.
  deploy_main.shellexec2("eval `ssh-agent`; ssh-add ")
205 
206 
207 
def script_monitor():
  """
  <Purpose>
    One round of deployment script scheduling, meant to run on its own
    thread.  Waits (rechecking every 5 minutes) until no deployment script is
    running, cleans up leftover ssh/scp processes, launches the scripts again
    in the background, sleeps 90 minutes and then hands over to a freshly
    started thread that repeats the process.

  <Arguments>
    None.

  <Exceptions>
    None are raised explicitly by this function.

  <Side Effects>
    See stop_ssh_scp().  Launches 'python deploy_main.py -c custom.py' in the
    background and starts a new thread running script_monitor.

  <Returns>
    None.
  """
  recheck_interval_secs = 60 * 5
  relaunch_interval_secs = 60 * 90

  # make sure that the last round of tests has finished before starting a new
  # one, stalling 5 minutes at a time while it is still going
  while True:
    if not deploymentscript_is_running():
      break
    time.sleep(recheck_interval_secs)

  # kill all old, possibly hung ssh processes
  # bug?: this will close the ssh session of anyone who is connected as
  # nsr@blackbox when the scripts connect.
  stop_ssh_scp()
  # (check_ssh_agent() is currently not called from here)

  # run in a non-blocking way, discarding all output
  deploy_main.shellexec2('python deploy_main.py -c custom.py > /dev/null 2> /dev/null < /dev/null&')

  # sleep for 1.5 hrs; if the scripts are not done by then, the next round
  # stalls in the loop above
  time.sleep(relaunch_interval_secs)

  # this thread ends here; the next round runs on a brand new thread
  thread.start_new_thread(script_monitor, ())
245
246
def is_monitor_already_running():
  """
  <Purpose>
    Checks whether another monitor process (deploy_server_monitor.py) is
    already running besides this one.

  <Arguments>
    None.

  <Exceptions>
    None. A process count that cannot be parsed is reported on stdout and
    treated as "already running".

  <Side Effects>
    Runs a 'ps -ef' pipeline through deploy_main.shellexec2; may print an
    error message.

  <Returns>
    Boolean. False when the pipeline produces no output or counts exactly
    one monitor process; True for any other count or when the count cannot
    be parsed.
  """
  # count the deploy_server_monitor processes owned by nsr: keep the owner
  # column, collapse it with 'uniq -c' and print just the count
  count_command = ("ps -ef | grep deploy_server_monitor | grep -v grep "
    "| awk '{ if ($1 == \"nsr\") print $1 } ' | sort | uniq -c | awk ' { print $1 } '")
  counted, _stderr, _status = deploy_main.shellexec2(count_command)

  if not counted:
    # nothing matched at all
    return False

  try:
    monitor_count = int(counted)
  except Exception:
    # something went wrong.. play it safe and report a running monitor
    print('Error in is_monitor_already_running')
    return True

  # a count of exactly one is taken to be this very process
  return monitor_count != 1
282     
283
def main():
  """
  <Purpose>
    Entry point. Launches the two monitoring threads:
      - the http server monitor
      - the deployment scripts monitor
    or, when asked to, stops everything instead.

  <Arguments>
    Read from sys.argv. If exactly one argument is given and it is 'kill',
    the web server, the deployment scripts and leftover ssh/scp processes
    are stopped. Exactly one argument with any other value prints a usage
    hint and does nothing else. With no argument (or more than one) the
    monitors are launched.

  <Exceptions>
    None are raised explicitly by this function.

  <Side Effects>
    Prints status messages. In monitor mode this call never returns: the
    main thread sleeps in a loop to keep the monitor threads alive.

  <Returns>
    None.
  """

  # exactly one argument: the only supported one is 'kill'
  if len(sys.argv) == 2:
    if sys.argv[1] == 'kill':
      # stop the web server
      stop_web_server()
      # stop the deployment scripts
      stop_deployment_scripts()
      # clean up any hung ssh/scp possibly left over from the deployment scripts
      stop_ssh_scp()
      print("Everything stopped successfully")
    else:
      # an unknown argument used to exit silently; tell the user what is
      # accepted instead of leaving them guessing
      print("Unrecognized argument '" + sys.argv[1] +
        "'. Usage: python deploy_server_monitor.py [kill]")
    return

  # only one monitor may run at a time; otherwise just exit cleanly
  if is_monitor_already_running():
    print("Monitor is already running.")
    return

  thread.start_new_thread(server_monitor, ())
  thread.start_new_thread(script_monitor, ())

  while True:
    # the work happens on the monitor threads; the main thread only needs to
    # stay alive, so sleep instead of spinning
    time.sleep(60)
329 
330 
331 
# start the monitor only when this file is executed as a script, not when it
# is imported as a module
if __name__ == "__main__":
  main()
Note: See TracBrowser for help on using the browser.