Please note that the CVS and issue trackers have moved to GitHub. These Trac pages are no longer kept up-to-date.

root/seattle/trunk/deploymentscripts/deploy_vessel_info_p.py@5637

Revision 2810, 8.1 KB (checked in by konp, 10 years ago)

Fixed high cpu usage, added summary tool, a number of other fixes and improvements as well. Note: blackbox node keys have not been added yet.

Line 
1"""
2<Program Name>
3  deploy_vessel_info_p.py
4
5<Started>
6  July 2009
7
8<Author>
9  n2k8000@u.washington.edu
10  Konstantin Pik
11
12<Purpose>
13  This file takes care of querying the servers to which our nodes advertise, and then
14  based on what is advertised it diffs the result against the iplist2.list file and reports
15  how many nodes are not checked (aka "other" nodes).
16 
17  Three files are written on a successful run:
18 
19  advertised_nodes_uniq.list:
20    The list of the unique nodes (nodes not in the iplist2.list file).
21   
22  advertised_nodes_list.list
23    The list of all the nodes advertised
24   
25  advertised_nodes_rdns.list:
26    The list of all the nodes that have been reverse dns looked up.
27"""
28
import os
import time

import deploy_helper
import deploy_main
import parallelize_repy
import repyhelper
34
35print 'Import .repy files into namespace...',
36# make sure we have access to the rsa lib in the local namespace
37repyhelper.translate_and_import('rsa.repy')
38repyhelper.translate_and_import('advertise.repy')
39print 'Done'
40
41
42
43
44
45print 'Loading keys from files...',
46canonicalpublickey = rsa_file_to_publickey("canonical.publickey")
47
48v2publickey = rsa_file_to_publickey("v2.publickey")
49
50# The key used for new donations...
51acceptdonationpublickey = rsa_file_to_publickey("acceptdonation.publickey")
52
53# Used for our first attempt at doing something sensible...
54movingtoonepercentpublickey = rsa_file_to_publickey("movingtoonepercent.publickey")
55onepercentpublickey = rsa_file_to_publickey("onepercent.publickey")
56
57# Used as the second onepercentpublickey -- used to correct ivan's
58# mistake of deleting vesselport entries from the geni database
59movingtoonepercent2publickey = rsa_file_to_publickey("movingtoonepercent2.publickey")
60onepercent2publickey = rsa_file_to_publickey("onepercent2.publickey")
61
62
63# Getting us out of the mess we started with
64#genilookuppublickey = rsa_file_to_publickey("genilookup.publickey")
65movingtogenilookuppublickey = rsa_file_to_publickey("movingtogenilookup.publickey")
66
67# Used as the many events onepercent publickey -- This has 50 events per vessel
68movingtoonepercentmanyeventspublickey = rsa_file_to_publickey("movingtoonepercentmanyevents.publickey")
69onepercentmanyeventspublickey = rsa_file_to_publickey("onepercentmanyevents.publickey")
70
71knownstates = [canonicalpublickey, acceptdonationpublickey, 
72           movingtoonepercentpublickey, onepercentpublickey,
73           movingtoonepercent2publickey, onepercent2publickey,
74           movingtoonepercentmanyeventspublickey, onepercentmanyeventspublickey,
75           movingtogenilookuppublickey, v2publickey]
76
77# string representations of the above states
78knownstates_string_representation = ['canonicalpublickey', 'acceptdonationpublickey', 
79           'movingtoonepercentpublickey', 'onepercentpublickey',
80           'movingtoonepercent2publickey', 'onepercent2publickey',
81           'movingtoonepercentmanyeventspublickey', 'onepercentmanyeventspublickey',
82           'movingtogenilookuppublickey', 'v2']
83
84print 'Done'
85
86
87def main():
88
89  # delete the old advertised_nodes_list.list file
90  try:
91    os.remove('advertised_nodes_list.list') 
92  except OSError, ose:
93    pass
94  except Exception, e:
95    print e
96    return
97 
98  advertised_nodes_list = []
99 
100  # enumerate through each key we have
101  for i in range(len(knownstates)):
102    current_key = knownstates[i]
103    print 'Nodes corresponding to '+knownstates_string_representation[i]+'...',
104   
105    # query for all the nodes we have
106    nodes_in_state = advertise_lookup(current_key, 10000000)
107   
108    # this is the counter for the number of nodes in that state
109    total_counter = 0
110    try:
111      # write all of these nodes to file
112      advertised_nodes_handle = open('advertised_nodes_list.list', 'a')
113     
114      # write the type of nodes these are as a comment
115      advertised_nodes_handle.write('\n\n# '+knownstates_string_representation[i])
116      print 'writing to file...',
117     
118      counter = 0
119      total_counter += counter
120      for each_node in nodes_in_state:
121        # strip the :port from each node, make sure it's not an empty string
122        if each_node:
123          nodeip, sep, port = each_node.rpartition(':')
124          advertised_nodes_list.append(nodeip)
125          advertised_nodes_handle.write('\n'+str(nodeip))
126          counter += 1
127      advertised_nodes_handle.write('\n\tA total of '+str(counter))
128      advertised_nodes_handle.close()
129      print 'Finished'
130     
131    except Exception, e:
132      print 'An error occured while writing to file ('+str(e)+')'
133 
134  # this makes it uniq
135  advertised_nodes_list = list(set(advertised_nodes_list))
136  if not advertised_nodes_list:
137    print "You've hit a timeout with the server, it thinks you're spamming it. Please wait"
138    return
139  print advertised_nodes_list
140 
141 
142 
143  # now we read in the other iplist file, ignore all !user: lines and comments
144  # just use the method from deploy_main to do that, but it'll return an array
145  # of tuples (user, hostname/ip) so we'll just grab the hostname/ip field
146  remote_host_tuples = deploy_main.get_remote_hosts_from_file('iplist2.list', True)
147 
148  # this'll keep track of the actual hostnames/ips
149  pl_nodes = []
150
151  for each_host in remote_host_tuples:
152    # each_host[0]: username
153    # each_host[1]: hostname/ip
154    pl_nodes.append(each_host[1])
155   
156  # keeps track of ip->hostname and hostname->ip mappings
157  dns_dict = {}
158 
159  # this'll keep track of the already checked nodes
160  dns_dict['flagged'] = []
161 
162  # keeps track of the rerversed dns entries
163  advertise_reversedns = []
164 
165  # loopbacks, network, eg: 192.*
166  networkiplist = []
167
168  # start multiple threads to reverse lookup all the hostnames/ips as our iplist2.list file
169  # might have mixed forms (1.1.1.1 <-> bla.foo.bar, and we have to match those up)
170  func_handle = parallelize_repy.parallelize_initfunction(advertised_nodes_list, deploy_helper.dnslookup, 15)
171  while not parallelize_repy.parallelize_isfunctionfinished(func_handle):
172    time.sleep(1)
173   
174  # Function is done
175  results_dict = parallelize_repy.parallelize_getresults(func_handle)
176  for each_key in results_dict.keys():
177    print results_dict[each_key]
178   
179  for each_tuple in results_dict['returned']:
180    iphostname = each_tuple[0]
181    reverse_iphostname = each_tuple[1]
182   
183    print str(iphostname)+' resolves to '+reverse_iphostname
184   
185    if iphostname == reverse_iphostname:
186      networkiplist.append(iphostname)
187    else:
188      dns_dict[iphostname] = reverse_iphostname
189      dns_dict[reverse_iphostname] = iphostname
190      advertise_reversedns.append(reverse_iphostname)
191 
192  try:
193    # write the reverse dns's looked up
194    reversed_filehandle = open('advertised_nodes_rdns.list', 'w+')
195    for each_host in advertise_reversedns:
196      reversed_filehandle.write('\n'+each_host)
197    reversed_filehandle.close()
198  except Exception, e:
199    print e
200   
201  # combine the two lists
202  all_advertised_nodes = advertise_reversedns + advertised_nodes_list
203 
204  uniq_counter = 0
205  uniq_list = []
206 
207  print '\n\n'
208 
209  for each_node in all_advertised_nodes:
210    # make sure this node isn't flagged as already checked
211    try:
212      if each_node not in dns_dict['flagged']:
213        if each_node not in pl_nodes:
214          # make sure the reversedns counterpart isn't in the list either
215          if dns_dict[each_node] not in pl_nodes:
216            uniq_counter += 1
217            print each_node+' is not in pl_list'
218            uniq_list.append(each_node)
219            # flag it as already checked
220        # flag the node's counterpart as checked
221        dns_dict['flagged'].append(dns_dict[each_node])
222    except KeyError, ke:
223      # key error means it's an ip in the networkiplist
224      uniq_counter += 1
225      print each_node+' is not in pl_list (ke)'
226      uniq_list.append(each_node)
227    except Exception, e:
228      print e
229 
230  try:
231    # write the reverse dns's looked up
232    uniq_filehandle = open('advertised_nodes_uniq.list', 'w+')
233    for each_host in uniq_list:
234      uniq_filehandle.write('\n'+each_host)
235    uniq_filehandle.close()
236  except Exception, e:
237    print e
238 
239  print '\n\n'
240  print 'The following files were created: advertised_nodes_uniq.list, advertised_nodes_list.list, and advertised_nodes_rdns.list'
241  print 'The number of non-PL/University nodes is '+str(uniq_counter) 
242 
243 
244   
245if __name__ == '__main__':
246  main()
Note: See TracBrowser for help on using the browser.