
yahoo-eng-team team mailing list archive

[Bug 1280140] [NEW] cleanup_running_deleted_instances periodic task failed with instance not found


Public bug reported:

This happens because the DB query made by _delete_instance_files() in the
libvirt driver does not include deleted instances, so the lookup raises
InstanceNotFound for an instance that has already been soft-deleted.
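
For reference, _delete_instance_files() re-reads the instance through a fresh
admin context, and get_admin_context() defaults to read_deleted='no'. A
minimal sketch of one possible fix, assuming a read_deleted='yes' context is
acceptable here (my illustration, not a committed patch):

from nova import context as nova_context
from nova.objects import instance as instance_obj

def _delete_instance_files(self, instance):
    # Sketch only: build the admin context with read_deleted='yes' so
    # the DB layer also returns soft-deleted rows, and the lookup still
    # succeeds for an instance deleted while nova-compute was down.
    context = nova_context.get_admin_context(read_deleted='yes')
    inst_obj = instance_obj.Instance.get_by_uuid(context, instance['uuid'])
    # ... rest of the method unchanged ...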

I can reproduce this bug on both master and stable/havana.
Steps to reproduce:
1. create an instance
2. stop nova-compute
3. wait until nova-manage service list shows XXX for nova-compute
4. delete the instance (with nova-compute down the API deletes it locally,
so the DB row is soft-deleted while the libvirt domain keeps running)
5. set running_deleted_instance_poll_interval = 60 and
running_deleted_instance_action = reap (see the nova.conf fragment below),
then start nova-compute and wait for the cleanup periodic task to run
6. a warning appears in the compute log:
2014-02-14 16:22:25.917 WARNING nova.compute.manager [-] [instance: c32db267-21a0-41e7-9d50-931d8396d8cb] Periodic cleanup failed to delete instance: Instance c32db267-21a0-41e7-9d50-931d8396d8cb could not be found.
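
For step 5, a minimal nova.conf fragment (both option names appear above; the
60-second poll interval is only to make the periodic task fire quickly while
testing):

[DEFAULT]
running_deleted_instance_action = reap
running_deleted_instance_poll_interval = 60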

The debug trace is:
ipdb> n
> /opt/stack/nova/nova/virt/libvirt/driver.py(1006)cleanup()
   1005 block_device_mapping = driver.block_device_info_get_mapping(
-> 1006 block_device_info)
   1007 for vol in block_device_mapping:

ipdb> n
> /opt/stack/nova/nova/virt/libvirt/driver.py(1007)cleanup()
   1006 block_device_info)
-> 1007 for vol in block_device_mapping:
   1008 connection_info = vol['connection_info']

ipdb> n
> /opt/stack/nova/nova/virt/libvirt/driver.py(1041)cleanup()
   1040 
-> 1041 if destroy_disks:
   1042 self._delete_instance_files(instance)

ipdb> n
> /opt/stack/nova/nova/virt/libvirt/driver.py(1042)cleanup()
   1041 if destroy_disks:
-> 1042 self._delete_instance_files(instance)
   1043 

ipdb> s
--Call--
> /opt/stack/nova/nova/virt/libvirt/driver.py(4950)_delete_instance_files()
   4949 
-> 4950 def _delete_instance_files(self, instance):
   4951 # NOTE(mikal): a shim to handle this file not using instance objects


ipdb> n
> /opt/stack/nova/nova/virt/libvirt/driver.py(4953)_delete_instance_files()
   4952 # everywhere. Remove this when that conversion happens.

-> 4953 context = nova_context.get_admin_context()
   4954 inst_obj = instance_obj.Instance.get_by_uuid(context, instance['uuid'])

ipdb> n
> /opt/stack/nova/nova/virt/libvirt/driver.py(4954)_delete_instance_files()
   4953 context = nova_context.get_admin_context()
-> 4954 inst_obj = instance_obj.Instance.get_by_uuid(context, instance['uuid'])
   4955 

ipdb> n
InstanceNotFound: Instance...found.',)
> /opt/stack/nova/nova/virt/libvirt/driver.py(4954)_delete_instance_files()
   4953 context = nova_context.get_admin_context()
-> 4954 inst_obj = instance_obj.Instance.get_by_uuid(context, instance['uuid'])
   4955 

ipdb> n
--Return--
None
> /opt/stack/nova/nova/virt/libvirt/driver.py(4954)_delete_instance_files()
   4953 context = nova_context.get_admin_context()
-> 4954 inst_obj = instance_obj.Instance.get_by_uuid(context, instance['uuid'])
   4955 

ipdb> n
InstanceNotFound: Instance...found.',)
> /opt/stack/nova/nova/virt/libvirt/driver.py(1042)cleanup()
   1041 if destroy_disks:
-> 1042 self._delete_instance_files(instance)
   1043 

ipdb> n
--Return--
None
> /opt/stack/nova/nova/virt/libvirt/driver.py(1042)cleanup()
   1041 if destroy_disks:
-> 1042 self._delete_instance_files(instance)
   1043 

ipdb> n
InstanceNotFound: Instance...found.',)
> /opt/stack/nova/nova/virt/libvirt/driver.py(931)destroy()
    930 self.cleanup(context, instance, network_info, block_device_info,
--> 931 destroy_disks)
    932 

ipdb> n
--Return--
None
> /opt/stack/nova/nova/virt/libvirt/driver.py(931)destroy()
    930 self.cleanup(context, instance, network_info, block_device_info,
--> 931 destroy_disks)
    932 

ipdb> n
InstanceNotFound: Instance...found.',)
> /opt/stack/nova/nova/compute/manager.py(1905)_shutdown_instance()
   1904 self.driver.destroy(context, instance, network_info,
-> 1905 block_device_info)
   1906 except exception.InstancePowerOffFailure:

ipdb> n
> /opt/stack/nova/nova/compute/manager.py(1906)_shutdown_instance()
   1905 block_device_info)
-> 1906 except exception.InstancePowerOffFailure:
   1907 # if the instance can't power off, don't release the ip


ipdb> n
> /opt/stack/nova/nova/compute/manager.py(1910)_shutdown_instance()
   1909 pass
-> 1910 except Exception:
   1911 with excutils.save_and_reraise_exception():

ipdb> n
> /opt/stack/nova/nova/compute/manager.py(1911)_shutdown_instance()
   1910 except Exception:
-> 1911 with excutils.save_and_reraise_exception():
   1912 # deallocate ip and fail without proceeding to


ipdb> n
> /opt/stack/nova/nova/compute/manager.py(1914)_shutdown_instance()
   1913 # volume api calls, preserving current behavior

-> 1914 self._try_deallocate_network(context, instance,
   1915 requested_networks)

ipdb> n
> /opt/stack/nova/nova/compute/manager.py(1915)_shutdown_instance()
   1914 self._try_deallocate_network(context, instance,
-> 1915 requested_networks)
   1916 

ipdb> n
2014-02-14 16:22:02.701 DEBUG nova.compute.manager [-] [instance: c32db267-21a0-41e7-9d50-931d8396d8cb] Deallocating network for instance from (pid=19192) _deallocate_network /opt/stack/nova/nova/compute/manager.py:1531
2014-02-14 16:22:02.704 DEBUG oslo.messaging._drivers.amqpdriver [-] MSG_ID is e529a4edb22b480cb0641a62718e9b04 from (pid=19192) _send /opt/stack/oslo.messaging/oslo/messaging/_drivers/amqpdriver.py:358
2014-02-14 16:22:02.705 DEBUG oslo.messaging._drivers.amqp [-] UNIQUE_ID is aed682f94730441aaa14e43a37c86227. from (pid=19192) _add_unique_id /opt/stack/oslo.messaging/oslo/messaging/_drivers/amqp.py:333
2014-02-14 16:22:02.718 WARNING nova.openstack.common.loopingcall [-] task run outlasted interval by 11.632922 sec
InstanceNotFound: Instance...found.',)
> /opt/stack/nova/nova/compute/manager.py(1915)_shutdown_instance()
   1914 self._try_deallocate_network(context, instance,
-> 1915 requested_networks)
   1916 

ipdb> n
--Return--
None
> /opt/stack/nova/nova/compute/manager.py(1915)_shutdown_instance()
   1914 self._try_deallocate_network(context, instance,
-> 1915 requested_networks)
   1916 

ipdb> l
   1910 except Exception:
   1911 with excutils.save_and_reraise_exception():
   1912 # deallocate ip and fail without proceeding to

   1913 # volume api calls, preserving current behavior

   1914 self._try_deallocate_network(context, instance,
-> 1915 requested_networks)
   1916 
   1917 self._try_deallocate_network(context, instance, requested_networks)
   1918 
   1919 for bdm in vol_bdms:
   1920 try:

ipdb> n
InstanceNotFound: Instance...found.',)
> /opt/stack/nova/nova/compute/manager.py(5225)_cleanup_running_deleted_instances()
   5224 self._shutdown_instance(context, instance, bdms,
-> 5225 notify=False)
   5226 self._cleanup_volumes(context, instance['uuid'], bdms)

ipdb> n
> /opt/stack/nova/nova/compute/manager.py(5227)_cleanup_running_deleted_instances()
   5226 self._cleanup_volumes(context, instance['uuid'], bdms)
-> 5227 except Exception as e:
   5228 LOG.warning(_("Periodic cleanup failed to delete "

ipdb> n
> /opt/stack/nova/nova/compute/manager.py(5228)_cleanup_running_deleted_instances()
   5227 except Exception as e:
-> 5228 LOG.warning(_("Periodic cleanup failed to delete "
   5229 "instance: %s"),

ipdb> n
> /opt/stack/nova/nova/compute/manager.py(5230)_cleanup_running_deleted_instances()
   5229 "instance: %s"),
-> 5230 unicode(e), instance=instance)
   5231 else:

ipdb> n
2014-02-14 16:22:25.917 WARNING nova.compute.manager [-] [instance: c32db267-21a0-41e7-9d50-931d8396d8cb] Periodic cleanup failed to delete instance: Instance c32db267-21a0-41e7-9d50-931d8396d8cb could not be found.

** Affects: nova
     Importance: Undecided
     Assignee: wangpan (hzwangpan)
         Status: In Progress

** Changed in: nova
     Assignee: (unassigned) => wangpan (hzwangpan)

https://bugs.launchpad.net/bugs/1280140

