
yahoo-eng-team team mailing list archive

[Bug 1357453] [NEW] Resource tracker should create compute node record in constructor


Public bug reported:

Currently, the resource tracker lazily creates the compute node record
in the database (via the conductor's compute_node_create() API) during
calls to update_available_resource():

```
    @utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE)
    def update_available_resource(self, context):
        """Override in-memory calculations of compute node resource usage based
        on data audited from the hypervisor layer.

        Add in resource claims in progress to account for operations that have
        declared a need for resources, but not necessarily retrieved them from
        the hypervisor layer yet.
        """
        LOG.audit(_("Auditing locally available compute resources"))
        resources = self.driver.get_available_resource(self.nodename)

        if not resources:
            # The virt driver does not support this function
            LOG.audit(_("Virt driver does not support "
                 "'get_available_resource'  Compute tracking is disabled."))
            self.compute_node = None
            return
        resources['host_ip'] = CONF.my_ip

        self._verify_resources(resources)

        self._report_hypervisor_resource_view(resources)

        if 'pci_passthrough_devices' in resources:
            if not self.pci_tracker:
                self.pci_tracker = pci_manager.PciDevTracker()
            self.pci_tracker.set_hvdevs(jsonutils.loads(resources.pop(
                'pci_passthrough_devices')))

        # Grab all instances assigned to this node:
        instances = objects.InstanceList.get_by_host_and_node(
            context, self.host, self.nodename)

        # Now calculate usage based on instance utilization:
        self._update_usage_from_instances(resources, instances)

        # Grab all in-progress migrations:
        capi = self.conductor_api
        migrations = capi.migration_get_in_progress_by_host_and_node(context,
                self.host, self.nodename)

        self._update_usage_from_migrations(context, resources,
                                           migrations)

        # Detect and account for orphaned instances that may exist on the
        # hypervisor, but are not in the DB:
        orphans = self._find_orphaned_instances()
        self._update_usage_from_orphans(resources, orphans)

        # NOTE(yjiang5): Because pci device tracker status is not cleared in
        # this periodic task, and also because the resource tracker is not
        # notified when instances are deleted, we need remove all usages
        # from deleted instances.
        if self.pci_tracker:
            self.pci_tracker.clean_usage(instances, migrations, orphans)
            resources['pci_stats'] = jsonutils.dumps(self.pci_tracker.stats)
        else:
            resources['pci_stats'] = jsonutils.dumps([])

        self._report_final_resource_view(resources)

        metrics = self._get_host_metrics(context, self.nodename)
        resources['metrics'] = jsonutils.dumps(metrics)
        self._sync_compute_node(context, resources)

    def _sync_compute_node(self, context, resources):
        """Create or update the compute node DB record."""
        if not self.compute_node:
            # we need a copy of the ComputeNode record:
            service = self._get_service(context)
            if not service:
                # no service record, disable resource
                return

            compute_node_refs = service['compute_node']
            if compute_node_refs:
                for cn in compute_node_refs:
                    if cn.get('hypervisor_hostname') == self.nodename:
                        self.compute_node = cn
                        if self.pci_tracker:
                            self.pci_tracker.set_compute_node_id(cn['id'])
                        break

        if not self.compute_node:
            # Need to create the ComputeNode record:
            resources['service_id'] = service['id']
            self._create(context, resources)
            if self.pci_tracker:
                self.pci_tracker.set_compute_node_id(self.compute_node['id'])
            LOG.info(_('Compute_service record created for %(host)s:%(node)s')
                    % {'host': self.host, 'node': self.nodename})

        else:
            # just update the record:
            self._update(context, resources)
            LOG.info(_('Compute_service record updated for %(host)s:%(node)s')
                    % {'host': self.host, 'node': self.nodename})

    def _write_ext_resources(self, resources):
        resources['stats'] = {}
        resources['stats'].update(self.stats)
        self.ext_resources_handler.write_resources(resources)

    def _create(self, context, values):
        """Create the compute node in the DB."""
        # initialize load stats from existing instances:
        self._write_ext_resources(values)
        # NOTE(pmurray): the stats field is stored as a json string. The
        # json conversion will be done automatically by the ComputeNode object
        # so this can be removed when using ComputeNode.
        values['stats'] = jsonutils.dumps(values['stats'])

        self.compute_node = self.conductor_api.compute_node_create(context,
                                                                   values)

    def _get_service(self, context):
        try:
            return self.conductor_api.service_get_by_compute_host(context,
                                                                  self.host)
        except exception.NotFound:
            LOG.warn(_("No service record for host %s"), self.host)
```

There's really no reason to keep checking whether self.compute_node
exists every time update_available_resource() is called. The compute
node record should simply be created, if it does not already exist, in
the resource tracker's constructor.

** Affects: nova
     Importance: Wishlist
         Status: Triaged


** Tags: low-hanging-fruit resource-tracker

-- 
You received this bug notification because you are a member of Yahoo!
Engineering Team, which is subscribed to OpenStack Compute (nova).
https://bugs.launchpad.net/bugs/1357453

To manage notifications about this bug go to:
https://bugs.launchpad.net/nova/+bug/1357453/+subscriptions

