yahoo-eng-team team mailing list archive

Thread
Date
[Bug 1830747] [NEW] Error 500 trying to migrate an instance after wrong request_spec

To: yahoo-eng-team@xxxxxxxxxxxxxxxxxxx
From: Thomas Goirand <thomas@xxxxxxxxxx>
Date: Tue, 28 May 2019 14:51:25 -0000
Reply-to: Bug 1830747 <1830747@xxxxxxxxxxxxxxxxxx>
Sender: bounces@xxxxxxxxxxxxx
Public bug reported:

We started an instance last Wednesday, and the compute node where it ran
failed (maybe a hardware issue?). Since the networking looked wrong (i.e.,
missing network interfaces), I tried to migrate the instance.

According to Matt, it looks like the request_spec entry for the
instance is wrong:

<mriedem> my guess is something like this happened: 1. create server in a group, 2. cold migrate the server which fails on host A and does a reschedule to host B which maybe also fails (would be good to know if previous cold migration attempts failed with reschedules), 3. try to cold migrate again which fails with the instance_group.uuid thing
<mriedem> the reschedule might be the key b/c like i said conductor has to rebuild a request spec and i think that's probably where we're doing a partial build of the request spec but missing the group uuid

Here's what I had in my novaapidb:

{
  "nova_object.name": "RequestSpec",
  "nova_object.version": "1.11",
  "nova_object.data": {
    "ignore_hosts": null,
    "requested_destination": null,
    "instance_uuid": "2098b550-c749-460a-a44e-5932535993a9",
    "num_instances": 1,
    "image": {
      "nova_object.name": "ImageMeta",
      "nova_object.version": "1.8",
      "nova_object.data": {
        "min_disk": 40,
        "disk_format": "raw",
        "min_ram": 0,
        "container_format": "bare",
        "properties": {
          "nova_object.name": "ImageMetaProps",
          "nova_object.version": "1.20",
          "nova_object.data": {},
          "nova_object.namespace": "nova"
        }
      },
      "nova_object.namespace": "nova",
      "nova_object.changes": [
        "properties",
        "min_ram",
        "container_format",
        "disk_format",
        "min_disk"
      ]
    },
    "availability_zone": "AZ3",
    "flavor": {
      "nova_object.name": "Flavor",
      "nova_object.version": "1.2",
      "nova_object.data": {
        "id": 28,
        "name": "cpu2-ram6-disk40",
        "is_public": true,
        "rxtx_factor": 1,
        "deleted_at": null,
        "root_gb": 40,
        "vcpus": 2,
        "memory_mb": 6144,
        "disabled": false,
        "extra_specs": {},
        "updated_at": null,
        "flavorid": "e29f3ee9-3f07-46d2-b2e2-efa4950edc95",
        "deleted": false,
        "swap": 0,
        "description": null,
        "created_at": "2019-02-07T07:48:21Z",
        "vcpu_weight": 0,
        "ephemeral_gb": 0
      },
      "nova_object.namespace": "nova"
    },
    "force_hosts": null,
    "retry": null,
    "instance_group": {
      "nova_object.name": "InstanceGroup",
      "nova_object.version": "1.11",
      "nova_object.data": {
        "members": null,
        "hosts": null,
        "policy": "anti-affinity"
      },
      "nova_object.namespace": "nova",
      "nova_object.changes": [
        "policy",
        "members",
        "hosts"
      ]
    },
    "scheduler_hints": {
      "group": [
        "295c99ea-2db6-469a-877f-454a3903a8d8"
      ]
    },
    "limits": {
      "nova_object.name": "SchedulerLimits",
      "nova_object.version": "1.0",
      "nova_object.data": {
        "disk_gb": null,
        "numa_topology": null,
        "memory_mb": null,
        "vcpu": null
      },
      "nova_object.namespace": "nova",
      "nova_object.changes": [
        "disk_gb",
        "vcpu",
        "memory_mb",
        "numa_topology"
      ]
    },
    "force_nodes": null,
    "project_id": "1bf4dbb3d2c746658f462bf8e59ec6be",
    "user_id": "255cca4584c24b16a684e3e8322b436b",
    "numa_topology": null,
    "is_bfv": false,
    "pci_requests": {
      "nova_object.name": "InstancePCIRequests",
      "nova_object.version": "1.1",
      "nova_object.data": {
        "instance_uuid": "2098b550-c749-460a-a44e-5932535993a9",
        "requests": []
      },
      "nova_object.namespace": "nova"
    }
  },
  "nova_object.namespace": "nova",
  "nova_object.changes": [
    "ignore_hosts",
    "requested_destination",
    "num_instances",
    "image",
    "availability_zone",
    "instance_uuid",
    "flavor",
    "scheduler_hints",
    "pci_requests",
    "instance_group",
    "limits",
    "project_id",
    "user_id",
    "numa_topology",
    "is_bfv",
    "retry"
  ]
}

** Affects: nova
     Importance: Undecided
         Status: New

-- 
You received this bug notification because you are a member of Yahoo!
Engineering Team, which is subscribed to OpenStack Compute (nova).
https://bugs.launchpad.net/bugs/1830747

Title:
  Error 500 trying to migrate an instance after wrong request_spec

Status in OpenStack Compute (nova):
  New

Bug description:
  We started an instance last Wednesday, and the compute node where it ran
  failed (maybe a hardware issue?). Since the networking looked wrong (i.e.,
  missing network interfaces), I tried to migrate the instance.

  According to Matt, it looks like the request_spec entry for the
  instance is wrong:

  <mriedem> my guess is something like this happened: 1. create server in a group, 2. cold migrate the server which fails on host A and does a reschedule to host B which maybe also fails (would be good to know if previous cold migration attempts failed with reschedules), 3. try to cold migrate again which fails with the instance_group.uuid thing
  <mriedem> the reschedule might be the key b/c like i said conductor has to rebuild a request spec and i think that's probably where we're doing a partial build of the request spec but missing the group uuid

  Here's what I had in my novaapidb:

  {
    "nova_object.name": "RequestSpec",
    "nova_object.version": "1.11",
    "nova_object.data": {
      "ignore_hosts": null,
      "requested_destination": null,
      "instance_uuid": "2098b550-c749-460a-a44e-5932535993a9",
      "num_instances": 1,
      "image": {
        "nova_object.name": "ImageMeta",
        "nova_object.version": "1.8",
        "nova_object.data": {
          "min_disk": 40,
          "disk_format": "raw",
          "min_ram": 0,
          "container_format": "bare",
          "properties": {
            "nova_object.name": "ImageMetaProps",
            "nova_object.version": "1.20",
            "nova_object.data": {},
            "nova_object.namespace": "nova"
          }
        },
        "nova_object.namespace": "nova",
        "nova_object.changes": [
          "properties",
          "min_ram",
          "container_format",
          "disk_format",
          "min_disk"
        ]
      },
      "availability_zone": "AZ3",
      "flavor": {
        "nova_object.name": "Flavor",
        "nova_object.version": "1.2",
        "nova_object.data": {
          "id": 28,
          "name": "cpu2-ram6-disk40",
          "is_public": true,
          "rxtx_factor": 1,
          "deleted_at": null,
          "root_gb": 40,
          "vcpus": 2,
          "memory_mb": 6144,
          "disabled": false,
          "extra_specs": {},
          "updated_at": null,
          "flavorid": "e29f3ee9-3f07-46d2-b2e2-efa4950edc95",
          "deleted": false,
          "swap": 0,
          "description": null,
          "created_at": "2019-02-07T07:48:21Z",
          "vcpu_weight": 0,
          "ephemeral_gb": 0
        },
        "nova_object.namespace": "nova"
      },
      "force_hosts": null,
      "retry": null,
      "instance_group": {
        "nova_object.name": "InstanceGroup",
        "nova_object.version": "1.11",
        "nova_object.data": {
          "members": null,
          "hosts": null,
          "policy": "anti-affinity"
        },
        "nova_object.namespace": "nova",
        "nova_object.changes": [
          "policy",
          "members",
          "hosts"
        ]
      },
      "scheduler_hints": {
        "group": [
          "295c99ea-2db6-469a-877f-454a3903a8d8"
        ]
      },
      "limits": {
        "nova_object.name": "SchedulerLimits",
        "nova_object.version": "1.0",
        "nova_object.data": {
          "disk_gb": null,
          "numa_topology": null,
          "memory_mb": null,
          "vcpu": null
        },
        "nova_object.namespace": "nova",
        "nova_object.changes": [
          "disk_gb",
          "vcpu",
          "memory_mb",
          "numa_topology"
        ]
      },
      "force_nodes": null,
      "project_id": "1bf4dbb3d2c746658f462bf8e59ec6be",
      "user_id": "255cca4584c24b16a684e3e8322b436b",
      "numa_topology": null,
      "is_bfv": false,
      "pci_requests": {
        "nova_object.name": "InstancePCIRequests",
        "nova_object.version": "1.1",
        "nova_object.data": {
          "instance_uuid": "2098b550-c749-460a-a44e-5932535993a9",
          "requests": []
        },
        "nova_object.namespace": "nova"
      }
    },
    "nova_object.namespace": "nova",
    "nova_object.changes": [
      "ignore_hosts",
      "requested_destination",
      "num_instances",
      "image",
      "availability_zone",
      "instance_uuid",
      "flavor",
      "scheduler_hints",
      "pci_requests",
      "instance_group",
      "limits",
      "project_id",
      "user_id",
      "numa_topology",
      "is_bfv",
      "retry"
    ]
  }

To manage notifications about this bug go to:
https://bugs.launchpad.net/nova/+bug/1830747/+subscriptions
Follow ups

[Bug 1830747] Fix included in openstack/nova pike-eol
From: OpenStack Infra, 2022-08-01
[Bug 1830747] Re: Error 500 trying to migrate an instance after wrong request_spec
From: OpenStack Infra, 2019-06-03
[Bug 1830747] Re: Error 500 trying to migrate an instance after wrong request_spec
From: Matt Riedemann, 2019-05-28