yahoo-eng-team team mailing list archive
-
yahoo-eng-team team
-
Mailing list archive
-
Message #78639
[Bug 1830747] [NEW] Error 500 trying to migrate an instance after wrong request_spec
Public bug reported:
We started an instance last Wednesday, and the compute node where it ran
failed (maybe a hardware issue?). Since the networking looked wrong (i.e.,
missing network interfaces), I tried to migrate the instance.
According to Matt, it looks like the request_spec entry for the
instance is wrong:
<mriedem> my guess is something like this happened: 1. create server in a group, 2. cold migrate the server which fails on host A and does a reschedule to host B which maybe also fails (would be good to know if previous cold migration attempts failed with reschedules), 3. try to cold migrate again which fails with the instance_group.uuid thing
<mriedem> the reschedule might be the key b/c like i said conductor has to rebuild a request spec and i think that's probably where we're doing a partial build of the request spec but missing the group uuid
Here's what I had in my novaapidb:
{
"nova_object.name": "RequestSpec",
"nova_object.version": "1.11",
"nova_object.data": {
"ignore_hosts": null,
"requested_destination": null,
"instance_uuid": "2098b550-c749-460a-a44e-5932535993a9",
"num_instances": 1,
"image": {
"nova_object.name": "ImageMeta",
"nova_object.version": "1.8",
"nova_object.data": {
"min_disk": 40,
"disk_format": "raw",
"min_ram": 0,
"container_format": "bare",
"properties": {
"nova_object.name": "ImageMetaProps",
"nova_object.version": "1.20",
"nova_object.data": {},
"nova_object.namespace": "nova"
}
},
"nova_object.namespace": "nova",
"nova_object.changes": [
"properties",
"min_ram",
"container_format",
"disk_format",
"min_disk"
]
},
"availability_zone": "AZ3",
"flavor": {
"nova_object.name": "Flavor",
"nova_object.version": "1.2",
"nova_object.data": {
"id": 28,
"name": "cpu2-ram6-disk40",
"is_public": true,
"rxtx_factor": 1,
"deleted_at": null,
"root_gb": 40,
"vcpus": 2,
"memory_mb": 6144,
"disabled": false,
"extra_specs": {},
"updated_at": null,
"flavorid": "e29f3ee9-3f07-46d2-b2e2-efa4950edc95",
"deleted": false,
"swap": 0,
"description": null,
"created_at": "2019-02-07T07:48:21Z",
"vcpu_weight": 0,
"ephemeral_gb": 0
},
"nova_object.namespace": "nova"
},
"force_hosts": null,
"retry": null,
"instance_group": {
"nova_object.name": "InstanceGroup",
"nova_object.version": "1.11",
"nova_object.data": {
"members": null,
"hosts": null,
"policy": "anti-affinity"
},
"nova_object.namespace": "nova",
"nova_object.changes": [
"policy",
"members",
"hosts"
]
},
"scheduler_hints": {
"group": [
"295c99ea-2db6-469a-877f-454a3903a8d8"
]
},
"limits": {
"nova_object.name": "SchedulerLimits",
"nova_object.version": "1.0",
"nova_object.data": {
"disk_gb": null,
"numa_topology": null,
"memory_mb": null,
"vcpu": null
},
"nova_object.namespace": "nova",
"nova_object.changes": [
"disk_gb",
"vcpu",
"memory_mb",
"numa_topology"
]
},
"force_nodes": null,
"project_id": "1bf4dbb3d2c746658f462bf8e59ec6be",
"user_id": "255cca4584c24b16a684e3e8322b436b",
"numa_topology": null,
"is_bfv": false,
"pci_requests": {
"nova_object.name": "InstancePCIRequests",
"nova_object.version": "1.1",
"nova_object.data": {
"instance_uuid": "2098b550-c749-460a-a44e-5932535993a9",
"requests": []
},
"nova_object.namespace": "nova"
}
},
"nova_object.namespace": "nova",
"nova_object.changes": [
"ignore_hosts",
"requested_destination",
"num_instances",
"image",
"availability_zone",
"instance_uuid",
"flavor",
"scheduler_hints",
"pci_requests",
"instance_group",
"limits",
"project_id",
"user_id",
"numa_topology",
"is_bfv",
"retry"
]
}
** Affects: nova
Importance: Undecided
Status: New
--
You received this bug notification because you are a member of Yahoo!
Engineering Team, which is subscribed to OpenStack Compute (nova).
https://bugs.launchpad.net/bugs/1830747
Title:
Error 500 trying to migrate an instance after wrong request_spec
Status in OpenStack Compute (nova):
New
Bug description:
We started an instance last Wednesday, and the compute node where it ran
failed (maybe a hardware issue?). Since the networking looked wrong (i.e.,
missing network interfaces), I tried to migrate the instance.
According to Matt, it looks like the request_spec entry for the
instance is wrong:
<mriedem> my guess is something like this happened: 1. create server in a group, 2. cold migrate the server which fails on host A and does a reschedule to host B which maybe also fails (would be good to know if previous cold migration attempts failed with reschedules), 3. try to cold migrate again which fails with the instance_group.uuid thing
<mriedem> the reschedule might be the key b/c like i said conductor has to rebuild a request spec and i think that's probably where we're doing a partial build of the request spec but missing the group uuid
Here's what I had in my novaapidb:
{
"nova_object.name": "RequestSpec",
"nova_object.version": "1.11",
"nova_object.data": {
"ignore_hosts": null,
"requested_destination": null,
"instance_uuid": "2098b550-c749-460a-a44e-5932535993a9",
"num_instances": 1,
"image": {
"nova_object.name": "ImageMeta",
"nova_object.version": "1.8",
"nova_object.data": {
"min_disk": 40,
"disk_format": "raw",
"min_ram": 0,
"container_format": "bare",
"properties": {
"nova_object.name": "ImageMetaProps",
"nova_object.version": "1.20",
"nova_object.data": {},
"nova_object.namespace": "nova"
}
},
"nova_object.namespace": "nova",
"nova_object.changes": [
"properties",
"min_ram",
"container_format",
"disk_format",
"min_disk"
]
},
"availability_zone": "AZ3",
"flavor": {
"nova_object.name": "Flavor",
"nova_object.version": "1.2",
"nova_object.data": {
"id": 28,
"name": "cpu2-ram6-disk40",
"is_public": true,
"rxtx_factor": 1,
"deleted_at": null,
"root_gb": 40,
"vcpus": 2,
"memory_mb": 6144,
"disabled": false,
"extra_specs": {},
"updated_at": null,
"flavorid": "e29f3ee9-3f07-46d2-b2e2-efa4950edc95",
"deleted": false,
"swap": 0,
"description": null,
"created_at": "2019-02-07T07:48:21Z",
"vcpu_weight": 0,
"ephemeral_gb": 0
},
"nova_object.namespace": "nova"
},
"force_hosts": null,
"retry": null,
"instance_group": {
"nova_object.name": "InstanceGroup",
"nova_object.version": "1.11",
"nova_object.data": {
"members": null,
"hosts": null,
"policy": "anti-affinity"
},
"nova_object.namespace": "nova",
"nova_object.changes": [
"policy",
"members",
"hosts"
]
},
"scheduler_hints": {
"group": [
"295c99ea-2db6-469a-877f-454a3903a8d8"
]
},
"limits": {
"nova_object.name": "SchedulerLimits",
"nova_object.version": "1.0",
"nova_object.data": {
"disk_gb": null,
"numa_topology": null,
"memory_mb": null,
"vcpu": null
},
"nova_object.namespace": "nova",
"nova_object.changes": [
"disk_gb",
"vcpu",
"memory_mb",
"numa_topology"
]
},
"force_nodes": null,
"project_id": "1bf4dbb3d2c746658f462bf8e59ec6be",
"user_id": "255cca4584c24b16a684e3e8322b436b",
"numa_topology": null,
"is_bfv": false,
"pci_requests": {
"nova_object.name": "InstancePCIRequests",
"nova_object.version": "1.1",
"nova_object.data": {
"instance_uuid": "2098b550-c749-460a-a44e-5932535993a9",
"requests": []
},
"nova_object.namespace": "nova"
}
},
"nova_object.namespace": "nova",
"nova_object.changes": [
"ignore_hosts",
"requested_destination",
"num_instances",
"image",
"availability_zone",
"instance_uuid",
"flavor",
"scheduler_hints",
"pci_requests",
"instance_group",
"limits",
"project_id",
"user_id",
"numa_topology",
"is_bfv",
"retry"
]
}
To manage notifications about this bug go to:
https://bugs.launchpad.net/nova/+bug/1830747/+subscriptions
Follow ups