← Back to team overview

yahoo-eng-team team mailing list archive

[Bug 1740795] [NEW] nova lacks debug output for selected page size when hw:mem_page_size is specified

 

Public bug reported:

nova lacks debug output for selected page size when hw:mem_page_size is
specified

Administrators currently are left completely in the dark as to which
page size is selected by nova and why. This output is especially useful
in the case of hw:mem_page_size=any.

/usr/lib/python2.7/site-packages/nova/virt/hardware.py
~~~
  37 MEMPAGES_SMALL = -1
  38 MEMPAGES_LARGE = -2
  39 MEMPAGES_ANY = -3
(...)
 933 def _get_flavor_image_meta(key, flavor, image_meta):
 934     """Extract both flavor- and image-based variants of metadata."""
 935     flavor_key = ':'.join(['hw', key])
 936     image_key = '_'.join(['hw', key])
 937 
 938     flavor_policy = flavor.get('extra_specs', {}).get(flavor_key)
 939     image_policy = image_meta.properties.get(image_key)
 940 
 941     return flavor_policy, image_policy
(...)
 944 def _numa_get_pagesize_constraints(flavor, image_meta):
 945     """Return the requested memory page size
 946 
 947     :param flavor: a Flavor object to read extra specs from
 948     :param image_meta: nova.objects.ImageMeta object instance
 949 
 950     :raises: MemoryPageSizeInvalid if flavor extra spec or image
 951              metadata provides an invalid hugepage value
 952     :raises: MemoryPageSizeForbidden if flavor extra spec request
 953              conflicts with image metadata request
 954     :returns: a page size requested or MEMPAGES_*
 955     """
 956 
 957     def check_and_return_pages_size(request):
 958         if request == "any":
 959             return MEMPAGES_ANY
 960         elif request == "large":
 961             return MEMPAGES_LARGE
 962         elif request == "small":
 963             return MEMPAGES_SMALL
 964         else:
 965             try:
 966                 request = int(request)
 967             except ValueError:
 968                 try:
 969                     request = strutils.string_to_bytes(
 970                         request, return_int=True) / units.Ki
 971                 except ValueError:
 972                     request = 0
 973 
 974         if request <= 0:
 975             raise exception.MemoryPageSizeInvalid(pagesize=request)
 976 
 977         return request
 978 
 979     flavor_request, image_request = _get_flavor_image_meta(
 980         'mem_page_size', flavor, image_meta)
 981 
 982     if not flavor_request and image_request:
 983         raise exception.MemoryPageSizeForbidden(
 984             pagesize=image_request,
 985             against="<empty>")
 986 
 987     if not flavor_request:
 988         # Nothing was specified for hugepages,
 989         # let's the default process running.
 990         return None
 991 
 992     pagesize = check_and_return_pages_size(flavor_request)
 993     if image_request and (pagesize in (MEMPAGES_ANY, MEMPAGES_LARGE)):
 994         return check_and_return_pages_size(image_request)
 995     elif image_request:
 996         raise exception.MemoryPageSizeForbidden(
 997             pagesize=image_request,
 998             against=flavor_request)
 999 
1000     return pagesize
~~~

If the flavor is set to any, and the image properties are not set, then this will return:
MEMPAGES_ANY

In the same file, there is the following code:
~~~
 620 def _numa_cell_supports_pagesize_request(host_cell, inst_cell):
 621     """Determine whether the cell can accept the request.
 622 
 623     :param host_cell: host cell to fit the instance cell onto
 624     :param inst_cell: instance cell we want to fit
 625 
 626     :raises: exception.MemoryPageSizeNotSupported if custom page
 627              size not supported in host cell
 628     :returns: the page size able to be handled by host_cell
 629     """
 630     avail_pagesize = [page.size_kb for page in host_cell.mempages]
 631     avail_pagesize.sort(reverse=True)
 632 
 633     def verify_pagesizes(host_cell, inst_cell, avail_pagesize):
 634         inst_cell_mem = inst_cell.memory * units.Ki
 635         for pagesize in avail_pagesize:
 636             if host_cell.can_fit_hugepages(pagesize, inst_cell_mem):
 637                 return pagesize
 638 
 639     if inst_cell.pagesize == MEMPAGES_SMALL:
 640         return verify_pagesizes(host_cell, inst_cell, avail_pagesize[-1:])
 641     elif inst_cell.pagesize == MEMPAGES_LARGE:
 642         return verify_pagesizes(host_cell, inst_cell, avail_pagesize[:-1])
 643     elif inst_cell.pagesize == MEMPAGES_ANY:
 644         return verify_pagesizes(host_cell, inst_cell, avail_pagesize)
 645     else:
 646         return verify_pagesizes(host_cell, inst_cell, [inst_cell.pagesize])
~~~

Let's quickly verify the implications of avail_pagesize.sort(reverse=True) and of the avail_pagesize[-1:] and avail_pagesize[:-1] slices:
~~~
>>> arr = [0,1,2,3,4,5]
>>> arr.sort(reverse=True)
>>> arr
[5, 4, 3, 2, 1, 0]
>>> arr[-1:]
[0]
>>> arr[:-1]
[5, 4, 3, 2, 1]
~~~

So MEMPAGES_SMALL will offer only the smallest page size. MEMPAGES_LARGE
will offer any but the smallest page size. MEMPAGES_ANY will offer all
page sizes.

This is then fed into:
~~~
 633     def verify_pagesizes(host_cell, inst_cell, avail_pagesize):
 634         inst_cell_mem = inst_cell.memory * units.Ki
 635         for pagesize in avail_pagesize:
 636             if host_cell.can_fit_hugepages(pagesize, inst_cell_mem):
 637                 return pagesize
~~~

The page sizes are ordered from largest to smallest. verify_pagesizes
walks the pagesizes in descending order. If
`host_cell.can_fit_hugepages(pagesize, inst_cell_mem)`, then we return
the pagesize. In your case, this means that we walk over all large
pages, and ignore them due to the fact that can_fit_hugepages returns
`false` when comparing pagesize against inst_cell_mem.

From can_fit_hugepages
~~~
146     def can_fit_hugepages(self, pagesize, memory):
147         """Returns whether memory can fit into hugepages size
148 
149         :param pagesize: a page size in KibB
150         :param memory: a memory size asked to fit in KiB
151 
152         :returns: whether memory can fit in hugepages
153         :raises: MemoryPageSizeNotSupported if page size not supported
154         """
155         for pages in self.mempages:
156             if pages.size_kb == pagesize:
157                 return (memory <= pages.free_kb and
158                         (memory % pages.size_kb) == 0)
159         raise exception.MemoryPageSizeNotSupported(pagesize=pagesize)
~~~

So: return `true` if requested instance memory <= free_kb for the page
category and the requested memory can be divided by page size_kb without
a remainder (modulo operation returns 0).

Finally, here is the piece of code that calls all of the above:
~~~
 919     pagesize = None
 920     if instance_cell.pagesize:
 921         pagesize = _numa_cell_supports_pagesize_request(
 922             host_cell, instance_cell)
 923         if not pagesize:
 924             LOG.debug('Host does not support requested memory pagesize. '
 925                       'Requested: %d kB', instance_cell.pagesize)
 926             return
 927 
 928     instance_cell.id = host_cell.id
 929     instance_cell.pagesize = pagesize
 930     return instance_cell
~~~
Observe how instance_cell.pagesize is overwritten: whatever was requested is replaced with the numeric page size that was actually selected! Unfortunately, the code lacks a `LOG.debug` call at this point, which would make troubleshooting easier.

What would help us gain some insight is this little modification:
/usr/lib/python2.7/site-packages/nova/virt/hardware.py
~~~
 919     pagesize = None
 920     if instance_cell.pagesize:
 921         pagesize = _numa_cell_supports_pagesize_request(
 922             host_cell, instance_cell)
 923         if not pagesize:
 924             LOG.debug('Host does not support requested memory pagesize. '
 925                       'Requested: %d kB', instance_cell.pagesize)
 926             return
 927 
+ 928    LOG.debug('Selected memory pagesize: %(selected_mem_pagesize)d kB. '
+ 929               'Requested memory pagesize: %(requested_mem_pagesize)d '
+ 930               '(small = -1, large = -2, any = -3)',
+ 931               {'selected_mem_pagesize' : pagesize,
+ 932                'requested_mem_pagesize': instance_cell.pagesize})
 933              
 934     instance_cell.id = host_cell.id
 935     instance_cell.pagesize = pagesize
 936     return instance_cell
~~~

The additional log output would then yield:
~~~
[root@overcloud-compute-0 ~]# tail -f /var/log/nova/nova-compute.log | grep Selec
2018-01-01 22:02:34.018 433399 DEBUG nova.virt.hardware [req-fc00ae35-7ec0-427f-b1e5-3d8f904292f7 ae9fb97fb2c24155be8850edebb1814e 75f18c3dbade48bfb6f6040091cd71ba - - -] Selected memory pagesize: 2048 kB. Requested memory pagesize: -2 (small = -1, large = -2, any = -3) _numa_fit_instance_cell /usr/lib/python2.7/site-packages/nova/virt/hardware.py:944
2018-01-01 22:03:12.160 433399 DEBUG nova.virt.hardware [req-1e0be24d-e444-495c-9817-79642615e9f7 ae9fb97fb2c24155be8850edebb1814e 75f18c3dbade48bfb6f6040091cd71ba - - -] Selected memory pagesize: 2048 kB. Requested memory pagesize: 2048 (small = -1, large = -2, any = -3) _numa_fit_instance_cell /usr/lib/python2.7/site-packages/nova/virt/hardware.py:944
2018-01-01 22:03:37.910 433399 DEBUG nova.virt.hardware [req-4c63a1c0-76c7-4b53-87e0-a94fa6daa344 ae9fb97fb2c24155be8850edebb1814e 75f18c3dbade48bfb6f6040091cd71ba - - -] Selected memory pagesize: 2048 kB. Requested memory pagesize: -3 (small = -1, large = -2, any = -3) _numa_fit_instance_cell /usr/lib/python2.7/site-packages/nova/virt/hardware.py:944
~~~

** Affects: nova
     Importance: Undecided
     Assignee: Andreas Karis (akaris)
         Status: In Progress

** Changed in: nova
     Assignee: (unassigned) => Andreas Karis (akaris)

** Changed in: nova
       Status: New => In Progress

-- 
You received this bug notification because you are a member of Yahoo!
Engineering Team, which is subscribed to OpenStack Compute (nova).
https://bugs.launchpad.net/bugs/1740795

Title:
  nova lacks debug output for selected page size when hw:mem_page_size
  is specified

Status in OpenStack Compute (nova):
  In Progress

Bug description:
  nova lacks debug output for selected page size when hw:mem_page_size
  is specified

  Administrators currently are left completely in the dark as to which
  page size is selected by nova and why. This output is especially
  useful in the case of hw:mem_page_size=any.

  /usr/lib/python2.7/site-packages/nova/virt/hardware.py
  ~~~
    37 MEMPAGES_SMALL = -1
    38 MEMPAGES_LARGE = -2
    39 MEMPAGES_ANY = -3
  (...)
   933 def _get_flavor_image_meta(key, flavor, image_meta):
   934     """Extract both flavor- and image-based variants of metadata."""
   935     flavor_key = ':'.join(['hw', key])
   936     image_key = '_'.join(['hw', key])
   937 
   938     flavor_policy = flavor.get('extra_specs', {}).get(flavor_key)
   939     image_policy = image_meta.properties.get(image_key)
   940 
   941     return flavor_policy, image_policy
  (...)
   944 def _numa_get_pagesize_constraints(flavor, image_meta):
   945     """Return the requested memory page size
   946 
   947     :param flavor: a Flavor object to read extra specs from
   948     :param image_meta: nova.objects.ImageMeta object instance
   949 
   950     :raises: MemoryPageSizeInvalid if flavor extra spec or image
   951              metadata provides an invalid hugepage value
   952     :raises: MemoryPageSizeForbidden if flavor extra spec request
   953              conflicts with image metadata request
   954     :returns: a page size requested or MEMPAGES_*
   955     """
   956 
   957     def check_and_return_pages_size(request):
   958         if request == "any":
   959             return MEMPAGES_ANY
   960         elif request == "large":
   961             return MEMPAGES_LARGE
   962         elif request == "small":
   963             return MEMPAGES_SMALL
   964         else:
   965             try:
   966                 request = int(request)
   967             except ValueError:
   968                 try:
   969                     request = strutils.string_to_bytes(
   970                         request, return_int=True) / units.Ki
   971                 except ValueError:
   972                     request = 0
   973 
   974         if request <= 0:
   975             raise exception.MemoryPageSizeInvalid(pagesize=request)
   976 
   977         return request
   978 
   979     flavor_request, image_request = _get_flavor_image_meta(
   980         'mem_page_size', flavor, image_meta)
   981 
   982     if not flavor_request and image_request:
   983         raise exception.MemoryPageSizeForbidden(
   984             pagesize=image_request,
   985             against="<empty>")
   986 
   987     if not flavor_request:
   988         # Nothing was specified for hugepages,
   989         # let's the default process running.
   990         return None
   991 
   992     pagesize = check_and_return_pages_size(flavor_request)
   993     if image_request and (pagesize in (MEMPAGES_ANY, MEMPAGES_LARGE)):
   994         return check_and_return_pages_size(image_request)
   995     elif image_request:
   996         raise exception.MemoryPageSizeForbidden(
   997             pagesize=image_request,
   998             against=flavor_request)
   999 
  1000     return pagesize
  ~~~

  If the flavor is set to any, and the image properties are not set, then this will return:
  MEMPAGES_ANY

  In the same file, there is the following code:
  ~~~
   620 def _numa_cell_supports_pagesize_request(host_cell, inst_cell):
   621     """Determine whether the cell can accept the request.
   622 
   623     :param host_cell: host cell to fit the instance cell onto
   624     :param inst_cell: instance cell we want to fit
   625 
   626     :raises: exception.MemoryPageSizeNotSupported if custom page
   627              size not supported in host cell
   628     :returns: the page size able to be handled by host_cell
   629     """
   630     avail_pagesize = [page.size_kb for page in host_cell.mempages]
   631     avail_pagesize.sort(reverse=True)
   632 
   633     def verify_pagesizes(host_cell, inst_cell, avail_pagesize):
   634         inst_cell_mem = inst_cell.memory * units.Ki
   635         for pagesize in avail_pagesize:
   636             if host_cell.can_fit_hugepages(pagesize, inst_cell_mem):
   637                 return pagesize
   638 
   639     if inst_cell.pagesize == MEMPAGES_SMALL:
   640         return verify_pagesizes(host_cell, inst_cell, avail_pagesize[-1:])
   641     elif inst_cell.pagesize == MEMPAGES_LARGE:
   642         return verify_pagesizes(host_cell, inst_cell, avail_pagesize[:-1])
   643     elif inst_cell.pagesize == MEMPAGES_ANY:
   644         return verify_pagesizes(host_cell, inst_cell, avail_pagesize)
   645     else:
   646         return verify_pagesizes(host_cell, inst_cell, [inst_cell.pagesize])
  ~~~

  Let's quickly verify the implications of avail_pagesize.sort(reverse=True) and of the avail_pagesize[-1:] and avail_pagesize[:-1] slices:
  ~~~
  >>> arr = [0,1,2,3,4,5]
  >>> arr.sort(reverse=True)
  >>> arr
  [5, 4, 3, 2, 1, 0]
  >>> arr[-1:]
  [0]
  >>> arr[:-1]
  [5, 4, 3, 2, 1]
  ~~~

  So MEMPAGES_SMALL will offer only the smallest page size.
  MEMPAGES_LARGE will offer any but the smallest page size. MEMPAGES_ANY
  will offer all page sizes.

  This is then fed into:
  ~~~
   633     def verify_pagesizes(host_cell, inst_cell, avail_pagesize):
   634         inst_cell_mem = inst_cell.memory * units.Ki
   635         for pagesize in avail_pagesize:
   636             if host_cell.can_fit_hugepages(pagesize, inst_cell_mem):
   637                 return pagesize
  ~~~

  The page sizes are ordered from largest to smallest. verify_pagesizes
  walks the pagesizes in descending order. If
  `host_cell.can_fit_hugepages(pagesize, inst_cell_mem)`, then we return
  the pagesize. In your case, this means that we walk over all large
  pages, and ignore them due to the fact that can_fit_hugepages returns
  `false` when comparing pagesize against inst_cell_mem.

  From can_fit_hugepages
  ~~~
  146     def can_fit_hugepages(self, pagesize, memory):
  147         """Returns whether memory can fit into hugepages size
  148 
  149         :param pagesize: a page size in KibB
  150         :param memory: a memory size asked to fit in KiB
  151 
  152         :returns: whether memory can fit in hugepages
  153         :raises: MemoryPageSizeNotSupported if page size not supported
  154         """
  155         for pages in self.mempages:
  156             if pages.size_kb == pagesize:
  157                 return (memory <= pages.free_kb and
  158                         (memory % pages.size_kb) == 0)
  159         raise exception.MemoryPageSizeNotSupported(pagesize=pagesize)
  ~~~

  So: return `true` if requested instance memory <= free_kb for the page
  category and the requested memory can be divided by page size_kb
  without a remainder (modulo operation returns 0).

  Finally, here is the piece of code that calls all of the above:
  ~~~
   919     pagesize = None
   920     if instance_cell.pagesize:
   921         pagesize = _numa_cell_supports_pagesize_request(
   922             host_cell, instance_cell)
   923         if not pagesize:
   924             LOG.debug('Host does not support requested memory pagesize. '
   925                       'Requested: %d kB', instance_cell.pagesize)
   926             return
   927 
   928     instance_cell.id = host_cell.id
   929     instance_cell.pagesize = pagesize
   930     return instance_cell
  ~~~
  Observe how instance_cell.pagesize is overwritten: whatever was requested is replaced with the numeric page size that was actually selected! Unfortunately, the code lacks a `LOG.debug` call at this point, which would make troubleshooting easier.

  What would help us gain some insight is this little modification:
  /usr/lib/python2.7/site-packages/nova/virt/hardware.py
  ~~~
   919     pagesize = None
   920     if instance_cell.pagesize:
   921         pagesize = _numa_cell_supports_pagesize_request(
   922             host_cell, instance_cell)
   923         if not pagesize:
   924             LOG.debug('Host does not support requested memory pagesize. '
   925                       'Requested: %d kB', instance_cell.pagesize)
   926             return
   927 
  + 928    LOG.debug('Selected memory pagesize: %(selected_mem_pagesize)d kB. '
  + 929               'Requested memory pagesize: %(requested_mem_pagesize)d '
  + 930               '(small = -1, large = -2, any = -3)',
  + 931               {'selected_mem_pagesize' : pagesize,
  + 932                'requested_mem_pagesize': instance_cell.pagesize})
   933              
   934     instance_cell.id = host_cell.id
   935     instance_cell.pagesize = pagesize
   936     return instance_cell
  ~~~

  The additional log output would then yield:
  ~~~
  [root@overcloud-compute-0 ~]# tail -f /var/log/nova/nova-compute.log | grep Selec
  2018-01-01 22:02:34.018 433399 DEBUG nova.virt.hardware [req-fc00ae35-7ec0-427f-b1e5-3d8f904292f7 ae9fb97fb2c24155be8850edebb1814e 75f18c3dbade48bfb6f6040091cd71ba - - -] Selected memory pagesize: 2048 kB. Requested memory pagesize: -2 (small = -1, large = -2, any = -3) _numa_fit_instance_cell /usr/lib/python2.7/site-packages/nova/virt/hardware.py:944
  2018-01-01 22:03:12.160 433399 DEBUG nova.virt.hardware [req-1e0be24d-e444-495c-9817-79642615e9f7 ae9fb97fb2c24155be8850edebb1814e 75f18c3dbade48bfb6f6040091cd71ba - - -] Selected memory pagesize: 2048 kB. Requested memory pagesize: 2048 (small = -1, large = -2, any = -3) _numa_fit_instance_cell /usr/lib/python2.7/site-packages/nova/virt/hardware.py:944
  2018-01-01 22:03:37.910 433399 DEBUG nova.virt.hardware [req-4c63a1c0-76c7-4b53-87e0-a94fa6daa344 ae9fb97fb2c24155be8850edebb1814e 75f18c3dbade48bfb6f6040091cd71ba - - -] Selected memory pagesize: 2048 kB. Requested memory pagesize: -3 (small = -1, large = -2, any = -3) _numa_fit_instance_cell /usr/lib/python2.7/site-packages/nova/virt/hardware.py:944
  ~~~

To manage notifications about this bug go to:
https://bugs.launchpad.net/nova/+bug/1740795/+subscriptions


Follow ups