Skip to content

Topic/sentinel proc name conversion #1345

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions ompi/communicator/comm.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2012-2015 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* Copyright (c) 2014-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Mellanox Technologies. All rights reserved.
Expand Down Expand Up @@ -699,7 +699,7 @@ static int ompi_comm_split_type_get_part (ompi_group_t *group, int *results, int
int include = false;

if (ompi_proc_is_sentinel (proc)) {
opal_process_name_t proc_name = ompi_proc_sentinel_to_name ((intptr_t) proc);
opal_process_name_t proc_name = ompi_proc_sentinel_to_name ((uintptr_t) proc);

u16ptr = &locality;

Expand Down
4 changes: 2 additions & 2 deletions ompi/dpm/dpm.c
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
for (i=0; i < size; i++) {
opal_process_name_t proc_name;
if (ompi_proc_is_sentinel (proc_list[i])) {
proc_name = ompi_proc_sentinel_to_name ((intptr_t) proc_list[i]);
proc_name = ompi_proc_sentinel_to_name ((uintptr_t) proc_list[i]);
} else {
proc_name = proc_list[i]->super.proc_name;
}
Expand Down Expand Up @@ -532,7 +532,7 @@ static int construct_peers(ompi_group_t *group, opal_list_t *peers)
return OMPI_ERR_NOT_FOUND;
}
if (ompi_proc_is_sentinel (proct)) {
proc_name = ompi_proc_sentinel_to_name ((intptr_t)proct);
proc_name = ompi_proc_sentinel_to_name ((uintptr_t)proct);
} else {
proc_name = proct->super.proc_name;
}
Expand Down
4 changes: 3 additions & 1 deletion ompi/group/group.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -352,7 +354,7 @@ static inline struct ompi_proc_t *ompi_group_dense_lookup (ompi_group_t *group,

/* replace sentinel value with an actual ompi_proc_t */
ompi_proc_t *real_proc =
(ompi_proc_t *) ompi_proc_for_name (ompi_proc_sentinel_to_name ((intptr_t) proc));
(ompi_proc_t *) ompi_proc_for_name (ompi_proc_sentinel_to_name ((uintptr_t) proc));

if (opal_atomic_cmpset_ptr (group->grp_proc_pointers + peer_id, proc, real_proc)) {
OBJ_RETAIN(real_proc);
Expand Down
4 changes: 3 additions & 1 deletion ompi/group/group_plist.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -61,7 +63,7 @@ static struct ompi_proc_t *ompi_group_dense_lookup_raw (ompi_group_t *group, con
{
if (OPAL_UNLIKELY(ompi_proc_is_sentinel (group->grp_proc_pointers[peer_id]))) {
ompi_proc_t *proc =
(ompi_proc_t *) ompi_proc_lookup (ompi_proc_sentinel_to_name ((intptr_t) group->grp_proc_pointers[peer_id]));
(ompi_proc_t *) ompi_proc_lookup (ompi_proc_sentinel_to_name ((uintptr_t) group->grp_proc_pointers[peer_id]));
if (NULL != proc) {
/* replace sentinel value with an actual ompi_proc_t */
group->grp_proc_pointers[peer_id] = proc;
Expand Down
4 changes: 3 additions & 1 deletion ompi/mca/pml/monitoring/pml_monitoring.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
* reserved.
* Copyright (c) 2013-2015 Inria. All rights reserved.
* Copyright (c) 2015 Bull SAS. All rights reserved.
* Copyright (c) 2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -77,7 +79,7 @@ int mca_pml_monitoring_add_procs(struct ompi_proc_t **procs,
my_rank = i;
/* Extract the peer procname from the procs array */
if( ompi_proc_is_sentinel(procs[i]) ) {
tmp = ompi_proc_sentinel_to_name((intptr_t)procs[i]);
tmp = ompi_proc_sentinel_to_name((uintptr_t)procs[i]);
} else {
tmp = procs[i]->super.proc_name;
}
Expand Down
6 changes: 5 additions & 1 deletion ompi/mca/rte/orte/rte_orte.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* All rights reserved.
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
* Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* Copyright (c) 2014-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Intel, Inc. All rights reserved.
* $COPYRIGHT$
Expand Down Expand Up @@ -59,6 +59,10 @@ typedef orte_ns_cmp_bitmask_t ompi_rte_cmp_bitmask_t;
#define OMPI_RTE_CMP_JOBID ORTE_NS_CMP_JOBID
#define OMPI_RTE_CMP_VPID ORTE_NS_CMP_VPID
#define OMPI_RTE_CMP_ALL ORTE_NS_CMP_ALL
#define OMPI_LOCAL_JOBID(jobid) ORTE_LOCAL_JOBID(jobid)
#define OMPI_JOB_FAMILY(jobid) ORTE_JOB_FAMILY(jobid)
#define OMPI_CONSTRUCT_JOBID(family,local) ORTE_CONSTRUCT_JOBID(family,local)

/* This is the DSS tag to serialize a proc name */
#define OMPI_NAME ORTE_NAME
#define OMPI_PROCESS_NAME_HTON ORTE_PROCESS_NAME_HTON
Expand Down
4 changes: 2 additions & 2 deletions ompi/proc/proc.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
* Copyright (c) 2014-2015 Research Organization for Information Science
* Copyright (c) 2014-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Mellanox Technologies. All rights reserved.
*
Expand Down Expand Up @@ -642,7 +642,7 @@ ompi_proc_pack(ompi_proc_t **proclist, int proclistsize,
ompi_proc_t *proc = proclist[i];

if (ompi_proc_is_sentinel (proc)) {
proc = ompi_proc_for_name_nolock (ompi_proc_sentinel_to_name ((intptr_t) proc));
proc = ompi_proc_for_name_nolock (ompi_proc_sentinel_to_name ((uintptr_t) proc));
}

/* send proc name */
Expand Down
63 changes: 56 additions & 7 deletions ompi/proc/proc.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
* Copyright (c) 2007-2012 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
* Copyright (c) 2015 Research Organization for Information Science
* Copyright (c) 2015-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
Expand Down Expand Up @@ -369,17 +369,66 @@ static inline bool ompi_proc_is_sentinel (ompi_proc_t *proc)
return (intptr_t) proc & 0x1;
}

static inline intptr_t ompi_proc_name_to_sentinel (opal_process_name_t name)
#if OPAL_SIZEOF_PROCESS_NAME_T == SIZEOF_VOID_P
/*
* we assume an ompi_proc_t is at least aligned on two bytes,
* so if the LSB of a pointer to an ompi_proc_t is 1, we have to handle
* this pointer as a sentinel instead of a pointer.
* a sentinel can be seen as an uint64_t with the following format :
* - bit 0 : 1
* - bits 1-15 : local jobid
* - bits 16-31 : job family
* - bits 32-63 : vpid
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FWIW: that memory map isn't quite correct. I'm not sure how you are numbering bits, but assuming that they go from 0 as the LSB to 63 as the MSB, then the layout is:

0-31 : vpid
32-47:  local jobid
48-63:  job family

So you want to set your sentinel in bit 47, not bit 0. Am I missing something?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sentinel bit is the LSB, aka bit zero.
see my comment in the code above
an ompi_proc_t* can either be a genuine pointer, or a sentinel.
that would require 65 bits.
the trick is pointer are always aligned on two bytes, so the LSB of a pointer is always zero, and we can use this bit as the sentinel bit.
if it is one, it means this is not a pointer but a sentinel.
that still means we have to "pack" the opal_process_name_t into the remaining 63 bits, and we decided to drop the MSB of the local jobid.

note that cannot work "as is" 32 bits.

my best bet is to simply disable cutoff at runtime on 32 bits arch

an other option to keep memory usage low is to use the sentinel bit as an opal_process_name_t* instead of a "packed" opal_process_name_t, but I am not sure it is worth it.

I will likely use jobid and family as uint32_t as you mentionned, and fix the abstraction

makes more sense now ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fwiw, @hjelmn initially used the MSB as the sentinel bit, assuming the MSB of an ompi_proc_t address was always zero. this assumption was proven wrong on a linux/sparc Fujitsu cluster.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with using the MSB of the local jobid. What I'm confused by is that your documented bitmap is incorrect. The local jobid is in the middle of the object, not at the edge as you show.

Nathan mistakenly used the MSB of the entire structure instead of the local jobid. Your bitmap moves it to the LSB of the entire structure, which is the LSB of the vpid, not the local jobid.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am confused too ...
I guess the way we number bits differ

    unitptr_t tmp = (uintptr_t)ORTE_LOCAL_JOBID(name.jobid);
     sentinel |= ((tmp << 1) & 0xfffe);

where does the local jobid end into ?

  • bits 1:15 as I documented ?
  • bits 48:62 ?
    and btw, is this endian dependent ?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rhc54 Ok, just making sure. I agree we need to update ompi_group_t. I have some ideas I can start implementing in the next month. Can target 2.1.x or 2.2.x for the changes.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@hjelmn Thanks!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we use the LSB of the uint64_t, and shift the vpid to the correct place (so that we logically use the MSB of the jobid), I dont see why this will be an issue if we decide to make the orte_process_name_t a 32 bits value. Why are we trying to increase the opportunities for cache pollution by moving to a design that will require at least 2 extra indirections (the array of arrays solution).

There is a much simpler solution: dissociate the opal_process_name_t from the orte_process_name_t (in a mechanism similar to the lazy naming resolution for shared libraries). Make opal_process_name_t a simple int (32 to start with) that will be the index to an internal array that we alter every time the process known universe changes. This solution might be a little confusing as the index does not represent a global unique naming, but the ORTE name it points to does, but it has the benefit to let us (OPAL and OMPI) be totally independent on how ORTE wants to name it's processes.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As I said, I'm not advocating any particular solution - I'm only pointing out that hinging the scheme on a particular size of the process name struct, or on the idea that we can arbitrarily throw away one of its bits without repercussions, is fragile. If you have a better scheme than the one I suggested, that's fine with me.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It has at least the advantage of making OPAL + OMPI totally independent on how the RTE names processes...

*/
static inline uintptr_t ompi_proc_name_to_sentinel (opal_process_name_t name)
{
uintptr_t tmp, sentinel = 0;
/* local jobid must fit in 15 bits */
assert(! (OMPI_LOCAL_JOBID(name.jobid) & 0x8000));
sentinel |= 0x1;
tmp = (uintptr_t)OMPI_LOCAL_JOBID(name.jobid);
sentinel |= ((tmp << 1) & 0xfffe);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've been trying to understand this logic, and I'm not sure I believe that the "1" you want to set for the sentinel is winding up in the correct place. Isn't it supposed to be in the MSB position?

tmp = (uintptr_t)OMPI_JOB_FAMILY(name.jobid);
sentinel |= ((tmp << 16) & 0xffff0000);
tmp = (uintptr_t)name.vpid;
sentinel |= ((tmp << 32) & 0xffffffff00000000);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doesn't this still assume that uintptr_t is 64-bits?

return sentinel;
}

static inline opal_process_name_t ompi_proc_sentinel_to_name (uintptr_t sentinel)
{
opal_process_name_t name;
uint32_t local, family;
uint32_t vpid;
assert(sentinel & 0x1);
local = (sentinel >> 1) & 0x7fff;
family = (sentinel >> 16) & 0xffff;
vpid = (sentinel >> 32) & 0xffffffff;
name.jobid = OMPI_CONSTRUCT_JOBID(family,local);
name.vpid = vpid;
return name;
}
#elif 4 == SIZEOF_VOID_P
/*
* currently, a sentinel is only made from the current jobid aka OMPI_PROC_MY_NAME->jobid
* so we only store the first 31 bits of the vpid
*/
static inline uintptr_t ompi_proc_name_to_sentinel (opal_process_name_t name)
{
return (*((intptr_t *) &name) << 1) | 0x1;
assert(OMPI_PROC_MY_NAME->jobid == name.jobid);
return (uintptr_t)((name.vpid <<1) | 0x1);
}

static inline opal_process_name_t ompi_proc_sentinel_to_name (intptr_t sentinel)
static inline opal_process_name_t ompi_proc_sentinel_to_name (uintptr_t sentinel)
{
sentinel >>= 1;
sentinel &= 0x7FFFFFFFFFFFFFFF;
return *((opal_process_name_t *) &sentinel);
opal_process_name_t name;
name.jobid = OMPI_PROC_MY_NAME->jobid;
name.vpid = sentinel >> 1;
return name;
}
#else
#error unsupported pointer size
#endif

END_C_DECLS

Expand Down
3 changes: 2 additions & 1 deletion opal/dss/dss_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights
* reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* Copyright (c) 2014-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
Expand Down Expand Up @@ -47,6 +47,7 @@ typedef struct {
opal_jobid_t jobid;
opal_vpid_t vpid;
} opal_process_name_t;
#define OPAL_SIZEOF_PROCESS_NAME_T 8

BEGIN_C_DECLS

Expand Down
3 changes: 3 additions & 0 deletions orte/util/name_fns.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,9 @@ ORTE_DECLSPEC char *orte_pretty_print_timing(int64_t secs, int64_t usecs);
#define ORTE_CONSTRUCT_LOCAL_JOBID(local, job) \
( ((local) & 0xffff0000) | ((job) & 0x0000ffff) )

#define ORTE_CONSTRUCT_JOBID(family, local) \
ORTE_CONSTRUCT_LOCAL_JOBID(ORTE_CONSTRUCT_JOB_FAMILY(family), local)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this really work if family and local are just 16-bit values? My macros assumed they were 32-bit.

/* a macro for identifying that a proc is a daemon */
#define ORTE_JOBID_IS_DAEMON(n) \
!((n) & 0x0000ffff)
Expand Down