@@ -34,45 +34,52 @@
 #include <net/sock.h>
 #include <linux/in.h>
 #include <linux/if_arp.h>
+#include <linux/jhash.h>
 #include "rds.h"
 
-/*
- * XXX this probably still needs more work.. no INADDR_ANY, and rbtrees aren't
- * particularly zippy.
- *
- * This is now called for every incoming frame so we arguably care much more
- * about it than we used to.
- */
-static DEFINE_RWLOCK(rds_bind_lock);
-static struct rb_root rds_bind_tree = RB_ROOT;
+#define BIND_HASH_SIZE 1024
+static struct hlist_head bind_hash_table[BIND_HASH_SIZE];
+static DEFINE_SPINLOCK(rds_bind_lock);
+
+static struct hlist_head *hash_to_bucket(__be32 addr, __be16 port)
+{
+	return bind_hash_table + (jhash_2words((u32)addr, (u32)port, 0) &
+				  (BIND_HASH_SIZE - 1));
+}
 
-static struct rds_sock *rds_bind_tree_walk(__be32 addr, __be16 port,
-					   struct rds_sock *insert)
+static struct rds_sock *rds_bind_lookup(__be32 addr, __be16 port,
+					struct rds_sock *insert)
 {
-	struct rb_node **p = &rds_bind_tree.rb_node;
-	struct rb_node *parent = NULL;
 	struct rds_sock *rs;
+	struct hlist_node *node;
+	struct hlist_head *head = hash_to_bucket(addr, port);
 	u64 cmp;
 	u64 needle = ((u64)be32_to_cpu(addr) << 32) | be16_to_cpu(port);
 
-	while (*p) {
-		parent = *p;
-		rs = rb_entry(parent, struct rds_sock, rs_bound_node);
-
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(rs, node, head, rs_bound_node) {
 		cmp = ((u64)be32_to_cpu(rs->rs_bound_addr) << 32) |
 		      be16_to_cpu(rs->rs_bound_port);
 
-		if (needle < cmp)
-			p = &(*p)->rb_left;
-		else if (needle > cmp)
-			p = &(*p)->rb_right;
-		else
+		if (cmp == needle) {
+			rcu_read_unlock();
 			return rs;
+		}
 	}
+	rcu_read_unlock();
 
 	if (insert) {
-		rb_link_node(&insert->rs_bound_node, parent, p);
-		rb_insert_color(&insert->rs_bound_node, &rds_bind_tree);
+		/*
+		 * make sure our addr and port are set before
+		 * we are added to the list, other people
+		 * in rcu will find us as soon as the
+		 * hlist_add_head_rcu is done
+		 */
+		insert->rs_bound_addr = addr;
+		insert->rs_bound_port = port;
+		rds_sock_addref(insert);
+
+		hlist_add_head_rcu(&insert->rs_bound_node, head);
 	}
 	return NULL;
 }
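
Two mechanics in this hunk are easy to miss. The bucket index comes from masking the hash with BIND_HASH_SIZE - 1, which is equivalent to a modulo only because 1024 is a power of two, and each binding is compared as a single u64 "needle" that packs address and port so one integer compare covers both fields. A standalone userspace sketch of both ideas, with a trivial stand-in mixer where the kernel uses jhash_2words():

#include <stdint.h>
#include <stdio.h>

#define BIND_HASH_SIZE 1024	/* power of two, so mask == modulo */

/* stand-in for the kernel's jhash_2words(); any reasonable 2-word
 * mixer demonstrates the bucketing, the exact function is not the point */
static uint32_t mix_2words(uint32_t a, uint32_t b)
{
	uint32_t h = a * 0x9e3779b1u ^ b * 0x85ebca6bu;
	return h ^ (h >> 16);
}

static unsigned int bucket_of(uint32_t addr, uint16_t port)
{
	return mix_2words(addr, port) & (BIND_HASH_SIZE - 1);
}

/* one comparable key per {addr, port}, like the lookup's "needle" */
static uint64_t needle_of(uint32_t addr, uint16_t port)
{
	return ((uint64_t)addr << 32) | port;
}

int main(void)
{
	uint32_t addr = 0x7f000001u;	/* 127.0.0.1 in host byte order */

	printf("bucket %u, needle %#llx\n",
	       bucket_of(addr, 4000),
	       (unsigned long long)needle_of(addr, 4000));
	return 0;
}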
@@ -86,15 +93,13 @@ static struct rds_sock *rds_bind_tree_walk(__be32 addr, __be16 port,
 struct rds_sock *rds_find_bound(__be32 addr, __be16 port)
 {
 	struct rds_sock *rs;
-	unsigned long flags;
 
-	read_lock_irqsave(&rds_bind_lock, flags);
-	rs = rds_bind_tree_walk(addr, port, NULL);
+	rs = rds_bind_lookup(addr, port, NULL);
+
 	if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD))
 		rds_sock_addref(rs);
 	else
 		rs = NULL;
-	read_unlock_irqrestore(&rds_bind_lock, flags);
 
 	rdsdebug("returning rs %p for %pI4:%u\n", rs, &addr,
 		ntohs(port));
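
rds_find_bound() now takes no lock at all. RCU only keeps the socket's memory valid while the chain is being walked, so the caller must pin the result with a reference count before relying on it, which is what the rds_sock_addref() after the lookup is for. A generic sketch of that lookup-then-pin pattern (entry and find_and_pin are illustrative names; hlist_for_each_entry_rcu() is the four-argument form used by kernels of this vintage, newer kernels drop the cursor argument):

#include <linux/atomic.h>
#include <linux/rcupdate.h>
#include <linux/rculist.h>
#include <linux/types.h>

struct entry {
	struct hlist_node node;
	atomic_t refcount;
	u32 key;
};

static struct entry *find_and_pin(struct hlist_head *head, u32 key)
{
	struct entry *e;
	struct hlist_node *pos;

	rcu_read_lock();
	hlist_for_each_entry_rcu(e, pos, head, node) {
		if (e->key == key) {
			atomic_inc(&e->refcount);	/* pin before leaving */
			rcu_read_unlock();		/* the read section */
			return e;
		}
	}
	rcu_read_unlock();
	return NULL;
}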
@@ -116,28 +121,21 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
 		last = rover - 1;
 	}
 
-	write_lock_irqsave(&rds_bind_lock, flags);
+	spin_lock_irqsave(&rds_bind_lock, flags);
 
 	do {
 		if (rover == 0)
 			rover++;
-		if (!rds_bind_tree_walk(addr, cpu_to_be16(rover), rs)) {
-			*port = cpu_to_be16(rover);
+		if (!rds_bind_lookup(addr, cpu_to_be16(rover), rs)) {
+			*port = rs->rs_bound_port;
 			ret = 0;
+			rdsdebug("rs %p binding to %pI4:%d\n",
+			  rs, &addr, (int)ntohs(*port));
 			break;
 		}
 	} while (rover++ != last);
 
-	if (ret == 0) {
-		rs->rs_bound_addr = addr;
-		rs->rs_bound_port = *port;
-		rds_sock_addref(rs);
-
-		rdsdebug("rs %p binding to %pI4:%d\n",
-		  rs, &addr, (int)ntohs(*port));
-	}
-
-	write_unlock_irqrestore(&rds_bind_lock, flags);
+	spin_unlock_irqrestore(&rds_bind_lock, flags);
 
 	return ret;
 }
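
The port rotor keeps its shape: try each candidate once, skip port 0, and give up after wrapping all the way back to last. A compact userspace rendering of the loop, with a toy in_use() predicate standing in for rds_bind_lookup(), which in the patch both detects a collision and inserts the socket when the port is free:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* toy predicate for illustration; pretend low ports are taken */
static bool in_use(uint16_t port)
{
	return port < 4005;
}

/* try each candidate once, wrapping past zero; 0 means none free */
static uint16_t claim_port(uint16_t rover, uint16_t last)
{
	do {
		if (rover == 0)		/* port 0 is never a valid bind */
			rover++;
		if (!in_use(rover))
			return rover;
	} while (rover++ != last);	/* stop after one full cycle */

	return 0;
}

int main(void)
{
	printf("claimed %u\n", claim_port(4000, 3999));	/* -> 4005 */
	return 0;
}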
@@ -146,19 +144,19 @@ void rds_remove_bound(struct rds_sock *rs)
 {
 	unsigned long flags;
 
-	write_lock_irqsave(&rds_bind_lock, flags);
+	spin_lock_irqsave(&rds_bind_lock, flags);
 
 	if (rs->rs_bound_addr) {
 		rdsdebug("rs %p unbinding from %pI4:%d\n",
 			rs, &rs->rs_bound_addr,
 			ntohs(rs->rs_bound_port));
 
-		rb_erase(&rs->rs_bound_node, &rds_bind_tree);
+		hlist_del_init_rcu(&rs->rs_bound_node);
 		rds_sock_put(rs);
 		rs->rs_bound_addr = 0;
 	}
 
-	write_unlock_irqrestore(&rds_bind_lock, flags);
+	spin_unlock_irqrestore(&rds_bind_lock, flags);
 }
 
 int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
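
On removal, hlist_del_init_rcu() unlinks the socket while lockless readers may still be traversing it, so the memory must not be reclaimed until a grace period has passed. RDS covers that with the socket refcount (rds_sock_put() drops the table's reference) plus the synchronize_rcu() added in the last hunk; the more common pattern defers the free with call_rcu(), sketched here with illustrative names:

#include <linux/rcupdate.h>
#include <linux/rculist.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct bound_entry {
	struct hlist_node node;
	struct rcu_head rcu;
};

static DEFINE_SPINLOCK(table_lock);

static void bound_entry_free(struct rcu_head *head)
{
	kfree(container_of(head, struct bound_entry, rcu));
}

static void remove_entry(struct bound_entry *e)
{
	unsigned long flags;

	/* writers still serialize on the spinlock... */
	spin_lock_irqsave(&table_lock, flags);
	hlist_del_init_rcu(&e->node);	/* ...readers are never blocked */
	spin_unlock_irqrestore(&table_lock, flags);

	/* reclaim only after every in-flight reader has finished */
	call_rcu(&e->rcu, bound_entry_free);
}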
@@ -198,5 +196,9 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 
 out:
 	release_sock(sk);
+
+	/* we might have called rds_remove_bound on error */
+	if (ret)
+		synchronize_rcu();
 	return ret;
 }
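
The error-path synchronize_rcu() is the blocking counterpart to call_rcu(): it returns only once every RCU read-side critical section that could still see the just-removed binding has exited, so a bind that fails after rds_remove_bound() cannot return while a reader still holds the stale socket. In sketch form (illustrative helper, not from the patch; the caller is assumed to serialize writers, e.g. under rds_bind_lock):

#include <linux/rcupdate.h>
#include <linux/rculist.h>

/* unlink an object, then block until no reader can still see it */
static void retire_node(struct hlist_node *node)
{
	hlist_del_init_rcu(node);	/* readers in flight may still hold it */
	synchronize_rcu();		/* ...but none survive past this line */
	/* now safe to reuse or release the containing object */
}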