@@ -433,21 +433,6 @@ static void veth_set_multicast_list(struct net_device *dev)
 {
 }
 
-static struct sk_buff *veth_build_skb(void *head, int headroom, int len,
-				      int buflen)
-{
-	struct sk_buff *skb;
-
-	skb = build_skb(head, buflen);
-	if (!skb)
-		return NULL;
-
-	skb_reserve(skb, headroom);
-	skb_put(skb, len);
-
-	return skb;
-}
-
 static int veth_select_rxq(struct net_device *dev)
 {
 	return smp_processor_id() % dev->real_num_rx_queues;
@@ -695,72 +680,143 @@ static void veth_xdp_rcv_bulk_skb(struct veth_rq *rq, void **frames,
 	}
 }
 
-static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
-					struct sk_buff *skb,
-					struct veth_xdp_tx_bq *bq,
-					struct veth_stats *stats)
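+/* Take a reference on the skb head page and on every fragment page so the
+ * buffer stays valid after the original skb has been consumed
+ * (XDP_TX / XDP_REDIRECT paths).
+ */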
+static void veth_xdp_get(struct xdp_buff *xdp)
 {
-	u32 pktlen, headroom, act, metalen, frame_sz;
-	void *orig_data, *orig_data_end;
-	struct bpf_prog *xdp_prog;
-	int mac_len, delta, off;
-	struct xdp_buff xdp;
+	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+	int i;
 
-	skb_prepare_for_gro(skb);
+	get_page(virt_to_page(xdp->data));
+	if (likely(!xdp_buff_has_frags(xdp)))
+		return;
 
-	rcu_read_lock();
-	xdp_prog = rcu_dereference(rq->xdp_prog);
-	if (unlikely(!xdp_prog)) {
-		rcu_read_unlock();
-		goto out;
-	}
+	for (i = 0; i < sinfo->nr_frags; i++)
+		__skb_frag_ref(&sinfo->frags[i]);
+}
 
-	mac_len = skb->data - skb_mac_header(skb);
-	pktlen = skb->len + mac_len;
-	headroom = skb_headroom(skb) - mac_len;
+static int veth_convert_xdp_buff_from_skb(struct veth_rq *rq,
+					  struct xdp_buff *xdp,
+					  struct sk_buff **pskb)
+{
+	struct sk_buff *skb = *pskb;
+	u32 frame_sz;
 
 	if (skb_shared(skb) || skb_head_is_locked(skb) ||
-	    skb_is_nonlinear(skb) || headroom < XDP_PACKET_HEADROOM) {
+	    skb_shinfo(skb)->nr_frags) {
+		u32 size, len, max_head_size, off;
 		struct sk_buff *nskb;
-		int size, head_off;
-		void *head, *start;
 		struct page *page;
+		int i, head_off;
 
-		size = SKB_DATA_ALIGN(VETH_XDP_HEADROOM + pktlen) +
-		       SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-		if (size > PAGE_SIZE)
+		/* We need a private copy of the skb and data buffers since
+		 * the eBPF program can modify it. We segment the original skb
+		 * into order-0 pages without linearizing it.
+		 *
+		 * Make sure we have enough space for the linear and paged areas.
+		 */
+		max_head_size = SKB_WITH_OVERHEAD(PAGE_SIZE -
+						  VETH_XDP_HEADROOM);
+		if (skb->len > PAGE_SIZE * MAX_SKB_FRAGS + max_head_size)
 			goto drop;
 
+		/* Allocate skb head */
 		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
 		if (!page)
 			goto drop;
 
-		head = page_address(page);
-		start = head + VETH_XDP_HEADROOM;
-		if (skb_copy_bits(skb, -mac_len, start, pktlen)) {
-			page_frag_free(head);
+		nskb = build_skb(page_address(page), PAGE_SIZE);
+		if (!nskb) {
+			put_page(page);
 			goto drop;
 		}
 
-		nskb = veth_build_skb(head, VETH_XDP_HEADROOM + mac_len,
-				      skb->len, PAGE_SIZE);
-		if (!nskb) {
-			page_frag_free(head);
+		skb_reserve(nskb, VETH_XDP_HEADROOM);
+		size = min_t(u32, skb->len, max_head_size);
+		if (skb_copy_bits(skb, 0, nskb->data, size)) {
+			consume_skb(nskb);
 			goto drop;
 		}
+		skb_put(nskb, size);
 
 		skb_copy_header(nskb, skb);
 		head_off = skb_headroom(nskb) - skb_headroom(skb);
 		skb_headers_offset_update(nskb, head_off);
+
+		/* Allocate paged area of new skb */
+		off = size;
+		len = skb->len - off;
+
+		for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) {
+			page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+			if (!page) {
+				consume_skb(nskb);
+				goto drop;
+			}
+
+			size = min_t(u32, len, PAGE_SIZE);
+			skb_add_rx_frag(nskb, i, page, 0, size, PAGE_SIZE);
+			if (skb_copy_bits(skb, off, page_address(page),
+					  size)) {
+				consume_skb(nskb);
+				goto drop;
+			}
+
+			len -= size;
+			off += size;
+		}
+
 		consume_skb(skb);
 		skb = nskb;
+	} else if (skb_headroom(skb) < XDP_PACKET_HEADROOM &&
+		   pskb_expand_head(skb, VETH_XDP_HEADROOM, 0, GFP_ATOMIC)) {
+		goto drop;
 	}
 
 	/* SKB "head" area always have tailroom for skb_shared_info */
 	frame_sz = skb_end_pointer(skb) - skb->head;
 	frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-	xdp_init_buff(&xdp, frame_sz, &rq->xdp_rxq);
-	xdp_prepare_buff(&xdp, skb->head, skb->mac_header, pktlen, true);
+	xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq);
+	xdp_prepare_buff(xdp, skb->head, skb_headroom(skb),
+			 skb_headlen(skb), true);
+
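+	/* Remember the size of the paged area and flag the xdp_buff as
+	 * multi-buffer so frags-aware code can find the fragments.
+	 */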
+	if (skb_is_nonlinear(skb)) {
+		skb_shinfo(skb)->xdp_frags_size = skb->data_len;
+		xdp_buff_set_frags_flag(xdp);
+	} else {
+		xdp_buff_clear_frags_flag(xdp);
+	}
+	*pskb = skb;
+
+	return 0;
+drop:
+	consume_skb(skb);
+	*pskb = NULL;
+
+	return -ENOMEM;
+}
+
+static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
+					struct sk_buff *skb,
+					struct veth_xdp_tx_bq *bq,
+					struct veth_stats *stats)
+{
+	void *orig_data, *orig_data_end;
+	struct bpf_prog *xdp_prog;
+	struct xdp_buff xdp;
+	u32 act, metalen;
+	int off;
+
+	skb_prepare_for_gro(skb);
+
+	rcu_read_lock();
+	xdp_prog = rcu_dereference(rq->xdp_prog);
+	if (unlikely(!xdp_prog)) {
+		rcu_read_unlock();
+		goto out;
+	}
+
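+	/* XDP operates on the full frame: point skb->data back at the MAC
+	 * header before building the xdp_buff.
+	 */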
+	__skb_push(skb, skb->data - skb_mac_header(skb));
+	if (veth_convert_xdp_buff_from_skb(rq, &xdp, &skb))
+		goto drop;
 
 	orig_data = xdp.data;
 	orig_data_end = xdp.data_end;
@@ -771,7 +827,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 	case XDP_PASS:
 		break;
 	case XDP_TX:
-		get_page(virt_to_page(xdp.data));
+		veth_xdp_get(&xdp);
 		consume_skb(skb);
 		xdp.rxq->mem = rq->xdp_mem;
 		if (unlikely(veth_xdp_tx(rq, &xdp, bq) < 0)) {
@@ -783,7 +839,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 		rcu_read_unlock();
 		goto xdp_xmit;
 	case XDP_REDIRECT:
-		get_page(virt_to_page(xdp.data));
+		veth_xdp_get(&xdp);
 		consume_skb(skb);
 		xdp.rxq->mem = rq->xdp_mem;
 		if (xdp_do_redirect(rq->dev, &xdp, xdp_prog)) {
@@ -806,18 +862,24 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 	rcu_read_unlock();
 
 	/* check if bpf_xdp_adjust_head was used */
-	delta = orig_data - xdp.data;
-	off = mac_len + delta;
+	off = orig_data - xdp.data;
 	if (off > 0)
 		__skb_push(skb, off);
 	else if (off < 0)
 		__skb_pull(skb, -off);
-	skb->mac_header -= delta;
+
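+	/* skb->data was pushed to the MAC header before running XDP, so after
+	 * the adjustment above it points at the (possibly moved) MAC header.
+	 */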
+	skb_reset_mac_header(skb);
 
 	/* check if bpf_xdp_adjust_tail was used */
 	off = xdp.data_end - orig_data_end;
 	if (off != 0)
 		__skb_put(skb, off); /* positive on grow, negative on shrink */
+
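+	/* Keep skb->data_len in sync with the paged area of the xdp_buff. */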
+	if (xdp_buff_has_frags(&xdp))
+		skb->data_len = skb_shinfo(skb)->xdp_frags_size;
+	else
+		skb->data_len = 0;
+
 	skb->protocol = eth_type_trans(skb, rq->dev);
 
 	metalen = xdp.data - xdp.data_meta;
@@ -833,7 +895,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 	return NULL;
 err_xdp:
 	rcu_read_unlock();
-	page_frag_free(xdp.data);
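+	/* xdp_return_buff() releases the head page and any fragment pages. */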
+	xdp_return_buff(&xdp);
 xdp_xmit:
 	return NULL;
 }