@@ -432,21 +432,6 @@ static void veth_set_multicast_list(struct net_device *dev)
 {
 }
 
-static struct sk_buff *veth_build_skb(void *head, int headroom, int len,
-				      int buflen)
-{
-	struct sk_buff *skb;
-
-	skb = build_skb(head, buflen);
-	if (!skb)
-		return NULL;
-
-	skb_reserve(skb, headroom);
-	skb_put(skb, len);
-
-	return skb;
-}
-
 static int veth_select_rxq(struct net_device *dev)
 {
 	return smp_processor_id() % dev->real_num_rx_queues;
@@ -694,72 +679,143 @@ static void veth_xdp_rcv_bulk_skb(struct veth_rq *rq, void **frames,
 	}
 }
 
-static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
-					struct sk_buff *skb,
-					struct veth_xdp_tx_bq *bq,
-					struct veth_stats *stats)
+static void veth_xdp_get(struct xdp_buff *xdp)
 {
-	u32 pktlen, headroom, act, metalen, frame_sz;
-	void *orig_data, *orig_data_end;
-	struct bpf_prog *xdp_prog;
-	int mac_len, delta, off;
-	struct xdp_buff xdp;
+	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+	int i;
 
-	skb_prepare_for_gro(skb);
+	get_page(virt_to_page(xdp->data));
+	if (likely(!xdp_buff_has_frags(xdp)))
+		return;
 
-	rcu_read_lock();
-	xdp_prog = rcu_dereference(rq->xdp_prog);
-	if (unlikely(!xdp_prog)) {
-		rcu_read_unlock();
-		goto out;
-	}
+	for (i = 0; i < sinfo->nr_frags; i++)
+		__skb_frag_ref(&sinfo->frags[i]);
+}
 
-	mac_len = skb->data - skb_mac_header(skb);
-	pktlen = skb->len + mac_len;
-	headroom = skb_headroom(skb) - mac_len;
+static int veth_convert_xdp_buff_from_skb(struct veth_rq *rq,
+					  struct xdp_buff *xdp,
+					  struct sk_buff **pskb)
+{
+	struct sk_buff *skb = *pskb;
+	u32 frame_sz;
 
 	if (skb_shared(skb) || skb_head_is_locked(skb) ||
-	    skb_is_nonlinear(skb) || headroom < XDP_PACKET_HEADROOM) {
+	    skb_shinfo(skb)->nr_frags) {
+		u32 size, len, max_head_size, off;
 		struct sk_buff *nskb;
-		int size, head_off;
-		void *head, *start;
 		struct page *page;
+		int i, head_off;
 
-		size = SKB_DATA_ALIGN(VETH_XDP_HEADROOM + pktlen) +
-		       SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-		if (size > PAGE_SIZE)
+		/* We need a private copy of the skb and data buffers since
+		 * the ebpf program can modify it. We segment the original skb
+		 * into order-0 pages without linearize it.
+		 *
+		 * Make sure we have enough space for linear and paged area
+		 */
+		max_head_size = SKB_WITH_OVERHEAD(PAGE_SIZE -
+						  VETH_XDP_HEADROOM);
+		if (skb->len > PAGE_SIZE * MAX_SKB_FRAGS + max_head_size)
 			goto drop;
 
+		/* Allocate skb head */
 		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
 		if (!page)
 			goto drop;
 
-		head = page_address(page);
-		start = head + VETH_XDP_HEADROOM;
-		if (skb_copy_bits(skb, -mac_len, start, pktlen)) {
-			page_frag_free(head);
+		nskb = build_skb(page_address(page), PAGE_SIZE);
+		if (!nskb) {
+			put_page(page);
 			goto drop;
 		}
 
-		nskb = veth_build_skb(head, VETH_XDP_HEADROOM + mac_len,
-				      skb->len, PAGE_SIZE);
-		if (!nskb) {
-			page_frag_free(head);
+		skb_reserve(nskb, VETH_XDP_HEADROOM);
+		size = min_t(u32, skb->len, max_head_size);
+		if (skb_copy_bits(skb, 0, nskb->data, size)) {
+			consume_skb(nskb);
 			goto drop;
 		}
+		skb_put(nskb, size);
 
 		skb_copy_header(nskb, skb);
 		head_off = skb_headroom(nskb) - skb_headroom(skb);
 		skb_headers_offset_update(nskb, head_off);
+
+		/* Allocate paged area of new skb */
+		off = size;
+		len = skb->len - off;
+
+		for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) {
+			page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+			if (!page) {
+				consume_skb(nskb);
+				goto drop;
+			}
+
+			size = min_t(u32, len, PAGE_SIZE);
+			skb_add_rx_frag(nskb, i, page, 0, size, PAGE_SIZE);
+			if (skb_copy_bits(skb, off, page_address(page),
+					  size)) {
+				consume_skb(nskb);
+				goto drop;
+			}
+
+			len -= size;
+			off += size;
+		}
+
 		consume_skb(skb);
 		skb = nskb;
+	} else if (skb_headroom(skb) < XDP_PACKET_HEADROOM &&
+		   pskb_expand_head(skb, VETH_XDP_HEADROOM, 0, GFP_ATOMIC)) {
+		goto drop;
 	}
 
 	/* SKB "head" area always have tailroom for skb_shared_info */
 	frame_sz = skb_end_pointer(skb) - skb->head;
 	frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-	xdp_init_buff(&xdp, frame_sz, &rq->xdp_rxq);
-	xdp_prepare_buff(&xdp, skb->head, skb->mac_header, pktlen, true);
+	xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq);
+	xdp_prepare_buff(xdp, skb->head, skb_headroom(skb),
+			 skb_headlen(skb), true);
+
+	if (skb_is_nonlinear(skb)) {
+		skb_shinfo(skb)->xdp_frags_size = skb->data_len;
+		xdp_buff_set_frags_flag(xdp);
+	} else {
+		xdp_buff_clear_frags_flag(xdp);
+	}
+	*pskb = skb;
+
+	return 0;
+drop:
+	consume_skb(skb);
+	*pskb = NULL;
+
+	return -ENOMEM;
+}
+
+static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
+					struct sk_buff *skb,
+					struct veth_xdp_tx_bq *bq,
+					struct veth_stats *stats)
+{
+	void *orig_data, *orig_data_end;
+	struct bpf_prog *xdp_prog;
+	struct xdp_buff xdp;
+	u32 act, metalen;
+	int off;
+
+	skb_prepare_for_gro(skb);
+
+	rcu_read_lock();
+	xdp_prog = rcu_dereference(rq->xdp_prog);
+	if (unlikely(!xdp_prog)) {
+		rcu_read_unlock();
+		goto out;
+	}
+
+	__skb_push(skb, skb->data - skb_mac_header(skb));
+	if (veth_convert_xdp_buff_from_skb(rq, &xdp, &skb))
+		goto drop;
 
 	orig_data = xdp.data;
 	orig_data_end = xdp.data_end;
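The heart of this hunk is veth_convert_xdp_buff_from_skb(): instead of requiring the whole packet to fit in one page, it copies it into a bounded linear head plus up to MAX_SKB_FRAGS order-0 pages. Below is a minimal user-space sketch of that chunking strategy, not kernel code: PAGE_SZ, MAX_FRAGS and MAX_HEAD are stand-ins for PAGE_SIZE, MAX_SKB_FRAGS and the SKB_WITH_OVERHEAD() head budget, and plain malloc() replaces alloc_page()/skb_add_rx_frag().

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SZ   4096
#define MAX_FRAGS 17			/* stand-in for MAX_SKB_FRAGS */
#define MAX_HEAD  (PAGE_SZ - 256)	/* head page minus headroom/overhead */

struct frag { char *page; size_t size; };

/* Copy len bytes into a head buffer plus page-sized frags, mirroring
 * the linear/paged split above. Returns the frag count, or -1 when the
 * packet cannot fit (the "goto drop" case in the kernel code). */
static int segment_copy(const char *data, size_t len,
			char *head, size_t *head_len, struct frag *frags)
{
	size_t size, off;
	int i;

	if (len > (size_t)PAGE_SZ * MAX_FRAGS + MAX_HEAD)
		return -1;

	size = len < MAX_HEAD ? len : MAX_HEAD;	/* linear area first */
	memcpy(head, data, size);
	*head_len = size;

	for (i = 0, off = size; i < MAX_FRAGS && off < len; i++) {
		size = (len - off) < PAGE_SZ ? (len - off) : PAGE_SZ;
		frags[i].page = malloc(PAGE_SZ);
		if (!frags[i].page) {
			while (i--)
				free(frags[i].page);
			return -1;
		}
		memcpy(frags[i].page, data + off, size);
		frags[i].size = size;
		off += size;
	}
	return i;
}

int main(void)
{
	char pkt[10000] = { 0 }, head[MAX_HEAD];
	struct frag frags[MAX_FRAGS];
	size_t head_len;
	int i, n = segment_copy(pkt, sizeof(pkt), head, &head_len, frags);

	printf("head %zu bytes, %d frags\n", head_len, n);
	for (i = 0; i < n; i++)
		free(frags[i].page);
	return 0;
}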
@@ -770,7 +826,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 	case XDP_PASS:
 		break;
 	case XDP_TX:
-		get_page(virt_to_page(xdp.data));
+		veth_xdp_get(&xdp);
 		consume_skb(skb);
 		xdp.rxq->mem = rq->xdp_mem;
 		if (unlikely(veth_xdp_tx(rq, &xdp, bq) < 0)) {
@@ -782,7 +838,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 		rcu_read_unlock();
 		goto xdp_xmit;
 	case XDP_REDIRECT:
-		get_page(virt_to_page(xdp.data));
+		veth_xdp_get(&xdp);
 		consume_skb(skb);
 		xdp.rxq->mem = rq->xdp_mem;
 		if (xdp_do_redirect(rq->dev, &xdp, xdp_prog)) {
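Both the XDP_TX and XDP_REDIRECT arms free the original skb with consume_skb() while the pages underneath live on in the xdp_buff, so every backing page needs an extra reference first. With multi-buffer frames, a single get_page() on the head page is no longer enough, which is what veth_xdp_get() fixes. A toy user-space model of that refcount handoff follows; struct page_ref and its fields are illustrative only, not kernel API.

#include <stdio.h>

struct page_ref { int refcount; };

struct buff {
	struct page_ref *head;		/* page backing the linear area */
	struct page_ref *frags[17];	/* pages backing the fragments */
	int nr_frags;
};

/* Pin every backing page: the analogue of veth_xdp_get() */
static void buff_get(struct buff *b)
{
	int i;

	b->head->refcount++;
	for (i = 0; i < b->nr_frags; i++)
		b->frags[i]->refcount++;
}

int main(void)
{
	struct page_ref head = { .refcount = 1 };
	struct page_ref frag0 = { .refcount = 1 }, frag1 = { .refcount = 1 };
	struct buff b = { .head = &head, .frags = { &frag0, &frag1 },
			  .nr_frags = 2 };

	buff_get(&b);	/* XDP_TX/XDP_REDIRECT path takes its references */
	/* consume_skb() analogue: the skb drops its own references */
	head.refcount--; frag0.refcount--; frag1.refcount--;

	/* all pages survive with refcount 1, owned by the xdp_buff */
	printf("head=%d frag0=%d frag1=%d\n",
	       head.refcount, frag0.refcount, frag1.refcount);
	return 0;
}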
@@ -805,18 +861,24 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 	rcu_read_unlock();
 
 	/* check if bpf_xdp_adjust_head was used */
-	delta = orig_data - xdp.data;
-	off = mac_len + delta;
+	off = orig_data - xdp.data;
 	if (off > 0)
 		__skb_push(skb, off);
 	else if (off < 0)
 		__skb_pull(skb, -off);
-	skb->mac_header -= delta;
+
+	skb_reset_mac_header(skb);
 
 	/* check if bpf_xdp_adjust_tail was used */
 	off = xdp.data_end - orig_data_end;
 	if (off != 0)
 		__skb_put(skb, off); /* positive on grow, negative on shrink */
+
+	if (xdp_buff_has_frags(&xdp))
+		skb->data_len = skb_shinfo(skb)->xdp_frags_size;
+	else
+		skb->data_len = 0;
+
 	skb->protocol = eth_type_trans(skb, rq->dev);
 
 	metalen = xdp.data - xdp.data_meta;
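Because veth_convert_xdp_buff_from_skb() now starts the skb's data at the mac header, undoing bpf_xdp_adjust_head() reduces to a single pointer delta, and skb_reset_mac_header() suffices afterwards. Here is a small stand-alone sketch of that pointer arithmetic, using a plain byte buffer in place of the skb:

#include <assert.h>
#include <stdio.h>

int main(void)
{
	char buf[256];
	char *data = buf + 64, *data_end = data + 100;	/* 100-byte packet */
	char *orig_data = data, *orig_data_end = data_end;
	long off;

	data -= 8;	/* bpf_xdp_adjust_head(xdp, -8): grow at the front */
	data_end -= 4;	/* bpf_xdp_adjust_tail(xdp, -4): trim the tail */

	off = orig_data - data;
	if (off > 0)
		printf("__skb_push(skb, %ld)\n", off);	/* head grew */
	else if (off < 0)
		printf("__skb_pull(skb, %ld)\n", -off);	/* head shrank */

	off = data_end - orig_data_end;
	if (off != 0)
		printf("__skb_put(skb, %ld)\n", off);	/* +grow / -shrink */

	assert(data_end - data == 100 + 8 - 4);		/* new packet length */
	return 0;
}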
@@ -832,7 +894,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 		return NULL;
 err_xdp:
 	rcu_read_unlock();
-	page_frag_free(xdp.data);
+	xdp_return_buff(&xdp);
 xdp_xmit:
 	return NULL;
 }